gremlin/gremlin.in: Add function to iterate over a bin's children.
[autoys] / gremlin / gremlin.in
CommitLineData
d4a7d7b5 1#! @PYTHON@
583b7e4a
MW
2###
3### Convert a directory tree of audio files
4###
5### (c) 2010 Mark Wooding
6###
7
8###----- Licensing notice ---------------------------------------------------
9###
9e3a516f
MW
10### This file is part of the `autoys' audio tools collection.
11###
12### `autoys' is free software; you can redistribute it and/or modify
583b7e4a
MW
13### it under the terms of the GNU General Public License as published by
14### the Free Software Foundation; either version 2 of the License, or
15### (at your option) any later version.
16###
9e3a516f 17### `autoys' is distributed in the hope that it will be useful,
583b7e4a
MW
18### but WITHOUT ANY WARRANTY; without even the implied warranty of
19### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20### GNU General Public License for more details.
21###
22### You should have received a copy of the GNU General Public License
9e3a516f 23### along with `autoys'; if not, write to the Free Software Foundation,
583b7e4a
MW
24### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26###--------------------------------------------------------------------------
27### External dependencies.
28
29## Language features.
30from __future__ import with_statement
31
32## Standard Python libraries.
33import sys as SYS
34import os as OS
35import errno as E
36import time as T
37import unicodedata as UD
38import fnmatch as FN
39import re as RX
40import shutil as SH
41import optparse as OP
42import threading as TH
43import shlex as L
00beb9e5 44from math import sqrt, ceil
583b7e4a
MW
45from contextlib import contextmanager
46
47## eyeD3 tag fettling.
608b936e 48import eyed3 as E3
583b7e4a
MW
49
50## Gstreamer. It picks up command-line arguments -- most notably `--help' --
51## and processes them itself. Of course, its help is completely wrong. This
52## kludge is due to Jonas Wagner.
53_argv, SYS.argv = SYS.argv, []
54import gobject as G
55import gio as GIO
56import gst as GS
57SYS.argv = _argv
58
59## Python Imaging.
60from PIL import Image as I
61
62## Python parsing.
63import pyparsing as P
64
65###--------------------------------------------------------------------------
66### Special initialization.
67
d4a7d7b5 68VERSION = '@VERSION@'
583b7e4a
MW
69
70## GLib.
71G.threads_init()
72
73###--------------------------------------------------------------------------
74### Eyecandy progress reports.
75
def charwidth(s):
  """
  Return the width of S, in character cells.

  Specifically, this is the number of backspace characters required to
  overprint the string S.  If `stdout' has an encoding then do a complicated
  Unicode thing; otherwise assume that characters take up one cell each.

  S may be an encoded byte string, in which case it is decoded using the
  encoding of `stdout', or a string which is already Unicode, in which case
  it is measured as it stands.  If decoding fails, the answer is simply the
  length of S.

  None of this handles tab characters in any kind of useful way.  Sorry.
  """

  ## If there's no encoding for stdout then we're doing something stupid.
  if SYS.stdout.encoding is None: return len(s)

  ## Turn the string into Unicode so we can hack on it properly.  A string
  ## which is Unicode already must be left alone: decoding it would force an
  ## implicit ASCII encoding first, which fails for exactly the interesting
  ## characters.  If decoding a byte string doesn't work out, fall back to
  ## being stupid.
  if isinstance(s, type(u'')):
    u = s
  else:
    try: u = s.decode(SYS.stdout.encoding)
    except UnicodeError: return len(s)

  ## Our main problem is combining characters, but we should also try to
  ## handle wide (mostly Asian) characters, and zero-width ones.  This hack
  ## is taken mostly from http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
  w = 0
  for ch in u:
    cd = ord(ch)
    if UD.category(ch) in ('Cf', 'Me', 'Mn') or 0x1160 <= cd <= 0x11ff:
      pass                              # occupies no cells of its own
    elif UD.east_asian_width(ch) in ('F', 'W'):
      w += 2                            # full-width: two cells
    else:
      w += 1

  ## Done.
  return w
109
class StatusLine (object):
  """
  Maintains a status line containing ephemeral progress information.

  The status line isn't especially important, but it keeps interactive users
  amused.

  There should be only one status line object in your program; otherwise
  they'll interfere with each other and get confused.

  The update algorithm (in `set') is fairly careful to do the right thing
  with long status `lines', and to work properly in an Emacs `shell' buffer.

  Attributes:

  eyecandyp     True if stdout is a terminal, so that overprinting tricks
                are worth doing.
  """

  def __init__(me):
    "Initialize the status line."
    me._last = ''
    me._lastlen = 0

    ## Work out whether stdout is interactive.  If stdout has been replaced
    ## by something which has no file descriptor, or has been closed, then
    ## it certainly isn't a terminal: don't crash on that account.
    try: me.eyecandyp = OS.isatty(SYS.stdout.fileno())
    except (AttributeError, ValueError, EnvironmentError):
      me.eyecandyp = False

  def set(me, line):
    """
    Set the status line contents to LINE, replacing what was there before.

    This only produces actual output if stdout is interactive.  Otherwise the
    line is merely remembered, so that `commit' can write it out later.
    """
    n = len(line)

    ## Eyecandy update.
    if me.eyecandyp:

      ## If the old line was longer, we need to clobber its tail, so work out
      ## what that involves.
      if n < me._lastlen:
        b = charwidth(me._last[n:])
        pre = '\b'*b + ' '*b
      else:
        pre = ''

      ## Now figure out the length of the common prefix between what we had
      ## before and what we have now.  This reduces the amount of I/O done,
      ## which keeps network traffic down on SSH links, and keeps down the
      ## amount of work slow terminal emulators like Emacs have to do.
      i = 0
      m = min(n, me._lastlen)
      while i < m and line[i] == me._last[i]:
        i += 1

      ## Actually do the output, all in one syscall.
      b = charwidth(me._last[i:])
      SYS.stdout.write(pre + '\b'*b + line[i:])
      SYS.stdout.flush()

    ## Update our idea of what's gone on.
    me._lastlen = n
    me._last = line

  def clear(me):
    "Clear the status line.  Just like set('')."
    me.set('')

  def commit(me, line = None):
    """
    Commit the current status line, and maybe the string LINE.

    If the current status line is nonempty, then commit it to the transcript.
    If LINE is not None, then commit that to the transcript too.

    After all of this, we clear the status line to get back to a clean state.
    """
    if me._last:
      if me.eyecandyp:
        ## The text is on the terminal already: just move to a new line.
        SYS.stdout.write('\n')
      else:
        ## Nothing was written by `set', so write the whole line now.
        SYS.stdout.write(me._last + '\n')
    if line is not None:
      SYS.stdout.write(line + '\n')
    me._lastlen = 0
    me._last = ''
189
190STATUS = StatusLine()
191
def filestatus(file, status):
  """
  Return a status-line string reporting STATUS for FILE.

  Only the final component of FILE's path is shown; the result is indented
  by eight spaces.
  """
  leaf = OS.path.basename(file)
  return '%s%s: %s' % (' '*8, leaf, status)
194
class ProgressEyecandy (object):
  """
  Provide amusement while something big and complicated is happening.

  This is an abstract class.  Subclasses must provide a method `progress'
  returning a pair (CURRENT, MAX) indicating the current progress through the
  operation.  Either may be None if the progress isn't known.
  """

  def __init__(me, what, silentp = False):
    """
    Initialize a progress meter.

    WHAT is a prefix string to be written before the progress eyecandy
    itself.  If SILENTP is true then successful completion isn't reported by
    `done' (failures still are).
    """
    me._what = what
    me._silentp = silentp
    me._spinner = 0
    me._start = T.time()

  def _fmt_time(me, t):
    "Format T (in seconds) as a time, in (maybe hours) minutes and seconds."
    s, t = t % 60, int(t/60)
    m, h = t % 60, int(t/60)
    if h > 0:
      return '%d:%02d:%02d' % (h, m, s)
    else:
      return '%02d:%02d' % (m, s)

  def show(me):
    "Show the current level of progress."

    ## If we're not showing pointless frippery, don't bother at all.
    if not STATUS.eyecandyp:
      return

    ## Update the spinner index.
    me._spinner = (me._spinner + 1)%4
    spin = r'/-\|'[me._spinner]

    ## Fetch the current progress information.  Note that we always fetch
    ## both the current and maximum levels, because both might change if an
    ## operation revises its idea of how much work needs doing.
    cur, end = me.progress()

    ## If we couldn't get progress information, display something vaguely
    ## amusing anyway.  A maximum which isn't positive is no use either: we
    ## can't divide by it, and a bar drawn from it would be nonsense.
    if cur is None or end is None or end <= 0:
      STATUS.set('%s %c [unknown progress]' % (me._what, spin))
      return

    ## The maximum may only be an estimate, so keep the current level within
    ## bounds; otherwise the bar overflows and the time remaining goes
    ## negative.
    cur = min(max(cur, 0), end)

    ## Work out -- well, guess -- the time remaining.
    if cur:
      t = T.time()
      eta = me._fmt_time(ceil((t - me._start)*(end - cur)/cur))
    else:
      eta = '???'

    ## Set the status bar.
    n = int(40*cur//end)
    STATUS.set('%s %c [%s%s] %3d%% (%s)' % \
                 (me._what,
                  spin,
                  '='*n, ' '*(40 - n),
                  100*cur//end,
                  eta))

  def done(me, win = True):
    "Show a completion notice, or a failure if WIN is false."
    if not win:
      STATUS.set('%s FAILED!' % me._what)
    elif not me._silentp:
      STATUS.set('%s done (%s)' %
                 (me._what,
                  me._fmt_time(T.time() - me._start)))
    else:
      ## Silent success: leave the status line alone.
      return
    STATUS.commit()
274
275###--------------------------------------------------------------------------
276### Timeout handling.
277
## Event which `timeout' sets once its first delay has expired.
## NOTE(review): presumably polled by the main processing loop so that it can
## wind down cleanly -- the consumer isn't visible here; confirm.
KILLSWITCH = TH.Event()

def timeout(t0, t1):
  """
  Enforce a time limit on the program.

  Sleep for T0 seconds and then set KILLSWITCH.  If we're still running a
  further T1 seconds after that, complain and exit the process immediately
  with status 3.
  """
  ## First stage: wait, and then raise the flag.
  T.sleep(t0)
  KILLSWITCH.set()

  ## Second stage: the grace period ran out, so stop the hard way.
  T.sleep(t1)
  moan('dying messily due to timeout')
  OS._exit(3)
286
287###--------------------------------------------------------------------------
288### Parsing utilities.
289
290## Allow hyphens in identifiers.
IDCHARS = P.alphanums + '-_'
P.Keyword.setDefaultKeywordChars(IDCHARS)

## Some common kinds of tokens.
Name = P.Word(IDCHARS)
Num = P.Word(P.nums).setParseAction(lambda toks: [int(tok) for tok in toks])
String = P.QuotedString('"', '\\')

## Handy abbreviations for constructed parser elements.
def K(k):
  "Return an element matching the keyword K, contributing no tokens."
  return P.Keyword(k).suppress()

def D(d):
  "Return an element matching the literal delimiter D, contributing no tokens."
  return P.Literal(d).suppress()

def R(p):
  "Return an element matching zero or more P, gathering the tokens as one."
  return P.ZeroOrMore(p).setParseAction(lambda s, l, t: [t])

O = P.Optional
304
305###--------------------------------------------------------------------------
306### Format identification and conversion.
307
class IdentificationFailure (Exception):
  """
  Raised by an identification function which can't identify a file.

  `FileCategory.identify' catches this and carries on.
  """
310
class FileCategory (object):
  """
  A FileCategory represents a class of files.

  For example, it's sensible to consider audio, or image files as a
  category.  A file category knows how to recognize member files from
  MIME content types.
  """

  def __init__(me, name, mime_pats, ident):
    """
    Construct a new category, and register it in CATEGORYMAP under NAME.

    The MIME_PATS are a list of `fnmatch' patterns to be compared with a MIME
    type.  The IDENT is a function which produces an identification object
    given a file's name and first-guess MIME type.  The object is passed to a
    Format's `check' method to see whether a file needs re-encoding, and to
    `convert' to assist with the conversion.

    An identification object must have an attribute `mime' which is a set of
    possible MIME types accumulated for the object.
    """
    me.name = name
    me._mime_pats = mime_pats
    me._ident = ident
    CATEGORYMAP[name] = me

  def identify(me, file, mime):
    """
    Attempt to identify FILE, given its apparent MIME type.

    If identification succeeds, return an identification object which can be
    used by associated file formats; otherwise return None.
    """
    for pat in me._mime_pats:
      if FN.fnmatchcase(mime, pat):
        ## This pattern matches, so have a go.  If that fails, there might
        ## yet be another matching pattern, so keep looking.
        try: return me._ident(file, mime)
        except IdentificationFailure: pass
    return None
353
class BaseFormat (object):
  """
  A BaseFormat object represents a particular encoding and parameters.

  The object can verify (the `check' method) whether a particular file
  matches its requirements, and if necessary (`convert') re-encode a file.

  Subclasses should define the following methods.

  check(ID)
        Answer whether the file identified by ID is acceptable according to
        the receiver's parameters.

  convert(MASTER, ID, TARGET)
        Convert the file MASTER, which has been identified as ID, according
        to the receiver's parameters, writing the output to TARGET.

  Subclasses should also provide these attributes.

  CATEGORY
        A FileCategory object for the category of files that this format
        lives within.

  EXT   A file extension to be applied to encoded output files.

  NAME  A user-facing name for the format.

  PROPS A parser element to parse a property definition.  It should produce
        a pair NAME, VALUE to be stored in a dictionary.

  Subclasses for different kinds of file may introduce more subclass
  protocol.
  """

  def fixup(me, path):
    """
    Post-encoding fixups.

    The default does nothing at all with PATH; subclasses may override.
    """
    return None
391
## Registries: format classes by name (filled in by `defformat'), and file
## categories by name (filled in as FileCategory objects are made).
FORMATMAP = {}
CATEGORYMAP = {}

def defformat(name, cls):
  """
  Define a format NAME using class CLS.

  CLS must have both a `NAME' and a `CATEGORY' attribute; raise ValueError
  if either is missing.
  """
  for attr, complaint in [('NAME', 'abstract class'),
                          ('CATEGORY', 'no category')]:
    if not hasattr(cls, attr):
      raise ValueError(complaint)
  FORMATMAP[name] = cls
402
class FormatParser (P.ParserElement):
  """
  Parse a format specifier:

  format-spec ::= string [format-properties]
  format-properties ::= `{' format-property (`,' format-property)* `}'

  The syntax of a format-property is determined by the PROPS attribute on the
  named format and its superclasses.

  The result of a successful parse is an instance of the named format's
  class, constructed with the parsed properties as keyword arguments.
  """

  name = 'format-spec'

  ## We cache the parser elements we generate to avoid enormous consing.
  ## The keys are format names; a value of None means that the format has no
  ## properties at all.
  CACHE = {}

  def _props_parser(me, fcls):
    "Build and return the property-list parser for FCLS, or None."

    ## Combine the distinct PROPS elements found along the class's method
    ## resolution order into a single alternation.
    seen = set()
    alts = None
    for c in fcls.mro():
      try: p = c.PROPS
      except AttributeError: continue
      if p in seen: continue
      if alts is None: alts = p
      else: alts |= p
      seen.add(p)

    ## Maybe there's nothing to parse.
    if alts is None: return None

    ## Wrap the alternation up as an optional brace-enclosed list, yielding
    ## a dictionary.
    props = P.delimitedList(alts)
    props.setParseAction(lambda s, l, t: dict(t.asList()))
    return O(D('{') - props - D('}'))

  def parseImpl(me, s, loc, actp = True):
    "Parse a format-spec from S at LOC; return the new LOC and the format."

    ## Firstly, determine the format name.
    loc, r = Name._parse(s, loc, actp)
    fmt = r[0]

    ## Look up the format class.
    try: fcls = FORMATMAP[fmt]
    except KeyError:
      raise P.ParseException(s, loc, "Unknown format `%s'" % fmt)

    ## Fetch the property-list parser from the cache, if possible; else
    ## construct it.
    if fmt not in me.CACHE:
      me.CACHE[fmt] = me._props_parser(fcls)
    pp = me.CACHE[fmt]

    ## Parse the properties.
    pd = {}
    if pp is not None:
      loc, r = pp._parse(s, loc, actp)
      if r: pd = r[0]

    ## Construct the format object and return it.
    return loc, fcls(**pd)

Format = FormatParser()
463
def prop(kw, pval, tag = None):
  """
  Return a parser element for a format property with keyword KW.

  If PVAL is None then the property is a flag: the bare keyword yields the
  pair (TAG, True).  Otherwise PVAL is a parser element for the property's
  value, and the syntax `KW = PVAL' yields the pair (TAG, VALUE).  TAG
  defaults to KW.
  """
  if tag is None: tag = kw
  if pval is None:
    elt = K(kw)
    action = lambda s, l, t: (tag, True)
  else:
    elt = K(kw) + D('=') + pval
    action = lambda s, l, t: (tag, t[0])
  elt.setParseAction(action)
  return elt
473
474###--------------------------------------------------------------------------
475### Policies and actions.
476
class Action (object):
  """
  An Action object represents a conversion action to be performed.

  This class isn't intended to be instantiated directly.  It exists to define
  some protocol common to all Action objects.

  Action objects have the following attributes.

  master        The name of the master (source) file.

  target        The name of the target (destination) file.

  PRIORITY      The priority of the action, for deciding which of two actions
                to perform.  Higher priorities are more likely to win.

  Converting an Action to a string describes the action in a simple
  user-readable manner.  The `perform' method actually carries the action
  out.
  """

  PRIORITY = 0

  def __init__(me, master):
    "Stash the MASTER file name for later."
    me.master = master

  def choose(me, him):
    """
    Choose either ME or HIM and return one.

    HIM may be None, in which case ME wins by default.  Otherwise ME wins
    only if its priority is strictly higher; HIM wins ties.
    """
    if him is not None and me.PRIORITY <= him.PRIORITY:
      return him
    return me
510
class CopyAction (Action):
  """
  An Action object for simply copying a file.

  Actually we try to hardlink it first, falling back to a copy later.  This
  is both faster and more efficient with regard to disk space.
  """

  ## Copying is good.  Linking is really good, but we can't tell the
  ## difference at this stage.
  PRIORITY = 10

  def __init__(me, master, targetdir):
    "Initialize a CopyAction, from MASTER to the TARGETDIR directory."
    Action.__init__(me, master)
    leaf = OS.path.basename(master)
    me.target = OS.path.join(targetdir, leaf)

  def __str__(me):
    return 'copy/link'

  def perform(me):
    """
    Actually perform a CopyAction.

    Try a hard link first.  If that fails because master and target are on
    different filesystems, copy to a scratch file and rename it into place;
    any other error is passed on to the caller.
    """
    try:
      STATUS.set(filestatus(me.master, 'link'))
      OS.link(me.master, me.target)
    except OSError as err:
      if err.errno != E.EXDEV:
        raise
      STATUS.set(filestatus(me.master, 'copy'))
      scratch = me.target + '.new'
      SH.copyfile(me.master, scratch)
      OS.rename(scratch, me.target)
    STATUS.commit()
544
class ConvertAction (Action):
  """
  An Action object for converting a file to a given format.

  Additional attributes:

  id            The identification object for the master file.

  format        The format to which we're meant to convert the master.
  """

  def __init__(me, master, targetdir, id, format):
    """
    Initialize a ConvertAction.

    The target is named after MASTER, in TARGETDIR, but with the file
    extension replaced by FORMAT's `EXT'.
    """
    Action.__init__(me, master)
    stem = OS.path.splitext(OS.path.basename(master))[0]
    me.target = OS.path.join(targetdir, stem + '.' + format.EXT)
    me.id = id
    me.format = format

  def __str__(me):
    return 'convert to %s' % me.format.NAME

  def perform(me):
    "Actually perform a ConvertAction."
    ## The action itself serves as the status text: it's formatted using
    ## `__str__'.
    STATUS.set(filestatus(me.master, me))
    me.format.convert(me.master, me.id, me.target)
571
572Policy = P.Forward()
573
class FormatPolicy (object):
  """
  A FormatPolicy object represents a set of rules for how to convert files.

  Given a master file, the FormatPolicy will identify it and return a list of
  actions to be performed.  This base class defines no behaviour of its own;
  the methods required of a FormatPolicy are:

  setcategory(CAT)
        Store CAT as the policy's category.  Check that this is consistent
        with the policy as stored.

  actions(MASTER, TARGETDIR, ID, COHORT)
        Given a MASTER file, identified as ID, a target directory
        TARGETDIR, and a list COHORT of (FILE, ID) pairs for other files
        of the same category in the same directory, return a list of
        actions to be performed to get the target directory into the right
        form.  The list might be empty if the policy object /rejects/ the
        file.
  """
593
class AndPolicy (FormatPolicy):
  """
  A FormatPolicy which does the union of a bunch of other policies.

  Each subsidiary policy is invoked in turn.  The highest-priority action for
  each target file is returned.
  """

  def __init__(me, policies):
    "Initialize the policy, given a list of subsidiary POLICIES."
    me._policies = policies

  def setcategory(me, cat):
    "Store CAT as the category, and pass it on to the subsidiary policies."
    me.cat = cat
    for p in me._policies:
      p.setcategory(cat)

  def actions(me, master, targetdir, id, cohort):
    "Return the winning action for each target proposed by any policy."
    best = {}
    for pol in me._policies:
      for act in pol.actions(master, targetdir, id, cohort):
        ## There may be no incumbent for this target, in which case `choose'
        ## is handed None and the newcomer wins.
        best[act.target] = act.choose(best.get(act.target))
    return best.values()

And = K('and') - D('{') - R(Policy) - D('}')
And.setParseAction(lambda s, l, t: AndPolicy(t[0]))
622
class OrPolicy (FormatPolicy):
  """
  A FormatPolicy which tries other policies and uses the first that accepts.

  Each subsidiary policy is invoked in turn.  If any accepts, the actions it
  proposes are returned and no further policies are invoked.  If none accepts
  then the file is rejected.
  """

  def __init__(me, policies):
    "Initialize the policy, given a list of subsidiary POLICIES."
    me._policies = policies

  def setcategory(me, cat):
    "Store CAT as the category, and pass it on to the subsidiary policies."
    me.cat = cat
    for p in me._policies:
      p.setcategory(cat)

  def actions(me, master, targetdir, id, cohort):
    "Return the actions of the first policy to propose any; else nothing."
    for pol in me._policies:
      proposed = pol.actions(master, targetdir, id, cohort)
      if proposed:
        return proposed
    return []

Or = K('or') - D('{') - R(Policy) - D('}')
Or.setParseAction(lambda s, l, t: OrPolicy(t[0]))
650
class AcceptPolicy (FormatPolicy):
  """
  A FormatPolicy which copies files in a particular format.

  If all of the files in a cohort are recognized as being in a particular
  format (including this one), then accept it with a CopyAction; otherwise
  reject.
  """

  def __init__(me, format):
    "Initialize the policy to accept files matching FORMAT."
    me._format = format

  def setcategory(me, cat):
    "Store CAT as the category; raise ValueError if the format's differs."
    fmt = me._format
    if fmt.CATEGORY is not cat:
      raise ValueError(
        "Accept format `%s' has category `%s', not `%s'" %
        (fmt.__class__.__name__, fmt.CATEGORY.name, cat.name))
    me.cat = cat

  def actions(me, master, targetdir, id, cohort):
    "Return a CopyAction if MASTER and its whole COHORT pass; else nothing."
    if not me._format.check(id):
      return []
    if not all(me._format.check(cid) for f, cid in cohort):
      return []
    return [CopyAction(master, targetdir)]

Accept = K('accept') - Format
Accept.setParseAction(lambda s, l, t: AcceptPolicy(t[0]))
680
class ConvertPolicy (FormatPolicy):
  """
  A FormatPolicy which copies files in a particular format or converts if
  necessary.

  Unlike AcceptPolicy, this never rejects a file, and it pays no attention
  to the rest of the cohort.
  """

  def __init__(me, format):
    "Initialize the policy to produce files matching FORMAT."
    me._format = format

  def setcategory(me, cat):
    """
    Store CAT as the policy's category.

    Raise ValueError if the format belongs to some other category.
    """
    if me._format.CATEGORY is not cat:
      ## Name the right policy in the message, so that the user looks at the
      ## `convert' clause rather than hunting for an `accept' one.
      raise ValueError(
        "Convert format `%s' has category `%s', not `%s'" %
        (me._format.__class__.__name__,
         me._format.CATEGORY.name, cat.name))
    me.cat = cat

  def actions(me, master, targetdir, id, cohort):
    "Return a CopyAction if MASTER is acceptable as is; else a ConvertAction."
    if me._format.check(id):
      return [CopyAction(master, targetdir)]
    else:
      return [ConvertAction(master, targetdir, id, me._format)]

Convert = K('convert') - Format
Convert.setParseAction(lambda s, l, t: ConvertPolicy(t[0]))
705
706Policy << (And | Or | Accept | Convert)
707
708###--------------------------------------------------------------------------
709### Audio handling, based on GStreamer.
710
def make_element(factory, name = None, **props):
  """
  Return a new element from the FACTORY with the given NAME and PROPS.

  The PROPS are keyword arguments naming properties to set on the new
  element.
  """
  new = GS.element_factory_make(factory, name)
  new.set_properties(**props)
  return new
716
0dbdd41c
MW
def link_elements(elts):
  """
  Link the elements ELTS together, in order.

  ELTS is a sequence of GStreamer elements.
  """
  GS.element_link_many(*elts)
720
2101727d
MW
def bin_children(bin):
  """
  Iterate over the (direct) children of a BIN.

  This is a generator: the BIN isn't asked for its elements until the first
  child is requested.
  """
  for child in bin.elements():
    yield child
724
583b7e4a
MW
class GStreamerProgressEyecandy (ProgressEyecandy):
  """
  Provide amusement while GStreamer is busy doing something.

  The GStreamerProgressEyecandy object is a context manager.  Wrap it round
  your GStreamer loop to provide progress information for an operation.
  """

  def __init__(me, what, elt, **kw):
    """
    Initialize a progress meter.

    WHAT is a prefix string to be written before the progress eyecandy
    itself.  ELT is a GStreamer element to interrogate to find the progress
    information.  Other keyword arguments are passed on to ProgressEyecandy.
    """
    me._elt = elt
    ProgressEyecandy.__init__(me, what, **kw)

  def _update(me):
    """
    Update the progress meter.

    This is called periodically by the GLib main event-processing loop.
    Returning true asks to be called again.
    """
    me.show()
    return True

  ## An alternative name for the same thing.
  _timer = _update

  def progress(me):
    "Return the current progress as a pair (CURRENT, MAX)."

    ## Fetch the current progress information.  We get the duration each
    ## time, because (particularly with VBR-encoded MP3 inputs) the estimated
    ## duration can change as we progress.  Hopefully it settles down fairly
    ## soon.
    try:
      pos, hunoz = me._elt.query_position(GS.FORMAT_TIME)
      end, hukairz = me._elt.query_duration(GS.FORMAT_TIME)
    except GS.QueryError:
      return None, None
    return pos, end

  def __enter__(me):
    "Enter context: attach progress meter display."

    ## Update regularly, if we're showing pointless frippery at all.  The
    ## pipeline runs asynchronously.
    if STATUS.eyecandyp:
      me._id = G.timeout_add(200, me._update)

  def __exit__(me, ty, val, tb):
    "Leave context: remove display and report completion or failure."

    ## If we're not showing pointless frippery, there's nothing to remove.
    if STATUS.eyecandyp:
      G.source_remove(me._id)

    ## Report completion anyway.  We've failed if we're leaving because of
    ## an exception.
    me.done(ty is None)

    ## As you were: don't swallow the exception, if there was one.
    return False
794
class AudioIdentifier (object):
  """
  Analyses and identifies an audio file.

  Important properties are:

  cap   A capabilities structure describing the audio file data.  The most
        interesting thing in here is probably its name, which is a MIME
        type describing the data.

  dcap  A capabilities structure describing the decoded audio data.  This
        is of interest during conversion.

  mime  A set of the MIME types collected for the file: the first guess
        passed in by the caller, and the name of `cap'.

  tags  A dictionary containing metadata tags from the file.  These are in
        GStreamer's encoding-independent format.

  bitrate An approximation to the stream's bitrate, in kilobits per second.
        This might be slow to work out for some files so it's computed on
        demand.
  """

  def __init__(me, file, mime):
    """
    Initialize the object suitably for identifying FILE.

    MIME is the caller's first guess at FILE's MIME type.  Raise ValueError
    if the GStreamer pipeline reports an error.
    """

    ## Make some initial GStreamer objects.  We'll want the pipeline later if
    ## we need to analyse a poorly tagged MP3 stream, so save it away.
    me._pipe = GS.Pipeline()
    me._file = file
    bus = me._pipe.get_bus()
    bus.add_signal_watch()
    loop = G.MainLoop()

    ## The basic recognition kit is based around `decodebin'.  We must keep
    ## it happy by giving it sinks for the streams it's found, which it
    ## announces asynchronously.
    source = make_element('filesrc', 'file', location = file)
    decoder = make_element('decodebin', 'decode')
    sink = make_element('fakesink')
    def decoder_pad_arrived(elt, pad):
      if pad.get_caps()[0].get_name().startswith('audio/'):
        elt.link_pads(pad.get_name(), sink, 'sink')
    dpaid = decoder.connect('pad-added', decoder_pad_arrived)
    me._pipe.add(source, decoder, sink)
    link_elements([source, decoder])

    ## Arrange to collect tags from the pipeline's bus as they're reported.
    ## If we reuse the pipeline later, we'll want different bus-message
    ## handling, so make sure we can take the signal handler away.
    tags = {}
    fail = []
    def bus_message(bus, msg):
      if msg.type == GS.MESSAGE_ERROR:
        fail[:] = (ValueError, msg.structure['debug'])
        loop.quit()
      elif msg.type == GS.MESSAGE_STATE_CHANGED:
        if msg.structure['new-state'] == GS.STATE_PAUSED and \
              msg.src == me._pipe:
          loop.quit()
      elif msg.type == GS.MESSAGE_TAG:
        tags.update(msg.structure)
    bmid = bus.connect('message', bus_message)

    ## We want to identify the kind of stream this is.  (Hmm.  The MIME type
    ## recognizer has already done this work, but GStreamer is probably more
    ## reliable.)  The `decodebin' has a `typefind' element inside which
    ## knows the identified media type.  Find it now; we'll ask it what it
    ## found once the pipeline has got going.
    me.cap = None
    me.dcap = None
    for e in bin_children(decoder):
      if e.get_factory().get_name() == 'typefind':
        tfelt = e
        break
    else:
      assert False, 'failed to find typefind element'

    ## Crank up most of the heavy machinery.  The message handler will stop
    ## the loop when things seem to be sufficiently well underway.
    me._pipe.set_state(GS.STATE_PAUSED)
    loop.run()
    bus.disconnect(bmid)
    decoder.disconnect(dpaid)
    if fail:
      me._pipe.set_state(GS.STATE_NULL)
      raise fail[0](fail[1])

    ## Store the collected tags.
    me.tags = tags

    ## Gather the capabilities.  The `typefind' element knows the input data
    ## type.  The 'decodebin' knows the raw data type.
    me.cap = tfelt.get_pad('src').get_negotiated_caps()[0]
    me.mime = set([mime, me.cap.get_name()])
    me.dcap = sink.get_pad('sink').get_negotiated_caps()[0]

    ## If we found a plausible bitrate then stash it.  Otherwise note that we
    ## failed.  If anybody asks then we'll work it out then.
    if 'nominal-bitrate' in tags:
      me._bitrate = tags['nominal-bitrate']/1000
    elif 'bitrate' in tags and tags['bitrate'] >= 80000:
      me._bitrate = tags['bitrate']/1000
    else:
      me._bitrate = None

    ## The bitrate computation wants the file size.  Ideally we'd want the
    ## total size of the frames' contents, but that seems hard to dredge
    ## out.  If the framing overhead is small, this should be close enough
    ## for our purposes.
    me._bytes = OS.stat(file).st_size

  def __del__(me):
    "Close the pipeline down so we don't leak file descriptors."
    me._pipe.set_state(GS.STATE_NULL)

  @property
  def bitrate(me):
    """
    Return the approximate bit-rate of the input file.

    This might take a while if we have to work it out the hard way.  Raise
    ValueError if the GStreamer pipeline reports an error along the way.
    """

    ## If we already know the answer then just return it.
    if me._bitrate is not None:
      return me._bitrate

    ## Make up a new main loop.
    loop = G.MainLoop()

    ## Watch for bus messages.  We'll stop when we reach the end of the
    ## stream: then we'll have a clear idea of how long the track was.
    fail = []
    def bus_message(bus, msg):
      if msg.type == GS.MESSAGE_ERROR:
        fail[:] = (ValueError, msg.structure['debug'])
        loop.quit()
      elif msg.type == GS.MESSAGE_EOS:
        loop.quit()
    bus = me._pipe.get_bus()
    bmid = bus.connect('message', bus_message)

    ## Get everything moving, and keep the user amused while we work.  The
    ## progress meter wants a label, made from the file name we stashed at
    ## construction time, and, separately, the element to interrogate.
    me._pipe.set_state(GS.STATE_PLAYING)
    with GStreamerProgressEyecandy(filestatus(me._file, 'measure bitrate'),
                                   me._pipe,
                                   silentp = True):
      loop.run()
    bus.disconnect(bmid)
    if fail:
      me._pipe.set_state(GS.STATE_NULL)
      raise fail[0](fail[1])

    ## Now we should be able to find out our position accurately and work out
    ## a bitrate.  The position is in nanoseconds, so bits per nanosecond
    ## times 10^6 gives kilobits per second.  Cache it in case anybody asks
    ## again.
    t, hukairz = me._pipe.query_position(GS.FORMAT_TIME)
    me._bitrate = int(8*me._bytes*1e6/t)

    ## Done.
    return me._bitrate
956
class AudioFormat (BaseFormat):
  """
  An AudioFormat is a kind of Format specialized for audio files.

  Format checks are done on an AudioIdentifier object.

  Subclasses are expected to supply `NAME' and `MIMETYPES', and either an
  `encoder_chain' method or their own `encoder'; see the individual methods
  for details.
  """

  PROPS = prop('bitrate', Num)

  ## libmagic reports `application/ogg' for Ogg Vorbis files.  We've switched
  ## to GIO now, which reports either `audio/ogg' or `audio/x-vorbis+ogg'
  ## depending on how thorough it's trying to be.  Still, it doesn't do any
  ## harm here; the main risk is picking up Ogg Theora files by accident, and
  ## we'll probably be able to extract the audio from them anyway.
  CATEGORY = FileCategory('audio', ['audio/*', 'application/ogg'],
                          AudioIdentifier)

  def __init__(me, bitrate = None):
    "Construct an object, requiring an approximate bitrate."
    me.bitrate = bitrate

  def check(me, id):
    """
    Return whether the AudioIdentifier ID is suitable for our purposes.

    Subclasses can either override this method or provide a property
    `MIMETYPES' holding the GStreamer MIME types matching this format.  It
    is intersected with `ID.mime' using `&', so it must be a set (or
    something else which supports that operator).

    If a bitrate was requested, the file is also required not to exceed it
    by more than a factor of sqrt(2).
    """
    return id.mime & me.MIMETYPES and \
           (me.bitrate is None or id.bitrate <= me.bitrate * sqrt(2))

  def encoder(me):
    """
    Constructs a GStreamer element to encode audio input.

    Subclasses can either override this method (or replace `encode'
    entirely), or provide a method `encoder_chain' which returns a list of
    elements to be linked together in sequence.  The first element in the
    chain must have a pad named `sink' and the last must have a pad named
    `src'.
    """
    elts = me.encoder_chain()
    bin = GS.Bin()
    bin.add(*elts)
    link_elements(elts)
    bin.add_pad(GS.GhostPad('sink', elts[0].get_pad('sink')))
    bin.add_pad(GS.GhostPad('src', elts[-1].get_pad('src')))
    return bin

  def convert(me, master, id, target):
    """
    Encode audio from MASTER, already identified as ID, writing it to TARGET.

    The output is written to a scratch file `TARGET.new', passed through the
    `fixup' method, and finally renamed to TARGET.  Raises `ValueError' if
    GStreamer reports an error on the pipeline's bus.

    See `encoder' for subclasses' responsibilities.
    """

    ## Construct the necessary equipment.
    pipe = GS.Pipeline()
    bus = pipe.get_bus()
    bus.add_signal_watch()
    loop = G.MainLoop()

    ## Make sure that there isn't anything in the way of our output.  We're
    ## going to write to a scratch file so that we don't get confused by
    ## half-written rubbish left by a crashed program.
    new = target + '.new'
    try:
      OS.unlink(new)
    except OSError as err:
      if err.errno != E.ENOENT:
        raise

    ## Piece together our pipeline.  The annoying part is that the
    ## `decodebin' doesn't have any source pads yet, so our chain is in two
    ## halves for now.
    source = make_element('filesrc', 'source', location = master)
    decoder = make_element('decodebin', 'decode')
    convert = make_element('audioconvert', 'convert')
    encoder = me.encoder()
    sink = make_element('filesink', 'sink', location = new)
    pipe.add(source, decoder, convert, encoder, sink)
    link_elements([source, decoder])
    link_elements([convert, encoder, sink])

    ## Some decoders (e.g., the AC3 decoder) include channel-position
    ## indicators in their output caps.  The Vorbis encoder interferes with
    ## this, and you end up with a beautifully encoded mono signal from a
    ## stereo source.  From a quick butchers at the `vorbisenc' source, I
    ## /think/ that this is only a problem with stereo signals: mono signals
    ## are mono already, and `vorbisenc' accepts channel positions if there
    ## are more than two channels.
    ##
    ## So we have this bodge.  We already collected the decoded audio caps
    ## during identification.  So if we see 2-channel audio with channel
    ## positions, we strip the positions off forcibly by adding a filter.
    if id.dcap.get_name().startswith('audio/x-raw-') and \
       id.dcap.has_field('channels') and \
       id.dcap['channels'] == 2 and \
       id.dcap.has_field('channel-positions'):
      dcap = GS.Caps()
      c = id.dcap.copy()
      c.remove_field('channel-positions')
      dcap.append(c)
    else:
      dcap = None

    ## Hook onto the `decodebin' so we can link together the two halves of
    ## our encoding chain.  For now, we'll hope that there's only one audio
    ## stream in there, and just throw everything else away.
    def decoder_pad_arrived(elt, pad):
      if pad.get_caps()[0].get_name().startswith('audio/'):
        if dcap:
          elt.link_pads_filtered(pad.get_name(), convert, 'sink', dcap)
        else:
          elt.link_pads(pad.get_name(), convert, 'sink')
    decoder.connect('pad-added', decoder_pad_arrived)

    ## Watch the bus for completion messages.  A failure is recorded as an
    ## exception object in `fail' so that we can raise it once the main loop
    ## has finished.
    fail = []
    def bus_message(bus, msg):
      if msg.type == GS.MESSAGE_ERROR:
        fail[:] = [ValueError(msg.structure['debug'])]
        loop.quit()
      elif msg.type == GS.MESSAGE_EOS:
        loop.quit()
    bmid = bus.connect('message', bus_message)

    ## Get everything ready and let it go.  Whatever happens, shut the
    ## pipeline down again and undo our bus watch, so that a failed
    ## conversion doesn't leave the pipeline running or the watch attached.
    try:
      pipe.set_state(GS.STATE_PLAYING)
      with GStreamerProgressEyecandy(filestatus(master,
                                                'convert to %s' % me.NAME),
                                     pipe):
        loop.run()
    finally:
      pipe.set_state(GS.STATE_NULL)
      bus.disconnect(bmid)
      bus.remove_signal_watch()
    if fail:
      raise fail[0]

    ## Fix up the output file if we have to.
    me.fixup(new)

    ## We're done.
    OS.rename(new, target)
1100
class OggVorbisFormat (AudioFormat):
  "AudioFormat object for Ogg Vorbis."

  NAME = 'Ogg Vorbis'
  EXT = 'ogg'
  MIMETYPES = set(['application/ogg', 'audio/x-vorbis', 'audio/ogg',
                   'audio/x-vorbis+ogg'])

  ## A table of (QUALITY, BITRATE) pairs, in increasing order.
  ## From https://en.wikipedia.org/wiki/Vorbis
  QMAP = [(-1,  45), ( 0,  64), ( 1,  80), ( 2,  96),
          ( 3, 112), ( 4, 128), ( 5, 160), ( 6, 192),
          ( 7, 224), ( 8, 256), ( 9, 320), (10, 500)]

  def encoder_chain(me):
    """
    Return the list of elements which encode and multiplex Ogg Vorbis.

    If a bitrate was requested, choose the first quality setting in `QMAP'
    whose bitrate is at least that large; raise `ValueError' if there isn't
    one.  Otherwise the encoder is left with its default settings.
    """
    props = {}
    wanted = me.bitrate
    if wanted is not None:
      for quality, rate in me.QMAP:
        if rate >= wanted:
          ## The table counts quality in tenths of what the encoder is given.
          props['quality'] = quality/10.0
          break
      else:
        raise ValueError('no suitable quality setting found')
    vorbis = make_element('vorbisenc', **props)
    mux = make_element('oggmux')
    return [vorbis, mux]

defformat('ogg-vorbis', OggVorbisFormat)
1127
class MP3Format (AudioFormat):
  "AudioFormat object for MP3."

  NAME = 'MP3'
  EXT = 'mp3'
  MIMETYPES = set(['audio/mpeg'])

  def encoder_chain(me):
    """
    Return the list of elements which encode MP3 and attach its headers.

    The requested bitrate, if any, is passed to the encoder as its
    `vbr_mean_bitrate' property.
    """
    props = {}
    if me.bitrate is not None:
      props['vbr_mean_bitrate'] = me.bitrate
    lame = make_element('lame', vbr = 4, **props)
    xing = make_element('xingmux')
    id3 = make_element('id3v2mux')
    return [lame, xing, id3]

  def fixup(me, path):
    """
    Fix up MP3 files.

    GStreamer produces ID3v2 tags, but not ID3v1.  This seems unnecessarily
    unkind to stupid players.
    """
    audio = E3.load(path)
    if audio is None or audio.tag is None: return

    ## Write the tag back in both versions.  This is best-effort: a tag
    ## which can't be represented in one of the versions is simply skipped.
    for version in (E3.id3.ID3_V2_3, E3.id3.ID3_V1):
      try:
        audio.tag.save(version = version)
      except (UnicodeEncodeError,
              E3.id3.GenreException,
              E3.id3.TagException):
        pass

defformat('mp3', MP3Format)
1161
1162###--------------------------------------------------------------------------
1163### Image handling, based on the Python Imaging Library.
1164
class ImageIdentifier (object):
  """
  Analyses and identifies an image file.

  The opened Image object is left in the `img' property for inspection, and
  the MIME type we were given is left, as a one-element set, in `mime'.
  """

  def __init__(me, file, mime):
    """
    Open FILE as an image, remembering its MIME type.

    Raises `IdentificationFailure' if PIL can't make sense of the file;
    genuine I/O errors are propagated unchanged.
    """

    ## Get PIL to open the file.  It will magically work out what kind of
    ## file it is.
    try:
      image = I.open(file)
    except IOError as exc:

      ## PIL reports an unrecognized file as an `IOError', which is
      ## unhelpful.  A real I/O error carries an `errno', so pass those on
      ## and translate the rest.
      if exc.errno is not None:
        raise
      raise IdentificationFailure

    me.img = image
    me.mime = set([mime])
1188
class ImageFormat (BaseFormat):
  """
  An ImageFormat is a kind of Format specialized for image files.

  Subclasses don't need to provide anything other than the properties
  required by all concrete Format subclasses.  However, there is a
  requirement that the `NAME' property match PIL's `format' name for the
  format.
  """

  PROPS = prop('size', Num)
  CATEGORY = FileCategory('image', ['image/*'], ImageIdentifier)

  def __init__(me, size = None, **kw):
    """
    Initialize an ImageFormat object.

    SIZE, if given, is the largest width or height permitted.  Additional
    keywords are used when encoding, and may be recognized by enhanced
    `check' methods in subclasses.
    """
    me._props = kw
    me._size = size

  def check(me, id):
    "Check whether the ImageIdentifier ID matches our requirements."

    ## The image must already be in our format...
    if id.img.format != me.NAME: return False

    ## ... and, if we have a size limit, fit within it both ways.
    limit = me._size
    if limit is None: return True
    return id.img.size[0] <= limit and id.img.size[1] <= limit

  def convert(me, master, id, target):
    "Encode the file MASTER, identified as ID, writing the result to TARGET."

    ## We write to a scratch file, and rename it into place at the end.
    scratch = target + '.new'

    ## The identifier has already opened the image, so reuse that.
    image = id.img
    STATUS.set(filestatus(master, 'convert to %s' % me.NAME))

    ## Scale the image down if it's larger than the stated maximum.
    ## Thumbnailing works in place, so do it to a copy rather than
    ## clobbering the identifier's image.
    limit = me._size
    if limit is not None and \
       not (image.size[0] <= limit and image.size[1] <= limit):
      image = image.copy()
      image.thumbnail((limit, limit), I.ANTIALIAS)

    ## Write the output, and fix it up if necessary.
    image.save(scratch, me.NAME, **me._props)
    me.fixup(scratch)

    ## We're done.
    OS.rename(scratch, target)
    STATUS.commit()
1246
class JPEGFormat (ImageFormat):
  """
  Image format for JPEG (actually JFIF) files.

  Interesting properties to set:

  optimize
        If present, take a second pass to select optimal encoder settings.

  progressive
        If present, make a progressive file.

  quality
        Integer from 1--100 (worst to best); default is 75.
  """
  NAME = 'JPEG'
  EXT = 'jpg'
  PROPS = prop('optimize', None) \
        | prop('progressive', None, 'progression') \
        | prop('quality', Num)

defformat('jpeg', JPEGFormat)
1268
class PNGFormat (ImageFormat):
  """
  Image format for PNG files.

  Interesting properties to set:

  optimize
        If present, make a special effort to minimize the output file.
  """
  NAME = 'PNG'
  EXT = 'png'
  PROPS = prop('optimize', None)

defformat('png', PNGFormat)
1283
class BMPFormat (ImageFormat):
  """
  Image format for Windows BMP files, as used by RockBox.

  There are no additional properties to set.
  """
  EXT = 'bmp'
  NAME = 'BMP'

defformat('bmp', BMPFormat)
1294
1295###--------------------------------------------------------------------------
e0361afb
MW
1296### Remaining parsing machinery.
1297
Type = K('type') - Name - D('{') - R(Policy) - D('}')
def build_type(s, l, t):
  """
  Parse action for `Type': return the policy for a `type NAME { ... }' block.

  S is the text being parsed, L the location of the match within it, and T
  the matched tokens: T[0] is the category name and T[1] the policies
  listed in the block.  A single policy is used as it stands; otherwise
  they're wrapped up in an `AndPolicy'.  Either way, the result has its
  category set before being returned.

  Raises `ParseException' if the category name isn't in `CATEGORYMAP'.
  """
  try:
    cat = CATEGORYMAP[t[0]]
  except KeyError:
    ## Report the failure at the location we were given.
    raise P.ParseException(s, l, "Unknown category `%s'" % t[0])
  pols = t[1]
  if len(pols) == 1: pol = pols[0]
  else: pol = AndPolicy(pols)
  pol.setcategory(cat)
  return pol
Type.setParseAction(build_type)
1310
TARGETS = []
class TargetJob (object):
  """
  A configuration job describing one target directory.

  The `targetdir' and `policies' attributes hold the values given to the
  constructor.  Performing the job adds it to the global `TARGETS' list.
  """
  def __init__(me, targetdir, policies):
    me.targetdir, me.policies = targetdir, policies
  def perform(me):
    "Register this job in `TARGETS'."
    TARGETS.append(me)
1318
Target = K('target') - String - D('{') - R(Type) - D('}')
def build_target(s, l, t):
  "Parse action for `Target': make a `TargetJob' from the matched tokens."
  targetdir = t[0]
  policies = t[1]
  return TargetJob(targetdir, policies)
Target.setParseAction(build_target)
1323
VARS = { 'master': None }
class VarsJob (object):
  """
  A configuration job which sets global variables.

  The job is given a sequence of (NAME, VALUE) pairs; performing it stores
  each of them in the global `VARS' dictionary.
  """
  def __init__(me, vars):
    me.vars = vars
  def perform(me):
    "Store our variable settings in `VARS'."
    for name, value in me.vars:
      VARS[name] = value
1331
## The only variable we understand is `master'.
Var = prop('master', String)
Vars = K('vars') - D('{') - R(Var) - D('}')
def build_vars(s, l, t):
  "Parse action for `Vars': make a `VarsJob' from the matched settings."
  settings = t[0]
  return VarsJob(settings)
Vars.setParseAction(build_vars)

## A configuration file is a sequence of `vars' and `target' blocks, with
## Python-style comments permitted.
TopLevel = Vars | Target
Config = R(TopLevel)
Config.ignore(P.pythonStyleComment)
1341
1342###--------------------------------------------------------------------------
583b7e4a
MW
1343### The directory grobbler.
1344
def grobble(master, targets, noact = False):
  """
  Work through the MASTER directory, writing converted files to TARGETS.

  The TARGETS are a list of `TargetJob' objects, each describing a target
  directory and a policy to apply to it.

  If NOACT is true, then don't actually do anything permanent to the
  filesystem.

  Returns a list of (MASTERFILE, EXCEPTION) pairs, one for each file or
  directory which failed with an `IOError' or `OSError'.
  """

  ## Transform the targets into a more convenient data structure: for each
  ## target, a map from category to the list of policies for that category.
  tpolmap = []
  for t in targets:
    pmap = {}
    tpolmap.append(pmap)
    for p in t.policies: pmap.setdefault(p.cat, []).append(p)

  ## Keep track of the current position in the master tree.
  dirs = []

  ## And the files which haven't worked.
  broken = []

  def grobble_file(master, pmap, targetdir, cohorts):
    ## Convert MASTER, writing the result to TARGETDIR.
    ##
    ## The COHORTS are actually (CAT, ID, COHORT) triples, where a COHORT is
    ## a list of (FILENAME, ID) pairs.
    ##
    ## Since this function might convert the MASTER file, the caller doesn't
    ## know the name of the output files, so we return them as a list.

    done = set()
    st_m = OS.stat(master)

    ## Work through each category listed and apply its policy.
    for cat, id, cohort in cohorts:

      ## Go through the category's policies and see if any match.  If we fail
      ## here, see if there are more categories to try.
      for pol in pmap[cat]:
        acts = pol.actions(master, targetdir, id, cohort)
        if acts: break
      else:
        continue

      ## Work through the targets one by one.
      for a in acts:
        done.add(a.target)

        ## Find out whether the target file already exists and is up-to-date
        ## with respect to the master.  (Caution here with low-resolution
        ## timestamps.)  If it's OK, then just move on.
        try:
          st_t = OS.stat(a.target)
          if st_m.st_mtime < st_t.st_mtime or \
             (st_m.st_ino, st_m.st_dev) == (st_t.st_ino, st_t.st_dev):
            continue
        except OSError as err:
          if err.errno not in (E.ENOENT, E.ENOTDIR):
            raise

        ## We have real work to do.  If there's a current status message,
        ## it's the containing directory so flush it so that people know
        ## where we are.
        STATUS.commit()

        ## Remove the target.  (A hardlink will fail if the target already
        ## exists.)
        if not noact:
          try:
            OS.unlink(a.target)
          except OSError as err:
            if err.errno not in (E.ENOENT, E.ENOTDIR):
              raise

        ## Do whatever it is we decided to do.
        if noact:
          STATUS.commit(filestatus(master, a))
        else:
          a.perform()

    ## We're done.  Return the names of the targets.
    return list(done)

  @contextmanager
  def wrap(masterfile):
    ## Handle exceptions found while trying to convert a particular file or
    ## directory.

    try:
      yield masterfile

    ## Something bad happened.  Report the error, but continue.  (This list
    ## of exceptions needs a lot of work.)
    except (IOError, OSError) as exc:
      STATUS.clear()
      STATUS.commit(filestatus(masterfile, 'failed (%s)' % exc))
      broken.append((masterfile, exc))

  def grobble_dir(master, targets):
    ## Recursively convert files in MASTER, writing them to the TARGETS.

    ## Keep track of the subdirectories we encounter, because we'll need to
    ## do all of those in one go at the end.
    subdirs = set()

    ## Work through each target directory in turn.
    for target, pmap in zip(targets, tpolmap):

      ## Make sure the TARGET exists and is a directory.  It's a fundamental
      ## assumption of this program that the entire TARGET tree is
      ## disposable, so if something exists but isn't a directory, we should
      ## kill it.
      if OS.path.isdir(target):
        pass
      else:
        if OS.path.exists(target):
          STATUS.commit(filestatus(target, 'clear nondirectory'))
          if not noact:
            OS.unlink(target)
        STATUS.commit(filestatus(target, 'create directory'))
        if not noact:
          OS.mkdir(target)

      ## Keep a list of things in the target.  As we convert files, we'll
      ## check them off.  Anything left over is rubbish and needs to be
      ## deleted.
      checklist = {}
      try:
        for i in OS.listdir(target):
          checklist[i] = False
      except OSError as err:
        if err.errno not in (E.ENOENT, E.ENOTDIR):
          raise

      ## Keep track of the files in each category.
      catmap = {}
      todo = []
      done = []

      ## Work through the master files.
      for f in sorted(OS.listdir(master)):

        ## If the killswitch has been pulled then stop.  The whole idea is
        ## that we want to cause a clean shutdown if possible, so we don't
        ## want to do it in the middle of encoding because the encoding
        ## effort will have been wasted.  This is the only place we need to
        ## check.  If we've exited the loop, then clearing old files will
        ## probably be fast, and we'll either end up here when the recursive
        ## call returns or we'll be in the same boat as before, clearing old
        ## files, only up a level.  If worst comes to worst, we'll be killed
        ## forcibly somewhere inside `SH.rmtree', and that can continue where
        ## it left off.
        if KILLSWITCH.is_set():
          return

        ## Do something with the file.
        with wrap(OS.path.join(master, f)) as masterfile:

          ## If it's a directory then prepare to grobble it recursively, but
          ## don't do that yet.
          if OS.path.isdir(masterfile):
            subdirs.add(f)
            done.append(OS.path.join(target, f))

          ## Otherwise it's a file.  Work out what kind, and stash it under
          ## the appropriate categories.  Later, we'll apply policy to the
          ## files, by category, and work out what to do with them all.
          else:
            gf = GIO.File(masterfile)
            mime = gf.query_info('standard::content-type').get_content_type()
            cats = []
            for cat in pmap:
              id = cat.identify(masterfile, mime)
              if id is None: continue
              catmap.setdefault(cat, []).append((masterfile, id))
              cats.append((cat, id))

            ## A file which nobody recognized has no identifier.  Say so
            ## explicitly, rather than relying on whatever the loop above
            ## happened to leave behind: if this target has no policies at
            ## all then the loop never even ran.
            if not cats:
              catmap.setdefault(None, []).append((masterfile, None))
            todo.append((masterfile, cats))

      ## Work through the categorized files to see what actions to do for
      ## them.
      for masterfile, cats in todo:
        with wrap(masterfile):
          done += grobble_file(masterfile, pmap, target,
                               [(cat, id, catmap[cat]) for cat, id in cats])

      ## Check the results off the list so that we don't clear it later.
      for f in done:
        checklist[OS.path.basename(f)] = True

      ## Maybe there's stuff in the target which isn't accounted for.  Delete
      ## it: either the master has changed, or the policy for this target has
      ## changed.  Either way, the old files aren't wanted.
      for f in checklist:
        if not checklist[f]:
          STATUS.commit(filestatus(f, 'clear bogus file'))
          if not noact:
            bogus = OS.path.join(target, f)
            try:
              if OS.path.isdir(bogus):
                SH.rmtree(bogus)
              else:
                OS.unlink(bogus)
            except OSError as err:
              if err.errno != E.ENOENT:
                raise

    ## If there are subdirectories which want processing then do those.
    ## Keep the user amused by telling him where we are in the tree.
    for d in sorted(subdirs):
      dirs.append(d)
      STATUS.set('/'.join(dirs))
      with wrap(OS.path.join(master, d)) as masterdir:
        try:
          grobble_dir(masterdir,
                      [OS.path.join(target, d) for target in targets])
        finally:
          dirs.pop()
          STATUS.set('/'.join(dirs))

  ## Right.  We're ready to go.
  grobble_dir(master, [t.targetdir for t in targets])
  return broken
583b7e4a
MW
1572
1573###--------------------------------------------------------------------------
1574### Command-line interface.
1575
QUIS = OS.path.basename(SYS.argv[0])

def moan(msg):
  "Write the warning MSG to standard error, prefixed by the program name."
  line = '%s: %s\n' % (QUIS, msg)
  SYS.stderr.write(line)
1581
def die(msg):
  "Report the fatal error MSG to the user, and exit with status 1."
  moan(msg)
  raise SystemExit(1)
1586
def parse_opts(args):
  """
  Parse command-line arguments in ARGS.

  Returns the object holding the parsed option values.  Along the way, the
  options are acted upon: progress eyecandy is switched on if requested; a
  daemon thread running `timeout' is started if a timeout was given; and
  the configuration file named by the single non-option argument is parsed
  and each of its jobs performed.
  """

  ## Build the option parser object.
  op = OP.OptionParser(prog = QUIS, version = VERSION,
                       usage = '%prog [-in] [-t TIMEOUT] [-T TIMEOUT] '
                       'CONFIG',
                       description = """\
Convert a directory tree of files according to the configuration file
CONFIG.
""")

  ## Timeout handling.  A time is a decimal number optionally followed by a
  ## unit letter: `s', `m', `h' or `d'; the default is seconds.  The pattern
  ## is anchored at the end so that trailing rubbish is rejected rather than
  ## silently ignored.
  def cb_time(opt, ostr, arg, op):
    m = RX.match(r'\s*(\d+)\s*([dhms]?)\s*$', arg)
    if not m:
      raise OP.OptionValueError('bad time value `%s\'' % arg)
    t, u = m.groups()
    t = int(t) * { '': 1, 's': 1, 'm': 60, 'h': 3600, 'd': 86400 }[u]
    setattr(op.values, opt.dest, t)
  op.add_option('-t', '--timeout', type = 'string', metavar = 'SECS',
                dest = 'timeout',
                help = 'stop processing nicely after SECS',
                action = 'callback', callback = cb_time)
  op.add_option('-T', '--timeout-nasty', type = 'string', metavar = 'SECS',
                dest = 'timeout_nasty',
                help = 'stop processing unpleasantly after further SECS',
                action = 'callback', callback = cb_time)

  ## Other options.
  op.add_option('-i', '--interactive', action = 'store_true', dest = 'tty',
                help = 'provide progress information')
  op.add_option('-n', '--no-act', action = 'store_true', dest = 'noact',
                help = 'don\'t actually modify the filesystem')

  ## Ready to rock.
  op.set_defaults(formats = [], noact = False,
                  timeout = None, timeout_nasty = 300)
  opts, args = op.parse_args(args)

  ## Check that we got the non-option arguments that we want.
  if len(args) != 1:
    op.error('wrong number of arguments')

  ## Act on the options.
  if opts.tty:
    STATUS.eyecandyp = True
  if opts.timeout is not None:
    to = TH.Thread(target = timeout,
                   args = (opts.timeout, opts.timeout_nasty))
    to.daemon = True
    to.start()

  ## Parse the configuration file.
  with open(args[0]) as conf:
    jobs, = Config.parseFile(conf, True)
  for j in jobs:
    j.perform()

  return opts
1652
if __name__ == '__main__':
  opts = parse_opts(SYS.argv[1:])

  ## The `master' variable always has an entry in `VARS': it starts out as
  ## `None', and only the configuration file can give it a proper value.  So
  ## check the value, rather than whether the key is present.
  if VARS.get('master') is None:
    die("no master directory set")
  broken = grobble(VARS['master'], TARGETS, opts.noact)
  if broken:
    moan('failed to convert some files:')
    for file, exc in broken:
      moan('%s: %s' % (file, exc))
    SYS.exit(1)

  ## This is basically a successful completion: we did what we were asked to
  ## do.  It seems polite to report a message, though.
  ##
  ## Why don't we have a nonzero exit status?  The idea would be that a
  ## calling script would be interested that we used up all of our time, and
  ## not attempt to convert some other directory as well.  But that doesn't
  ## quite work.  Such a script would need to account correctly for time we
  ## had spent even if we complete successfully.  And if the script is having
  ## to watch the clock itself, it can do that without our help here.
  if KILLSWITCH.is_set():
    moan('killed by timeout')
1675
1676###----- That's all, folks --------------------------------------------------