###----- Licensing notice ---------------------------------------------------
###
-### This program is free software; you can redistribute it and/or modify
+### This file is part of the `autoys' audio tools collection.
+###
+### `autoys' is free software; you can redistribute it and/or modify
### it under the terms of the GNU General Public License as published by
### the Free Software Foundation; either version 2 of the License, or
### (at your option) any later version.
###
-### This program is distributed in the hope that it will be useful,
+### `autoys' is distributed in the hope that it will be useful,
### but WITHOUT ANY WARRANTY; without even the implied warranty of
### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
### GNU General Public License for more details.
###
### You should have received a copy of the GNU General Public License
-### along with this program; if not, write to the Free Software Foundation,
+### along with `autoys'; if not, write to the Free Software Foundation,
### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
###--------------------------------------------------------------------------
import optparse as OP
import threading as TH
import shlex as L
-from math import sqrt
+from math import sqrt, ceil
from contextlib import contextmanager
## eyeD3 tag fettling.
-import eyeD3 as E3
+import eyed3 as E3
-## Gstreamer. It picks up command-line arguments -- most notably `--help' --
-## and processes them itself. Of course, its help is completely wrong. This
-## kludge is due to Jonas Wagner.
-_argv, SYS.argv = SYS.argv, []
-import gobject as G
-import gio as GIO
-import gst as GS
-SYS.argv = _argv
+## Gstreamer.
+import gi
+gi.require_version('GLib', '2.0'); from gi.repository import GLib as G
+gi.require_version('Gio', '2.0'); from gi.repository import Gio as GIO
+gi.require_version('Gst', '1.0'); from gi.repository import Gst as GS
+GS.init([])
## Python Imaging.
from PIL import Image as I
else: w += 1
## Done.
- #print ';; %r -> %d' % (s, w)
return w
class StatusLine (object):
## Eyecandy update.
if me.eyecandyp:
- #print
- #print ';; new status %r' % line
## If the old line was longer, we need to clobber its tail, so work out
## what that involves.
## Actually do the output, all in one syscall.
b = charwidth(me._last[i:])
SYS.stdout.write(pre + '\b'*b + line[i:])
- #print ';; => %r' % (pre + '\b'*b + line[i:])
SYS.stdout.flush()
## Update our idea of what's gone on.
## Work out -- well, guess -- the time remaining.
if cur:
t = T.time()
- eta = me._fmt_time((t - me._start)*(max - cur)/cur)
+ eta = me._fmt_time(ceil((t - me._start)*(max - cur)/cur))
else:
eta = '???'
## Handy abbreviations for constructed parser elements.
def K(k): return P.Keyword(k).suppress()
def D(d): return P.Literal(d).suppress()
-##R = P.ZeroOrMore
def R(p): return P.ZeroOrMore(p).setParseAction(lambda s, l, t: [t])
O = P.Optional
named format and its superclasses.
"""
+ name = 'format-spec'
+
## We cache the parser elements we generate to avoid enormous consing.
CACHE = {}
def make_element(factory, name = None, **props):
"Return a new element from the FACTORY with the given NAME and PROPS."
- elt = GS.element_factory_make(factory, name)
+ elt = GS.ElementFactory.make(factory, name)
+ if elt is None: raise ValueError, 'failed to make `%s\' element' % factory
elt.set_properties(**props)
return elt
+def link_elements(elts):
+ "Link the elements ELTS together, in order."
+ e0 = None
+ for e1 in elts:
+ if e0 is not None: e0.link(e1)
+ e0 = e1
+
+def bin_children(bin):
+ "Iterate over the (direct) children of a BIN."
+ iter = bin.iterate_elements()
+ while True:
+ rc, elt = iter.next()
+ if rc == GS.IteratorResult.DONE: break
+ elif rc != GS.IteratorResult.OK:
+ raise ValueError, 'iteration failed (%s)' % rc
+ else: yield elt
+
class GStreamerProgressEyecandy (ProgressEyecandy):
"""
Provide amusement while GStreamer is busy doing something.
## time, because (particularly with VBR-encoded MP3 inputs) the estimated
## duration can change as we progress. Hopefully it settles down fairly
## soon.
- try:
- t, hunoz = me._elt.query_position(GS.FORMAT_TIME)
- end, hukairz = me._elt.query_duration(GS.FORMAT_TIME)
- return t, end
- except GS.QueryError:
- return None, None
+ ok, t = me._elt.query_position(GS.Format.TIME)
+ if ok: ok, end = me._elt.query_duration(GS.Format.TIME)
+ if ok: return t, end
+ else: return None, None
def __enter__(me):
"Enter context: attach progress meter display."
return
## Update regularly. The pipeline runs asynchronously.
- me._id = G.timeout_add(200, me._update)
+ me._id = G.timeout_add(100, me._update)
def __exit__(me, ty, val, tb):
"Leave context: remove display and report completion or failure."
demand.
"""
- def __init__(me, file, mime):
- "Initialize the object suitably for identifying FILE."
-
- ## Make some initial GStreamer objects. We'll want the pipeline later if
- ## we need to analyse a poorly tagged MP3 stream, so save it away.
- me._pipe = GS.Pipeline()
- me._file = file
- bus = me._pipe.get_bus()
- bus.add_signal_watch()
- loop = G.MainLoop()
+ def _prepare_pipeline(me):
+ pipe = GS.Pipeline()
+ bus = pipe.get_bus()
## The basic recognition kit is based around `decodebin'. We must keep
## it happy by giving it sinks for the streams it's found, which it
## announces asynchronously.
- source = make_element('filesrc', 'file', location = file)
+ source = make_element('filesrc', 'file', location = me._file)
decoder = make_element('decodebin', 'decode')
sink = make_element('fakesink')
def decoder_pad_arrived(elt, pad):
- if pad.get_caps()[0].get_name().startswith('audio/'):
+ if pad.get_current_caps()[0].get_name().startswith('audio/'):
elt.link_pads(pad.get_name(), sink, 'sink')
- dpaid = decoder.connect('pad-added', decoder_pad_arrived)
- me._pipe.add(source, decoder, sink)
- GS.element_link_many(source, decoder)
+ decoder.connect('pad-added', decoder_pad_arrived)
+ for i in [source, decoder, sink]: pipe.add(i)
+ link_elements([source, decoder])
+
+ ## Done.
+ return pipe, bus, decoder, sink
+
+ def __init__(me, file, mime):
+ "Initialize the object suitably for identifying FILE."
+
+ me._file = file
+ pipe, bus, decoder, sink = me._prepare_pipeline()
+
+ ## Make some initial GStreamer objects. We'll want the pipeline later if
+ ## we need to analyse a poorly tagged MP3 stream, so save it away.
+ loop = G.MainLoop()
## Arrange to collect tags from the pipeline's bus as they're reported.
- ## If we reuse the pipeline later, we'll want different bus-message
- ## handling, so make sure we can take the signal handler away.
tags = {}
fail = []
def bus_message(bus, msg):
- if msg.type == GS.MESSAGE_ERROR:
- fail[:] = (ValueError, msg.structure['debug'], None)
+ ty, s = msg.type, msg.get_structure()
+ if ty == GS.MessageType.ERROR:
+ fail[:] = (ValueError, s['debug'], None)
loop.quit()
- elif msg.type == GS.MESSAGE_STATE_CHANGED:
- if msg.structure['new-state'] == GS.STATE_PAUSED and \
- msg.src == me._pipe:
+ elif ty == GS.MessageType.STATE_CHANGED:
+ if s['new-state'] == GS.State.PAUSED and \
+ msg.src == pipe:
loop.quit()
- elif msg.type == GS.MESSAGE_TAG:
- tags.update(msg.structure)
+ elif ty == GS.MessageType.TAG:
+ tt = s['taglist']
+ for i in xrange(tt.n_tags()):
+ t = tt.nth_tag_name(i)
+ if tt.get_tag_size(t) != 1: continue
+ v = tt.get_value_index(t, 0)
+ tags[t] = v
bmid = bus.connect('message', bus_message)
## We want to identify the kind of stream this is. (Hmm. The MIME type
## things from being too awful.)
me.cap = None
me.dcap = None
- for e in decoder.elements():
+ for e in bin_children(decoder):
if e.get_factory().get_name() == 'typefind':
tfelt = e
break
## Crank up most of the heavy machinery. The message handler will stop
## the loop when things seem to be sufficiently well underway.
- me._pipe.set_state(GS.STATE_PAUSED)
+ bus.add_signal_watch()
+ pipe.set_state(GS.State.PAUSED)
loop.run()
bus.disconnect(bmid)
- decoder.disconnect(dpaid)
+ bus.remove_signal_watch()
if fail:
- me._pipe.set_state(GS.STATE_NULL)
+ pipe.set_state(GS.State.NULL)
raise fail[0], fail[1], fail[2]
## Store the collected tags.
## Gather the capabilities. The `typefind' element knows the input data
## type. The 'decodebin' knows the raw data type.
- me.cap = tfelt.get_pad('src').get_negotiated_caps()[0]
+ me.cap = tfelt.get_static_pad('src').get_allowed_caps()[0]
me.mime = set([mime, me.cap.get_name()])
- me.dcap = sink.get_pad('sink').get_negotiated_caps()[0]
+ me.dcap = sink.get_static_pad('sink').get_allowed_caps()[0]
## If we found a plausible bitrate then stash it. Otherwise note that we
## failed. If anybody asks then we'll work it out then.
elif 'bitrate' in tags and tags['bitrate'] >= 80000:
me._bitrate = tags['bitrate']/1000
else:
- me._bitrate = None
-
- ## The bitrate computation wants the file size. Ideally we'd want the
- ## total size of the frames' contents, but that seems hard to dredge
- ## out. If the framing overhead is small, this should be close enough
- ## for our purposes.
- me._bytes = OS.stat(file).st_size
-
- def __del__(me):
- "Close the pipeline down so we don't leak file descriptors."
- me._pipe.set_state(GS.STATE_NULL)
+ ok, n = pipe.query_duration(GS.Format.BYTES)
+ if ok: ok, t = pipe.query_duration(GS.Format.TIME)
+ if ok: me._bitrate = int((8e6*n)/t)
+ else: me._bitrate = None
+ pipe.set_state(GS.State.NULL)
@property
def bitrate(me):
if me._bitrate is not None:
return me._bitrate
- ## Make up a new main loop.
+ ## Make up a new pipeline and main loop.
+ pipe, bus, _, _ = me._prepare_pipeline()
loop = G.MainLoop()
## Watch for bus messages. We'll stop when we reach the end of the
## stream: then we'll have a clear idea of how long the track was.
fail = []
def bus_message(bus, msg):
- if msg.type == GS.MESSAGE_ERROR:
- fail[:] = (ValueError, msg.structure['debug'], None)
+ ty, s = msg.type, msg.get_structure()
+ if ty == GS.MessageType.ERROR:
+ fail[:] = (ValueError, s['debug'], None)
loop.quit()
- elif msg.type == GS.MESSAGE_EOS:
+ elif ty == GS.MessageType.EOS:
loop.quit()
- bus = me._pipe.get_bus()
+ bus = pipe.get_bus()
bmid = bus.connect('message', bus_message)
## Get everything moving, and keep the user amused while we work.
- me._pipe.set_state(GS.STATE_PLAYING)
- with GStreamerProgressEyecandy(filestatus(file, 'measure bitrate') %
- me._pipe,
- silentp = True):
+ bus.add_signal_watch()
+ pipe.set_state(GS.State.PLAYING)
+ with GStreamerProgressEyecandy(filestatus(me._file, 'measure bitrate'),
+ pipe, silentp = True):
loop.run()
+ bus.remove_signal_watch()
bus.disconnect(bmid)
if fail:
- me._pipe.set_state(GS.STATE_NULL)
+ pipe.set_state(GS.State.NULL)
raise fail[0], fail[1], fail[2]
+ STATUS.clear()
+
+ ## The bitrate computation wants the file size. Ideally we'd want the
+ ## total size of the frames' contents, but that seems hard to dredge
+ ## out. If the framing overhead is small, this should be close enough
+ ## for our purposes.
+ bytes = OS.stat(me._file).st_size
## Now we should be able to find out our position accurately and work out
## a bitrate. Cache it in case anybody asks again.
- t, hukairz = me._pipe.query_position(GS.FORMAT_TIME)
- me._bitrate = int(8*me._bytes*1e6/t)
+ ok, t = pipe.query_position(GS.Format.TIME)
+ assert ok, 'failed to discover bitrate'
+ me._bitrate = int(8*bytes*1e6/t)
+ pipe.set_state(GS.State.NULL)
## Done.
return me._bitrate
"""
elts = me.encoder_chain()
bin = GS.Bin()
- bin.add(*elts)
- GS.element_link_many(*elts)
- bin.add_pad(GS.GhostPad('sink', elts[0].get_pad('sink')))
- bin.add_pad(GS.GhostPad('src', elts[-1].get_pad('src')))
+ for i in elts: bin.add(i)
+ link_elements(elts)
+ bin.add_pad(GS.GhostPad('sink', elts[0].get_static_pad('sink')))
+ bin.add_pad(GS.GhostPad('src', elts[-1].get_static_pad('src')))
return bin
def convert(me, master, id, target):
## Construct the necessary equipment.
pipe = GS.Pipeline()
bus = pipe.get_bus()
- bus.add_signal_watch()
loop = G.MainLoop()
## Make sure that there isn't anything in the way of our output. We're
convert = make_element('audioconvert', 'convert')
encoder = me.encoder()
sink = make_element('filesink', 'sink', location = new)
- pipe.add(source, decoder, convert, encoder, sink)
- GS.element_link_many(source, decoder)
- GS.element_link_many(convert, encoder, sink)
+ for i in [source, decoder, convert, encoder, sink]: pipe.add(i)
+ link_elements([source, decoder])
+ link_elements([convert, encoder, sink])
## Some decoders (e.g., the AC3 decoder) include channel-position
## indicators in their output caps. The Vorbis encoder interferes with
## our encoding chain. For now, we'll hope that there's only one audio
## stream in there, and just throw everything else away.
def decoder_pad_arrived(elt, pad):
- if pad.get_caps()[0].get_name().startswith('audio/'):
+ if pad.get_current_caps()[0].get_name().startswith('audio/'):
if dcap:
elt.link_pads_filtered(pad.get_name(), convert, 'sink', dcap)
else:
## Watch the bus for completion messages.
fail = []
def bus_message(bus, msg):
- if msg.type == GS.MESSAGE_ERROR:
- fail[:] = (ValueError, msg.structure['debug'], None)
+ if msg.type == GS.MessageType.ERROR:
+ fail[:] = (ValueError, msg.get_structure()['debug'], None)
loop.quit()
- elif msg.type == GS.MESSAGE_EOS:
+ elif msg.type == GS.MessageType.EOS:
loop.quit()
bmid = bus.connect('message', bus_message)
## Get everything ready and let it go.
- pipe.set_state(GS.STATE_PLAYING)
+ bus.add_signal_watch()
+ pipe.set_state(GS.State.PLAYING)
with GStreamerProgressEyecandy(filestatus(master,
'convert to %s' % me.NAME),
pipe):
loop.run()
- pipe.set_state(GS.STATE_NULL)
+ pipe.set_state(GS.State.NULL)
+ bus.remove_signal_watch()
+ bus.disconnect(bmid)
if fail:
raise fail[0], fail[1], fail[2]
class OggVorbisFormat (AudioFormat):
"AudioFormat object for Ogg Vorbis."
- ## From http://en.wikipedia.org/wiki/Vorbis
+ ## From https://en.wikipedia.org/wiki/Vorbis
QMAP = [(-1, 45), ( 0, 64), ( 1, 80), ( 2, 96),
( 3, 112), ( 4, 128), ( 5, 160), ( 6, 192),
( 7, 224), ( 8, 256), ( 9, 320), (10, 500)]
EXT = 'ogg'
def encoder_chain(me):
- for q, br in me.QMAP:
- if br >= me.bitrate:
- break
- else:
- raise ValueError, 'no suitable quality setting found'
- return [make_element('vorbisenc',
- quality = q/10.0),
+ encprops = {}
+ if me.bitrate is not None:
+ for q, br in me.QMAP:
+ if br >= me.bitrate:
+ break
+ else:
+ raise ValueError, 'no suitable quality setting found'
+ encprops['quality'] = q/10.0
+ return [make_element('vorbisenc', **encprops),
make_element('oggmux')]
defformat('ogg-vorbis', OggVorbisFormat)
EXT = 'mp3'
def encoder_chain(me):
- return [make_element('lame',
- vbr_mean_bitrate = me.bitrate,
- vbr = 4),
+ encprops = {}
+ if me.bitrate is not None:
+ encprops['bitrate'] = me.bitrate
+ encprops['target'] = 'bitrate'
+ else:
+ encprops['quality'] = 4
+ encprops['target'] = 'quality'
+ return [make_element('lamemp3enc', quality = 4, **encprops),
make_element('xingmux'),
make_element('id3v2mux')]
GStreamer produces ID3v2 tags, but not ID3v1. This seems unnecessarily
unkind to stupid players.
"""
- tag = E3.Tag()
- tag.link(path)
- tag.setTextEncoding(E3.UTF_8_ENCODING)
- try:
- tag.update(E3.ID3_V1_1)
- except (UnicodeEncodeError, E3.tag.GenreException):
- pass
+ f = E3.load(path)
+ if f is None: return
+ t = f.tag
+ if t is None: return
+ for v in [E3.id3.ID3_V2_3, E3.id3.ID3_V1]:
+ try: f.tag.save(version = v)
+ except (UnicodeEncodeError,
+ E3.id3.GenreException,
+ E3.id3.TagException):
+ pass
defformat('mp3', MP3Format)
optimize
If present, take a second pass to select optimal encoder settings.
- progression
+ progressive
If present, make a progressive file.
quality Integer from 1--100 (worst to best); default is 75.
defformat('bmp', BMPFormat)
###--------------------------------------------------------------------------
+### Remaining parsing machinery.
+
+Type = K('type') - Name - D('{') - R(Policy) - D('}')
+def build_type(s, l, t):
+  "Parse action: convert a `type' stanza into a policy object."
+  try:
+    cat = CATEGORYMAP[t[0]]
+  except KeyError:
+    ## Report the bad category name at the right place in the input.  (The
+    ## location argument is `l', the parse-action location parameter.)
+    raise P.ParseException(s, l, "Unknown category `%s'" % t[0])
+  pols = t[1]
+  if len(pols) == 1: pol = pols[0]
+  else: pol = AndPolicy(pols)
+  pol.setcategory(cat)
+  return pol
+Type.setParseAction(build_type)
+
+TARGETS = []
+class TargetJob (object):
+ def __init__(me, targetdir, policies):
+ me.targetdir = targetdir
+ me.policies = policies
+ def perform(me):
+ TARGETS.append(me)
+
+Target = K('target') - String - D('{') - R(Type) - D('}')
+def build_target(s, l, t):
+ return TargetJob(t[0], t[1])
+Target.setParseAction(build_target)
+
+VARS = { 'master': None }
+class VarsJob (object):
+ def __init__(me, vars):
+ me.vars = vars
+ def perform(me):
+ for k, v in me.vars:
+ VARS[k] = v
+
+Var = prop('master', String)
+Vars = K('vars') - D('{') - R(Var) - D('}')
+def build_vars(s, l, t):
+ return VarsJob(t[0])
+Vars.setParseAction(build_vars)
+
+TopLevel = Vars | Target
+Config = R(TopLevel)
+Config.ignore(P.pythonStyleComment)
+
+###--------------------------------------------------------------------------
### The directory grobbler.
-class Grobbler (object):
+def grobble(master, targets, noact = False):
"""
- The directory grobbler copies a directory tree, converting files.
+ Work through the MASTER directory, writing converted files to TARGETS.
+
+ The TARGETS are a list of `TargetJob' objects, each describing a target
+ directory and a policy to apply to it.
+
+ If NOACT is true, then don't actually do anything permanent to the
+ filesystem.
"""
- def __init__(me, policies, noact = False):
- """
- Create a new Grobbler, working with the given POLICIES.
- """
- me._pmap = {}
- me._noact = noact
- for p in policies:
- me._pmap.setdefault(p.cat, []).append(p)
- me._dirs = []
+ ## Transform the targets into a more convenient data structure.
+ tpolmap = []
+ for t in targets:
+ pmap = {}
+ tpolmap.append(pmap)
+ for p in t.policies: pmap.setdefault(p.cat, []).append(p)
- def _grobble_file(me, master, targetdir, cohorts):
- """
- Convert MASTER, writing the result to TARGETDIR.
+ ## Keep track of the current position in the master tree.
+ dirs = []
- The COHORTS are actually (CAT, ID, COHORT) triples, where a COHORT is a
- list of (FILENAME, ID) pairs.
+ ## And the files which haven't worked.
+ broken = []
- Since this function might convert the MASTER file, the caller doesn't
- know the name of the output files, so we return then as a list.
- """
+ def grobble_file(master, pmap, targetdir, cohorts):
+ ## Convert MASTER, writing the result to TARGETDIR.
+ ##
+ ## The COHORTS are actually (CAT, ID, COHORT) triples, where a COHORT is
+ ## a list of (FILENAME, ID) pairs.
+ ##
+ ## Since this function might convert the MASTER file, the caller doesn't
+  ## know the name of the output files, so we return them as a list.
done = set()
st_m = OS.stat(master)
## Go through the category's policies and see if any match. If we fail
## here, see if there are more categories to try.
- for pol in me._pmap[cat]:
+ for pol in pmap[cat]:
acts = pol.actions(master, targetdir, id, cohort)
if acts: break
else:
## Remove the target. (A hardlink will fail if the target already
## exists.)
- if not me._noact:
+ if not noact:
try:
OS.unlink(a.target)
except OSError, err:
raise
## Do whatever it is we decided to do.
- if me._noact:
+ if noact:
STATUS.commit(filestatus(master, a))
else:
a.perform()
return list(done)
@contextmanager
- def _wrap(me, masterfile):
- """
- Handle exceptions found while trying to convert a particular file or
- directory.
- """
+ def wrap(masterfile):
+ ## Handle exceptions found while trying to convert a particular file or
+ ## directory.
try:
yield masterfile
except (IOError, OSError), exc:
STATUS.clear()
STATUS.commit(filestatus(masterfile, 'failed (%s)' % exc))
- me._broken.append((masterfile, exc))
-
- def _grobble_dir(me, master, target):
- """
- Recursively convert files in MASTER, writing them to TARGET.
- """
-
- ## Make sure the TARGET exists and is a directory. It's a fundamental
- ## assumption of this program that the entire TARGET tree is disposable,
- ## so if something exists but isn't a directory, we should kill it.
- if OS.path.isdir(target):
- pass
- else:
- if OS.path.exists(target):
- STATUS.commit(filestatus(target, 'clear nondirectory'))
- if not me._noact:
- OS.unlink(target)
- STATUS.commit(filestatus(target, 'create directory'))
- if not me._noact:
- OS.mkdir(target)
-
- ## Keep a list of things in the target. As we convert files, we'll check
- ## them off. Anything left over is rubbish and needs to be deleted.
- checklist = {}
- try:
- for i in OS.listdir(target):
- checklist[i] = False
- except OSError, err:
- if err.errno not in (E.ENOENT, E.ENOTDIR):
- raise
-
- ## Keep track of the files in each category.
- catmap = {}
- todo = []
- done = []
-
- ## Work through the master files.
- for f in sorted(OS.listdir(master)):
-
- ## If the killswitch has been pulled then stop. The whole idea is that
- ## we want to cause a clean shutdown if possible, so we don't want to
- ## do it in the middle of encoding because the encoding effort will
- ## have been wasted. This is the only place we need to check. If
- ## we've exited the loop, then clearing old files will probably be
- ## fast, and we'll either end up here when the recursive call returns
- ## or we'll be in the same boat as before, clearing old files, only up
- ## a level. If worst comes to worst, we'll be killed forcibly
- ## somewhere inside `SH.rmtree', and that can continue where it left
- ## off.
- if KILLSWITCH.is_set():
- return
-
- ## Do something with the file.
- with me._wrap(OS.path.join(master, f)) as masterfile:
-
- ## If it's a directory then grobble it recursively. Keep the user
- ## amused by telling him where we are in the tree.
- if OS.path.isdir(masterfile):
- me._dirs.append(f)
- STATUS.set('/'.join(me._dirs))
- try:
- done += me._grobble_dir(masterfile, OS.path.join(target, f))
- finally:
- me._dirs.pop()
- STATUS.set('/'.join(me._dirs))
-
- ## Otherwise it's a file. Work out what kind, and stash it under
- ## the appropriate categories. Later, we'll apply policy to the
- ## files, by category, and work out what to do with them all.
- else:
- gf = GIO.File(masterfile)
- mime = gf.query_info('standard::content-type').get_content_type()
- cats = []
- for cat in me._pmap.iterkeys():
- id = cat.identify(masterfile, mime)
- if id is None: continue
- catmap.setdefault(cat, []).append((masterfile, id))
- cats.append((cat, id))
- if not cats:
- catmap.setdefault(None, []).append((masterfile, id))
- todo.append((masterfile, cats))
-
- ## Work through the categorized files to see what actions to do for
- ## them.
- for masterfile, cats in todo:
- with me._wrap(masterfile):
- done += me._grobble_file(masterfile, target,
- [(cat, id, catmap[cat])
- for cat, id in cats])
-
- ## Check the results off the list so that we don't clear it later.
- for f in done:
- checklist[OS.path.basename(f)] = True
-
- ## Maybe there's stuff in the target which isn't accounted for. Delete
- ## it: either the master has changed, or the policy for this target has
- ## changed. Either way, the old files aren't wanted.
- for f in checklist:
- if not checklist[f]:
- STATUS.commit(filestatus(f, 'clear bogus file'))
- if not me._noact:
- bogus = OS.path.join(target, f)
- try:
- if OS.path.isdir(bogus):
- SH.rmtree(bogus)
- else:
- OS.unlink(bogus)
- except OSError, err:
- if err.errno != E.ENOENT:
- raise
+ broken.append((masterfile, exc))
- ## Return the target name, so that it can be checked off.
- return [target]
+ def grobble_dir(master, targets):
+ ## Recursively convert files in MASTER, writing them to the TARGETS.
- def grobble(me, master, target):
- """
- Convert MASTER, writing a directory tree TARGET.
+ ## Keep track of the subdirectories we encounter, because we'll need to
+ ## do all of those in one go at the end.
+ subdirs = set()
- Returns a list of files which couldn't be converted.
- """
- try:
- me._broken = []
- me._grobble_dir(master, target)
- return me._broken
- finally:
- del me._broken
+ ## Work through each target directory in turn.
+ for target, pmap in zip(targets, tpolmap):
-###--------------------------------------------------------------------------
-### Remaining parsing machinery.
-
-Type = K('type') - Name - D('{') - R(Policy) - D('}')
-def build_type(s, l, t):
- try:
- cat = CATEGORYMAP[t[0]]
- except KeyError:
- raise P.ParseException(s, loc, "Unknown category `%s'" % t[0])
- pols = t[1]
- if len(pols) == 1: pol = pols[0]
- else: pol = AndPolicy(pols)
- pol.setcategory(cat)
- return pol
-Type.setParseAction(build_type)
-
-TARGETS = []
-class TargetJob (object):
- def __init__(me, targetdir, policies):
- me.targetdir = targetdir
- me.policies = policies
- def perform(me):
- TARGETS.append(me)
-
-Target = K('target') - String - D('{') - R(Type) - D('}')
-def build_target(s, l, t):
- return TargetJob(t[0], t[1])
-Target.setParseAction(build_target)
-
-VARS = { 'master': None }
-class VarsJob (object):
- def __init__(me, vars):
- me.vars = vars
- def perform(me):
- for k, v in me.vars:
- VARS[k] = v
-
-Var = prop('master', String)
-Vars = K('vars') - D('{') - R(Var) - D('}')
-def build_vars(s, l, t):
- return VarsJob(t[0])
-Vars.setParseAction(build_vars)
+ ## Make sure the TARGET exists and is a directory. It's a fundamental
+ ## assumption of this program that the entire TARGET tree is
+ ## disposable, so if something exists but isn't a directory, we should
+ ## kill it.
+ if OS.path.isdir(target):
+ pass
+ else:
+ if OS.path.exists(target):
+ STATUS.commit(filestatus(target, 'clear nondirectory'))
+ if not noact:
+ OS.unlink(target)
+ STATUS.commit(filestatus(target, 'create directory'))
+ if not noact:
+ OS.mkdir(target)
+
+ ## Keep a list of things in the target. As we convert files, we'll
+ ## check them off. Anything left over is rubbish and needs to be
+ ## deleted.
+ checklist = {}
+ try:
+ for i in OS.listdir(target):
+ checklist[i] = False
+ except OSError, err:
+ if err.errno not in (E.ENOENT, E.ENOTDIR):
+ raise
+
+ ## Keep track of the files in each category.
+ catmap = {}
+ todo = []
+ done = []
+
+ ## Work through the master files.
+ for f in sorted(OS.listdir(master)):
+
+ ## If the killswitch has been pulled then stop. The whole idea is
+ ## that we want to cause a clean shutdown if possible, so we don't
+ ## want to do it in the middle of encoding because the encoding
+ ## effort will have been wasted. This is the only place we need to
+ ## check. If we've exited the loop, then clearing old files will
+ ## probably be fast, and we'll either end up here when the recursive
+ ## call returns or we'll be in the same boat as before, clearing old
+ ## files, only up a level. If worst comes to worst, we'll be killed
+ ## forcibly somewhere inside `SH.rmtree', and that can continue where
+ ## it left off.
+ if KILLSWITCH.is_set():
+ return
+
+ ## Do something with the file.
+ with wrap(OS.path.join(master, f)) as masterfile:
+
+ ## If it's a directory then prepare to grobble it recursively, but
+ ## don't do that yet.
+ if OS.path.isdir(masterfile):
+ subdirs.add(f)
+ done.append(OS.path.join(target, f))
+
+ ## Otherwise it's a file. Work out what kind, and stash it under
+ ## the appropriate categories. Later, we'll apply policy to the
+ ## files, by category, and work out what to do with them all.
+ else:
+ mime = GIO.file_new_for_path(masterfile) \
+ .query_info('standard::content-type', 0) \
+ .get_content_type()
+ cats = []
+ for cat in pmap.iterkeys():
+ id = cat.identify(masterfile, mime)
+ if id is None: continue
+ catmap.setdefault(cat, []).append((masterfile, id))
+ cats.append((cat, id))
+ if not cats:
+ catmap.setdefault(None, []).append((masterfile, id))
+ todo.append((masterfile, cats))
+
+ ## Work through the categorized files to see what actions to do for
+ ## them.
+ for masterfile, cats in todo:
+ with wrap(masterfile):
+ done += grobble_file(masterfile, pmap, target,
+ [(cat, id, catmap[cat]) for cat, id in cats])
+
+ ## Check the results off the list so that we don't clear it later.
+ for f in done:
+ checklist[OS.path.basename(f)] = True
+
+ ## Maybe there's stuff in the target which isn't accounted for. Delete
+ ## it: either the master has changed, or the policy for this target has
+ ## changed. Either way, the old files aren't wanted.
+ for f in checklist:
+ if not checklist[f]:
+ STATUS.commit(filestatus(f, 'clear bogus file'))
+ if not noact:
+ bogus = OS.path.join(target, f)
+ try:
+ if OS.path.isdir(bogus):
+ SH.rmtree(bogus)
+ else:
+ OS.unlink(bogus)
+ except OSError, err:
+ if err.errno != E.ENOENT:
+ raise
+
+ ## If there are subdirectories which want processing then do those.
+ ## Keep the user amused by telling him where we are in the tree.
+ for d in sorted(subdirs):
+ dirs.append(d)
+ STATUS.set('/'.join(dirs))
+ with wrap(OS.path.join(master, d)) as masterdir:
+ try:
+ grobble_dir(masterdir,
+ [OS.path.join(target, d) for target in targets])
+ finally:
+ dirs.pop()
+ STATUS.set('/'.join(dirs))
-TopLevel = Vars | Target
-Config = R(TopLevel)
-Config.ignore(P.pythonStyleComment)
+ ## Right. We're ready to go.
+ grobble_dir(master, [t.targetdir for t in targets])
+ return broken
###--------------------------------------------------------------------------
### Command-line interface.
## Build the option parser object.
op = OP.OptionParser(prog = QUIS, version = VERSION,
- usage = '%prog [-t TIMEOUT] CONFIG',
+ usage = '%prog [-in] [-t TIMEOUT] [-T TIMEOUT] '
+ 'CONFIG',
description = """\
Convert a directory tree of files according to the configuration file
CONFIG.
opts = parse_opts(SYS.argv[1:])
if 'master' not in VARS:
die("no master directory set")
- broken = []
- for t in TARGETS:
- g = Grobbler(t.policies, opts.noact)
- b = g.grobble(VARS['master'], t.targetdir)
- broken += b
+ broken = grobble(VARS['master'], TARGETS, opts.noact)
if broken:
moan('failed to convert some files:')
for file, exc in broken: