--- /dev/null
+#! /usr/bin/perl
+###
+### Construct an APT `sources.list' file
+###
+### (c) 2012 Mark Wooding
+###
+
+###----- Licensing notice ---------------------------------------------------
+###
+### This program is free software; you can redistribute it and/or modify
+### it under the terms of the GNU General Public License as published by
+### the Free Software Foundation; either version 2 of the License, or
+### (at your option) any later version.
+###
+### This program is distributed in the hope that it will be useful,
+### but WITHOUT ANY WARRANTY; without even the implied warranty of
+### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+### GNU General Public License for more details.
+###
+### You should have received a copy of the GNU General Public License
+### along with this program; if not, write to the Free Software Foundation,
+### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+use Data::Dumper;
+use File::FnMatch qw(:fnmatch);
+use Getopt::Long qw(:config gnu_compat bundling no_ignore_case);
+use Text::ParseWords;
+
+###--------------------------------------------------------------------------
+### Miscellaneous utilities.
+
+(our $QUIS = $0) =~ s:.*/::;
+our $VERSION = "1.0.0";
+
+sub fail ($) {
+ my ($msg) = @_;
+ ## Report a fatal error MSG and exit.
+
+ print STDERR "$QUIS: $msg\n";
+ exit 1;
+}
+
+sub literalp ($) {
+ my ($s) = @_;
+ ## Answer whether the string S is free of metacharacters.
+
+ return $s !~ /[\\?*[]/;
+}
+
+###--------------------------------------------------------------------------
+### Configuration sets.
+
+sub cset_new () {
+ ## Return a new configuration set.
+
+ return { "%meta" => { allstarp => 1, ixlist => [], ixmap => {} } };
+}
+
+sub cset_indices ($$) {
+ my ($cset, $what) = @_;
+ ## Return the list of literal indices in the configuration set CSET. If an
+ ## explicit `indices' tag is defined, then use its value (split at
+ ## whitespace). If there are explicit literal indices, return them (in the
+ ## correct order). If all indices are `*', return a single `default' item.
+ ## Otherwise report an error.
+
+ if (defined (my $it = $cset->{indices}{"*"})) {
+ return shellwords $it;
+ } else {
+ my $meta = $cset->{"%meta"};
+ $meta->{allstarp} and return "default";
+ return @{$meta->{ixlist}} if @{$meta->{ixlist}};
+ fail "no literal indices for `$what'";
+ }
+}
+
+sub cset_store ($$$$) {
+ my ($cset, $tag, $ix, $value) = @_;
+ ## Store VALUE in the configuration set CSET as the value for TAG with
+ ## index pattern IX.
+
+ my $meta = $cset->{"%meta"};
+ $ix eq "*" or $meta->{allstarp} = 0;
+ if (!$meta->{ixmap}{$ix} && literalp $ix) {
+ $meta->{ixmap}{$ix} = 1;
+ push @{$meta->{ixlist}}, $ix;
+ }
+ $cset->{$tag}{$ix} = $value;
+}
+
+sub cset_expand (\@$$);
+%PAINT = ();
+
+sub cset_lookup (\@$$;$) {
+ my ($cset, $tag, $ix, $mustp) = @_;
+ ## Look up TAG in the CSETs using index IX. Return the value corresponding
+ ## to the most specific match to IX in the earliest configuration set in
+ ## the list. If no set contains a matching value at all, then the
+ ## behaviour depends on MUSTP: if true, report an error; if false, return
+ ## undef.
+
+ if ($PAINT{$tag}) { fail "recursive expansion of `\%${tag}[$ix]'"; }
+ my $val = undef;
+ CSET: for my $cs (@$cset) {
+ defined (my $tset = $cs->{$tag}) or next CSET;
+ if (defined (my $it = $tset->{$ix})) { $val = $it; last CSET; };
+ my $pat = undef;
+ PAT: while (my ($p, $v) = each %$tset) {
+ fnmatch $p, $ix or next PAT;
+ unless (defined($pat) && fnmatch($p, $pat)) { $val = $v; $pat = $p; }
+ }
+ last CSET if defined $val;
+ }
+ if (defined $val) {
+ $PAINT{$tag} = 1;
+ my $exp = cset_expand @$cset, $ix, $val;
+ $PAINT{$tag} = 0;
+ return $exp;
+ } elsif ($mustp) { fail "variable `$tag\[$ix]' undefined"; }
+ else { return undef; }
+}
+
+sub cset_fetch (\%\@$$@) {
+ my ($a, $cset, $mustp, $ix, @tag) = @_;
+ ## Populate the hash A with values retrieved from the CSETs. Each TAG is
+ ## looked up with index IX, and if a value is found, it is stored in A with
+ ## key TAG. If MUSTP is true, then an error is reported unless a value is
+ ## found for every TAG.
+
+ for my $tag (@tag) {
+ my $v = cset_lookup @$cset, $tag, $ix, $mustp;
+ $a->{$tag} = $v if defined $v;
+ }
+}
+
+sub cset_expand (\@$$) {
+ my ($cset, $ix, $s) = @_;
+ ## Expand placeholders %TAG or %{TAG} in the string S, relative to the
+ ## CSETs and the index IX.
+
+ $s =~ s{
+ % (?: (?P<NAME>\w+)
+ | \{ (?P<NAME>\w+) \} )
+ }{
+ cset_lookup(@$cset, $+{NAME}, $ix, 1)
+ }xeg;
+ return $s;
+}
+
+###--------------------------------------------------------------------------
+### Parsing.
+
+our %DEFAULT = %{+cset_new}; # Default assignments.
+our %CSET = (); # Map of distro configuration sets.
+our @SUB = (); # List of subscriptions.
+
+sub parse ($) {
+ my ($fn) = @_;
+ ## Parse the file named by FN and add definitions to the tables %DEFAULT,
+ ## %CSET and @SUB.
+
+ ## Open the file and prepare to read.
+ open my $fh, "<", $fn or fail "open `$fn': $!";
+ my $ln = 0;
+
+ ## Report a syntax error, citing the offending file and line.
+ sub syntax { fail "$fn:$ln: $_[0]"; }
+
+ ## Report an error about an indented line with no stanza header.
+ sub nomode { syntax "missing stanza header" };
+ my $mode = \&nomode;
+
+ ## Parse an assignment LINE and store it in CSET.
+ sub assign {
+ my ($cset, $line) = @_;
+ $line =~ m{
+ ^ \s*
+ (?P<TAG> \w+)
+ (?: \[ (?P<IX> [^\]]+) \] )?
+ \s* = \s*
+ (?P<VALUE> | \S | \S.*\S)
+ \s* $
+ }x or syntax "invalid assignment";
+ cset_store $cset, $+{TAG}, $+{IX} // "*", $+{VALUE};
+ }
+
+ ## Parse a subscription LINE and store it in @SUB.
+ sub subscribe {
+ my ($line) = @_;
+ my @w = shellwords $line;
+ my @dist = ();
+ while (my $w = shift @w) { last if $w eq ":"; push @dist, $w; }
+ @w and @dist or syntax "empty distribution or release list";
+ push @SUB, [\@dist, \@w];
+ }
+
+ for (;;) {
+
+ ## Read a line. If it's empty or a comment then ignore it.
+ defined (my $line = readline $fh)
+ or last;
+ $ln++;
+ next if $line =~ /^\s*($|\#)/;
+ chomp $line;
+
+ ## If the line begins with whitespace then process it according to the
+ ## prevailing mode.
+ if ($line =~ /^\s/) {
+ $mode->($line);
+ next;
+ }
+
+ ## Split the header line into tokens and determine an action.
+ my @w = shellwords $line;
+ $mode = \&nomode;
+ if ($w[0] eq "distribution") {
+ @w == 2 or syntax "usage: distribution NAME";
+ my $cset = $CSET{$w[1]} //= cset_new;
+ $mode = sub { assign $cset, @_ };
+ } elsif ($w[0] eq "default") {
+ @w == 1 or syntax "usage: default";
+ $mode = sub { assign \%DEFAULT, @_ };
+ } elsif ($w[0] eq "subscribe") {
+ @w == 1 or syntax "usage: subscribe";
+ $mode = \&subscribe;
+ } else {
+ syntax "unknown toplevel directive `$w[0]'";
+ }
+ }
+
+ ## Done. Make sure we read everything.
+ close $fh or die "read `$fn': $!";
+}
+
+###--------------------------------------------------------------------------
+### Main program.
+
+our $USAGE = "usage: $QUIS FILE|DIR ...";
+sub version { print "$QUIS, version $VERSION\n"; }
+sub help {
+ print <<EOF;
+$USAGE
+
+Options:
+ -h, --help Show this help text.
+ -v, --version Show the program version number.
+EOF
+}
+
+GetOptions('help|h|?' => sub { version; help; exit; },
+ 'version|v' => sub { version; exit; })
+ and @ARGV >= 1
+ or do { print STDERR $USAGE, "\n"; exit 1; };
+
+## Read the input files.
+for my $fn (@ARGV) {
+ if (-d $fn) {
+ opendir my $dh, $fn or fail "opendir `$fn': $!";
+ my @f = ();
+ FILE: while (my $f = readdir $dh) {
+ $f =~ /^[-\w.]+$/ or next FILE;
+ $f = "$fn/$f";
+ -f $f or next FILE;
+ push @f, $f;
+ }
+ closedir $dh;
+ for my $f (sort @f) { parse $f; }
+ } else {
+ parse $fn;
+ }
+}
+
+## Start writing output.
+print <<EOF;
+### -*-conf-*- GENERATED by $QUIS: DO NOT EDIT!
+###
+### Package sources.
+
+EOF
+
+## Work through the subscription list.
+for my $pair (@SUB) {
+ my @dist = @{$pair->[0]};
+ my @rel = @{$pair->[1]};
+
+ ## Write a stanza for each distribution.
+ for my $dist (@dist) {
+
+ ## Find the configuration set for the distribution.
+ defined (my $cset = $CSET{$dist})
+ or fail "unknown distribution `$dist'";
+ my @ix = cset_indices $cset, $dist;
+
+ ## Print a banner to break up the monotony.
+ my %a = ();
+ cset_fetch %a, @{[$cset, \%DEFAULT]}, 0, "default", qw(banner);
+ print "###", "-" x 74, "\n";
+ print "### ", $a{banner}, "\n" if exists $a{banner};
+
+ ## Write a paragraph for each release.
+ for my $rel (@rel) {
+
+ ## Write a header.
+ print "\n## $rel\n";
+
+ ## Prepare a list of configuration sections to provide variables for
+ ## expansion.
+ my @cset = ({ RELEASE => { "*" => $rel } }, $cset, \%DEFAULT);
+
+ ## Work through each index.
+ IX: for my $ix (@ix) {
+
+ ## Fetch properties from the configuration set.
+ %a = (options => undef,
+ release => $rel,
+ releases => "*",
+ types => "deb deb-src");
+ cset_fetch %a, @cset, 1, $ix, qw(uri components);
+ cset_fetch %a, @cset, 0, $ix, qw(types options release releases);
+
+ ## Check that this release matches the index.
+ my $matchp = 0;
+ for my $rpat (shellwords $a{releases}) {
+ $matchp = 1, last if fnmatch $rpat, $rel;
+ }
+ next IX unless $matchp;
+
+ ## Build an output line.
+ my $out = "";
+ defined (my $opt = $a{options}) and $out .= "[ $opt ] ";
+ $out .= "$a{uri} $a{release} $a{components}";
+
+ ## Canonify whitespace.
+ $out =~ s/^\s+//; $out =~ s/\s+$//; $out =~ s/\s+/ /;
+
+ ## Write out the necessary
+ print "$_ $out\n" for shellwords $a{types};
+ }
+ }
+ print "\n";
+ }
+}
+
+## Write a trailer.
+print "###----- That's all, folks ", "-" x 50, "\n";
+print "### GENERATED by $QUIS: NO NOT EDIT!\n";
+
+###--------------------------------------------------------------------------
+### Documentation.
+
+=head1 NAME
+
+mkaptsrc - generate APT `sources.list' files
+
+=head1 SYNOPSIS
+
+B<mkaptsrc> I<file>|I<dir>...
+
+=head1 DESCRIPTION
+
+The B<mkaptsrc> progrem generates an APT F<sources.list> file from a
+collection of configuration files. It allows a site to use a single master
+file defining all (or most) of the available archives, while allowing each
+individiual host to describe succinctly which archives it actually wants to
+track.
+
+The command line arguments are a list of one or more filenames and/or
+directories. The program reads the files one by one, in order; a directory
+stands for all of the regular files it contains whose names consist only of
+alphanumeric characters, dots C<.>, underscores C<_>, and hyphens C<->, in
+ascending lexicographic order. (Nested subdirectories are ignored.) Later
+files can override settings from earlier ones.
+
+=head2 Command-line syntax
+
+The following command-line options are recognized.
+
+=over
+
+=item B<-h>, B<--help>
+
+Print help about the program to standard output, and exit.
+
+=item B<-v>, B<--version>
+
+Print B<mkaptsrc>'s version number to standard output, and exit.
+
+=back
+
+=head2 Configuration syntax
+
+The configuration files are split into stanze. Each stanza begins with an
+unindented header line, followed by zero or more indented body lines. Blank
+lines (containing only whitespace) and comments (whose first non-whitespace
+character is C<#>) are ignored E<ndash> and in particular are not considered
+when determining the boundaries of stanze. It is not possible to split a
+stanza between two files.
+
+A I<distribution stanza> consists of a line
+
+=over
+
+B<distribution> I<dist>
+
+=back
+
+followed by a number of indented assignments
+
+=over
+
+I<tag> = I<value>
+
+=back
+
+or
+
+=over
+
+I<tag>B<[>I<pat>B<]> = I<value>
+
+=back
+
+Here, I<dist> is a name for this distribution; this name is entirely internal
+to the configuration and has no external meaning. Several stanze may use the
+same I<dist>: the effect is the same as a single big stanza containing all of
+the assignments in order.
+
+Each assignment line sets the value of a I<tag> for the distribution; if the
+I<tag> has already been assigned a value then the old value is forgotten.
+The optional I<pat> may be used to assign different values to the same tag
+according to different I<contexts>, distinguished by glob patterns: see the
+description below. Omitting the I<pat> is equivalent to using the wildcard
+pattern C<*>.
+
+A I<default stanza> consists of a line
+
+=over
+
+B<defaults>
+
+=back
+
+followed by assignments as for a distribution stanza. Again, there may be
+many default stanze, and the effect is the same as a single big default
+stanza containing all of the assignments in order. During output, tags are
+looked up first in the relevant distribution, and if there no matching
+assignments then the B<defaults> assignments are searched.
+
+A I<subscription stanza> consists of a line
+
+=over
+
+B<subscribe>
+
+=back
+
+followed by indented subscription lines
+
+=over
+
+I<dist> [I<dist> ...] B<:> I<release> [I<release> ...]
+
+=back
+
+Such a line is equivalent to a sequence of lines
+
+=over
+
+I<dist> B<:> I<release> [I<release> ...]
+
+=back
+
+one for each I<dist>, in order.
+
+It is permitted for several lines to name the same I<dist>, though currently
+the behaviour is not good: they are treated entirely independently. The
+author is not sure what the correct behaviour ought to be.
+
+=head2 Tag lookup and value expansion
+
+The output of B<mkaptsrc> is largely constructed by looking up tags and using
+their values. A tag is always looked up in a particular I<distribution> and
+with reference to a particular I<context>. Contexts are named with an
+I<index>. The resulting value is the last assignment in the distribution's
+stanze whose tag is equal to the tag being looked up, and whose pattern is
+either absent or matches the context index. If there is no matching
+assignment, then the default assignments are checked, and again the last
+match is used. If there is no default assignment either, then the lookup
+fails; this might or might not be an error.
+
+Once the value has been found, it is I<expanded> before use. Any
+placeholders of the form B<%>I<tag> or B<%{>I<tag>B<}> (the latter may be
+used to distinguish the I<tag> name from any immediately following text) are
+replaced by the (expanded) value of the I<tag>, using the same distribution
+and context as the original lookup. It is a fatal error for a lookup of a
+tag to fail during expansion. Recursive expansion is forbidden.
+
+There are some special tags given values by B<mkaptsrc>. Their names are
+written in all upper-case.
+
+=head2 Output
+
+The output is always written to stdout. It begins with a header comment
+(which you can't modify), including a warning that the file is generated and
+shouldn't be edited.
+
+The output is split into sections, one for each I<dist> in the subcription
+stanze. Each section begins with a comment banner, whose text is the result
+of looking up the tag B<banner> in the distribution, using the context index
+B<default>; if the lookup fails then no banner text is added.
+
+The distribution section is split into paragraphs, one for each I<release>
+listed in the subscription line, and headed with a comment naming the
+I<release>. The contents of the paragraph are determined by assignments in
+the distribution stanza for I<dist>.
+
+The set of context indices for the paragraph is determined, as follows.
+
+=over
+
+=item *
+
+The tag B<indices> is looked up in the distribution I<dist>. This lookup is
+special in three ways: firstly, lookup will I<not> fall back to the
+B<defaults> assignments; secondly, only assignments with no pattern (or,
+equivalently, with pattern C<*>) are examined; and, thirdly, the result is
+I<not> subject to expansion. If a value is found, then the context indices
+are precisely the space-separated words of the value.
+
+=item *
+
+If there assignments in the distribution I<dist> whose patterns are
+I<literal> E<ndash> i.e., contain no metacharacters C<*>, C<?>, C<[>, or
+C<\\> E<ndash> then the context indices are precisely these literal patterns,
+in the order in which they first appeared.
+
+=item *
+
+If all of the assignments for the distribution I<dist> have no pattern (or,
+equivalently, have pattern C<*>), then there is exactly one context index
+B<default>.
+
+=item *
+
+Otherwise the situation is a fatal error. You should resolve this unlikely
+situation by setting an explicit B<indices> value.
+
+=back
+
+The contexts are now processed in turn. Each lookup described below happens
+in the distribution I<dist>, with respect to the context being processed.
+Furthermore, the special tag B<RELEASE> is given the value I<release>.
+
+The tag B<releases> is looked up, and split into a space-separated sequence
+of glob patterns. If the I<release> doesn't match any of these patterns then
+the context is ignored. (If the lookup fails, the context is always used,
+as if the value had been C<*>.)
+
+Finally, a sequence of lines is written, of the form
+
+=over
+
+I<type> S<B<[> I<options> B<]>> I<uri> I<release> I<components>
+
+=back
+
+one for each word in the value of B<types>, defaulting to B<deb> B<deb-src>.
+Other pieces correspond to the values of tags to be looked up: I<release>
+defaults to the name provided in the B<subscribe> stanza; if I<options> is
+omitted then there will be no S<B<[> I<options> B<]>> piece; it is a fatal
+error if other lookups fail.
+
+=head1 EXAMPLES
+
+The package repository for the official Linux Spotify client can be described
+as follows.
+
+ distribution spotify
+ banner = Spotify client for Linux.
+ uri = http://repository.spotify.com/
+ components = non-free
+ types = deb
+
+ subscribe
+ spotify : stable
+
+This produces the output
+
+ ###------------------------------------------------------------
+ ### Spotify client for Linux.
+
+ ## stable
+ deb http://repository.spotify.com/ stable non-free
+
+As a more complex example, I describe the official Debian package archive as
+follows.
+
+ default
+ debmirror = http://mirror.distorted.org.uk
+ debsecurity = http://security.debian.org
+
+ distribution debian
+ banner = Debian GNU/Linux.
+ uri[base] = %debmirror/debian/
+ uri[security-local] = %debmirror/debian-security/
+ uri[security-upstream] = %debsecurity/debian-security/
+ release[security-*] = %RELEASE/updates
+ releases[security-*] = oldstable stable testing
+ components = main non-free contrib
+ components[security-*] = main
+
+ subscribe
+ debian : stable testing unstable
+
+This arranges to use my local mirror for both the main archive and for
+security updates, but I<also> to use the upstream archive for security
+updates which I might not have mirrored yet. Setting B<releases[security-*]>
+copes with the fact that there are no separate security releases for the
+B<unstable> release.
+
+On machines which are far away from my mirror, I override these settings by
+writing
+
+ distribution debian
+ debmirror = http://ftp.uk.debian.org
+ indices = base security-upstream
+
+in a host-local file (which has the effect of disabling the B<security-local>
+context implicitly defined in the base stanza.
+
+=head1 BUGS
+
+Redefinition of subscriptions currently isn't well behaved.
+
+=head1 SEE ALSO
+
+L<sources.list(5)>
+
+=head1 AUTHOR
+
+Mark Wooding <mdw@distorted.org.uk>
+
+=cut
+
+###----- That's all, folks --------------------------------------------------