#! /usr/bin/perl ### ### Construct an APT `sources.list' file ### ### (c) 2012 Mark Wooding ### ###----- Licensing notice --------------------------------------------------- ### ### This program is free software; you can redistribute it and/or modify ### it under the terms of the GNU General Public License as published by ### the Free Software Foundation; either version 2 of the License, or ### (at your option) any later version. ### ### This program is distributed in the hope that it will be useful, ### but WITHOUT ANY WARRANTY; without even the implied warranty of ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ### GNU General Public License for more details. ### ### You should have received a copy of the GNU General Public License ### along with this program; if not, write to the Free Software Foundation, ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. use Data::Dumper; use File::FnMatch qw(:fnmatch); use Getopt::Long qw(:config gnu_compat bundling no_ignore_case); use Text::ParseWords; ###-------------------------------------------------------------------------- ### Miscellaneous utilities. (our $QUIS = $0) =~ s:.*/::; our $VERSION = "1.0.0"; sub fail ($) { my ($msg) = @_; ## Report a fatal error MSG and exit. print STDERR "$QUIS: $msg\n"; exit 1; } sub literalp ($) { my ($s) = @_; ## Answer whether the string S is free of metacharacters. return $s !~ /[\\?*[]/; } ###-------------------------------------------------------------------------- ### Configuration sets. sub cset_new () { ## Return a new configuration set. return { "%meta" => { allstarp => 1, ixlist => [], ixmap => {} } }; } sub cset_indices ($$) { my ($cset, $what) = @_; ## Return the list of literal indices in the configuration set CSET. If an ## explicit `indices' tag is defined, then use its value (split at ## whitespace). If there are explicit literal indices, return them (in the ## correct order). If all indices are `*', return a single `default' item. ## Otherwise report an error. if (defined (my $it = $cset->{indices}{"*"})) { return shellwords $it; } else { my $meta = $cset->{"%meta"}; $meta->{allstarp} and return "default"; return @{$meta->{ixlist}} if @{$meta->{ixlist}}; fail "no literal indices for `$what'"; } } sub cset_store ($$$$) { my ($cset, $tag, $ix, $value) = @_; ## Store VALUE in the configuration set CSET as the value for TAG with ## index pattern IX. my $meta = $cset->{"%meta"}; $ix eq "*" or $meta->{allstarp} = 0; if (!$meta->{ixmap}{$ix} && literalp $ix) { $meta->{ixmap}{$ix} = 1; push @{$meta->{ixlist}}, $ix; } $cset->{$tag}{$ix} = $value; } sub cset_expand (\@$$); %PAINT = (); sub cset_lookup (\@$$;$) { my ($cset, $tag, $ix, $mustp) = @_; ## Look up TAG in the CSETs using index IX. Return the value corresponding ## to the most specific match to IX in the earliest configuration set in ## the list. If no set contains a matching value at all, then the ## behaviour depends on MUSTP: if true, report an error; if false, return ## undef. if ($PAINT{$tag}) { fail "recursive expansion of `\%${tag}[$ix]'"; } my $val = undef; CSET: for my $cs (@$cset) { defined (my $tset = $cs->{$tag}) or next CSET; if (defined (my $it = $tset->{$ix})) { $val = $it; last CSET; }; my $pat = undef; PAT: while (my ($p, $v) = each %$tset) { fnmatch $p, $ix or next PAT; unless (defined($pat) && fnmatch($p, $pat)) { $val = $v; $pat = $p; } } last CSET if defined $val; } if (defined $val) { local $PAINT{$tag} = 1; my $exp = cset_expand @$cset, $ix, $val; return $exp; } elsif ($mustp) { fail "variable `$tag\[$ix]' undefined"; } else { return undef; } } sub cset_fetch (\%\@$$@) { my ($a, $cset, $mustp, $ix, @tag) = @_; ## Populate the hash A with values retrieved from the CSETs. Each TAG is ## looked up with index IX, and if a value is found, it is stored in A with ## key TAG. If MUSTP is true, then an error is reported unless a value is ## found for every TAG. for my $tag (@tag) { my $v = cset_lookup @$cset, $tag, $ix, $mustp; $a->{$tag} = $v if defined $v; } } sub cset_expand (\@$$) { my ($cset, $ix, $s) = @_; ## Expand placeholders %TAG or %{TAG} in the string S, relative to the ## CSETs and the index IX. $s =~ s{ % (?: (?P\w+) | \{ (?P\w+) \} ) }{ cset_lookup(@$cset, $+{NAME}, $ix, 1) }xeg; return $s; } ###-------------------------------------------------------------------------- ### Parsing. our %DEFAULT = %{+cset_new}; # Default assignments. our %CSET = (); # Map of distro configuration sets. our @SUB = (); # List of subscriptions. sub parse ($) { my ($fn) = @_; ## Parse the file named by FN and add definitions to the tables %DEFAULT, ## %CSET and @SUB. ## Open the file and prepare to read. open my $fh, "<", $fn or fail "open `$fn': $!"; my $ln = 0; ## Report a syntax error, citing the offending file and line. my $syntax = sub { fail "$fn:$ln: $_[0]" }; ## Report an error about an indented line with no stanza header. my $nomode = sub { $syntax->("missing stanza header") }; my $mode = $nomode; ## Parse an assignment LINE and store it in CSET. my $assign = sub { my ($cset, $line) = @_; $line =~ m{ ^ \s* (?P \w+) (?: \[ (?P [^\]]+) \] )? \s* = \s* (?P | \S | \S.*\S) \s* $ }x or $syntax->("invalid assignment"); cset_store $cset, $+{TAG}, $+{IX} // "*", $+{VALUE}; }; ## Parse a subscription LINE and store it in @SUB. my $subscribe = sub { my ($line) = @_; my @w = shellwords $line; my @dist = (); while (my $w = shift @w) { last if $w eq ":"; push @dist, $w; } @w and @dist or $syntax->("empty distribution or release list"); push @SUB, [\@dist, \@w]; }; for (;;) { ## Read a line. If it's empty or a comment then ignore it. defined (my $line = readline $fh) or last; $ln++; next if $line =~ /^\s*($|\#)/; chomp $line; ## If the line begins with whitespace then process it according to the ## prevailing mode. if ($line =~ /^\s/) { $mode->($line); next; } ## Split the header line into tokens and determine an action. my @w = shellwords $line; $mode = $nomode; if ($w[0] eq "distribution") { @w == 2 or $syntax->("usage: distribution NAME"); my $cset = $CSET{$w[1]} //= cset_new; $mode = sub { $assign->($cset, @_) }; } elsif ($w[0] eq "default") { @w == 1 or $syntax->("usage: default"); $mode = sub { $assign->(\%DEFAULT, @_) }; } elsif ($w[0] eq "subscribe") { @w == 1 or $syntax->("usage: subscribe"); $mode = $subscribe; } else { $syntax->("unknown toplevel directive `$w[0]'"); } } ## Done. Make sure we read everything. close $fh or die "read `$fn': $!"; } ###-------------------------------------------------------------------------- ### Main program. our $USAGE = "usage: $QUIS FILE|DIR ..."; sub version { print "$QUIS, version $VERSION\n"; } sub help { print < sub { version; help; exit; }, 'version|v' => sub { version; exit; }) and @ARGV >= 1 or do { print STDERR $USAGE, "\n"; exit 1; }; ## Read the input files. for my $fn (@ARGV) { if (-d $fn) { opendir my $dh, $fn or fail "opendir `$fn': $!"; my @f = (); FILE: while (my $f = readdir $dh) { $f =~ /^[-\w.]+$/ or next FILE; $f = "$fn/$f"; -f $f or next FILE; push @f, $f; } closedir $dh; for my $f (sort @f) { parse $f; } } else { parse $fn; } } ## Start writing output. print <[0]}; my @rel = @{$pair->[1]}; ## Write a stanza for each distribution. for my $dist (@dist) { ## Find the configuration set for the distribution. defined (my $cset = $CSET{$dist}) or fail "unknown distribution `$dist'"; my @ix = cset_indices $cset, $dist; ## Print a banner to break up the monotony. my %a = (); cset_fetch %a, @{[$cset, \%DEFAULT]}, 0, "default", qw(banner); print "###", "-" x 74, "\n"; print "### ", $a{banner}, "\n" if exists $a{banner}; ## Write a paragraph for each release. for my $rel (@rel) { ## Write a header. print "\n## $rel\n"; ## Prepare a list of configuration sections to provide variables for ## expansion. my @cset = ({ RELEASE => { "*" => $rel } }, $cset, \%DEFAULT); ## Work through each index. IX: for my $ix (@ix) { ## Fetch properties from the configuration set. %a = (options => undef, release => $rel, releases => "*", types => "deb deb-src"); cset_fetch %a, @cset, 1, $ix, qw(uri components); cset_fetch %a, @cset, 0, $ix, qw(types options release releases); ## Check that this release matches the index. my $matchp = 0; for my $rpat (shellwords $a{releases}) { $matchp = 1, last if fnmatch $rpat, $rel; } next IX unless $matchp; ## Build an output line. my $out = ""; if (defined (my $opt = $a{options})) { $out .= "[ $opt ] "; } $out .= "$a{uri} $a{release} $a{components}"; ## Canonify whitespace. $out =~ s/^\s+//; $out =~ s/\s+$//; $out =~ s/\s+/ /; ## Write out the necessary print "$_ $out\n" for shellwords $a{types}; } } print "\n"; } } ## Write a trailer. print "###----- That's all, folks ", "-" x 50, "\n"; print "### GENERATED by $QUIS: NO NOT EDIT!\n"; ###-------------------------------------------------------------------------- ### Documentation. =head1 NAME mkaptsrc - generate APT `sources.list' files =head1 SYNOPSIS B I|I... =head1 DESCRIPTION The B progrem generates an APT F file from a collection of configuration files. It allows a site to use a single master file defining all (or most) of the available archives, while allowing each individiual host to describe succinctly which archives it actually wants to track. The command line arguments are a list of one or more filenames and/or directories. The program reads the files one by one, in order; a directory stands for all of the regular files it contains whose names consist only of alphanumeric characters, dots C<.>, underscores C<_>, and hyphens C<->, in ascending lexicographic order. (Nested subdirectories are ignored.) Later files can override settings from earlier ones. =head2 Command-line syntax The following command-line options are recognized. =over =item B<-h>, B<--help> Print help about the program to standard output, and exit. =item B<-v>, B<--version> Print B's version number to standard output, and exit. =back =head2 Configuration syntax The configuration files are split into stanze. Each stanza begins with an unindented header line, followed by zero or more indented body lines. Blank lines (containing only whitespace) and comments (whose first non-whitespace character is C<#>) are ignored E and in particular are not considered when determining the boundaries of stanze. It is not possible to split a stanza between two files. A I consists of a line =over B I =back followed by a number of indented assignments =over I = I =back or =over IB<[>IB<]> = I =back Here, I is a name for this distribution; this name is entirely internal to the configuration and has no external meaning. Several stanze may use the same I: the effect is the same as a single big stanza containing all of the assignments in order. Each assignment line sets the value of a I for the distribution; if the I has already been assigned a value then the old value is forgotten. The optional I may be used to assign different values to the same tag according to different I, distinguished by glob patterns: see the description below. Omitting the I is equivalent to using the wildcard pattern C<*>. A I consists of a line =over B =back followed by assignments as for a distribution stanza. Again, there may be many default stanze, and the effect is the same as a single big default stanza containing all of the assignments in order. During output, tags are looked up first in the relevant distribution, and if there no matching assignments then the B assignments are searched. A I consists of a line =over B =back followed by indented subscription lines =over I [I ...] B<:> I [I ...] =back Such a line is equivalent to a sequence of lines =over I B<:> I [I ...] =back one for each I, in order. It is permitted for several lines to name the same I, though currently the behaviour is not good: they are treated entirely independently. The author is not sure what the correct behaviour ought to be. =head2 Tag lookup and value expansion The output of B is largely constructed by looking up tags and using their values. A tag is always looked up in a particular I and with reference to a particular I. Contexts are named with an I. The resulting value is the last assignment in the distribution's stanze whose tag is equal to the tag being looked up, and whose pattern is either absent or matches the context index. If there is no matching assignment, then the default assignments are checked, and again the last match is used. If there is no default assignment either, then the lookup fails; this might or might not be an error. Once the value has been found, it is I before use. Any placeholders of the form B<%>I or B<%{>IB<}> (the latter may be used to distinguish the I name from any immediately following text) are replaced by the (expanded) value of the I, using the same distribution and context as the original lookup. It is a fatal error for a lookup of a tag to fail during expansion. Recursive expansion is forbidden. There are some special tags given values by B. Their names are written in all upper-case. =head2 Output The output is always written to stdout. It begins with a header comment (which you can't modify), including a warning that the file is generated and shouldn't be edited. The output is split into sections, one for each I in the subcription stanze. Each section begins with a comment banner, whose text is the result of looking up the tag B in the distribution, using the context index B; if the lookup fails then no banner text is added. The distribution section is split into paragraphs, one for each I listed in the subscription line, and headed with a comment naming the I. The contents of the paragraph are determined by assignments in the distribution stanza for I. The set of context indices for the paragraph is determined, as follows. =over =item * The tag B is looked up in the distribution I. This lookup is special in three ways: firstly, lookup will I fall back to the B assignments; secondly, only assignments with no pattern (or, equivalently, with pattern C<*>) are examined; and, thirdly, the result is I subject to expansion. If a value is found, then the context indices are precisely the space-separated words of the value. =item * If there assignments in the distribution I whose patterns are I E i.e., contain no metacharacters C<*>, C, C<[>, or C<\\> E then the context indices are precisely these literal patterns, in the order in which they first appeared. =item * If all of the assignments for the distribution I have no pattern (or, equivalently, have pattern C<*>), then there is exactly one context index B. =item * Otherwise the situation is a fatal error. You should resolve this unlikely situation by setting an explicit B value. =back The contexts are now processed in turn. Each lookup described below happens in the distribution I, with respect to the context being processed. Furthermore, the special tag B is given the value I. The tag B is looked up, and split into a space-separated sequence of glob patterns. If the I doesn't match any of these patterns then the context is ignored. (If the lookup fails, the context is always used, as if the value had been C<*>.) Finally, a sequence of lines is written, of the form =over I S I B<]>> I I I =back one for each word in the value of B, defaulting to B B. Other pieces correspond to the values of tags to be looked up: I defaults to the name provided in the B stanza; if I is omitted then there will be no S I B<]>> piece; it is a fatal error if other lookups fail. =head1 EXAMPLES The package repository for the official Linux Spotify client can be described as follows. distribution spotify banner = Spotify client for Linux. uri = http://repository.spotify.com/ components = non-free types = deb subscribe spotify : stable This produces the output ###------------------------------------------------------------ ### Spotify client for Linux. ## stable deb http://repository.spotify.com/ stable non-free As a more complex example, I describe the official Debian package archive as follows. default debmirror = http://mirror.distorted.org.uk debsecurity = http://security.debian.org distribution debian banner = Debian GNU/Linux. uri[base] = %debmirror/debian/ uri[security-local] = %debmirror/debian-security/ uri[security-upstream] = %debsecurity/debian-security/ release[security-*] = %RELEASE/updates releases[security-*] = oldstable stable testing components = main non-free contrib components[security-*] = main subscribe debian : stable testing unstable This arranges to use my local mirror for both the main archive and for security updates, but I to use the upstream archive for security updates which I might not have mirrored yet. Setting B copes with the fact that there are no separate security releases for the B release. On machines which are far away from my mirror, I override these settings by writing distribution debian debmirror = http://ftp.uk.debian.org indices = base security-upstream in a host-local file (which has the effect of disabling the B context implicitly defined in the base stanza. =head1 BUGS Redefinition of subscriptions currently isn't well behaved. =head1 SEE ALSO L =head1 AUTHOR Mark Wooding =cut ###----- That's all, folks --------------------------------------------------