From 2920e7ba11655d5849763f79633130f2858685fc Mon Sep 17 00:00:00 2001 From: Mark Wooding Date: Fri, 14 Jun 2013 10:36:47 +0100 Subject: [PATCH] mkaptsrc: Been in service for a while, but now worth distributing. --- mkaptsrc | 645 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 645 insertions(+) create mode 100755 mkaptsrc diff --git a/mkaptsrc b/mkaptsrc new file mode 100755 index 0000000..684f964 --- /dev/null +++ b/mkaptsrc @@ -0,0 +1,645 @@ +#! /usr/bin/perl +### +### Construct an APT `sources.list' file +### +### (c) 2012 Mark Wooding +### + +###----- Licensing notice --------------------------------------------------- +### +### This program is free software; you can redistribute it and/or modify +### it under the terms of the GNU General Public License as published by +### the Free Software Foundation; either version 2 of the License, or +### (at your option) any later version. +### +### This program is distributed in the hope that it will be useful, +### but WITHOUT ANY WARRANTY; without even the implied warranty of +### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +### GNU General Public License for more details. +### +### You should have received a copy of the GNU General Public License +### along with this program; if not, write to the Free Software Foundation, +### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +use Data::Dumper; +use File::FnMatch qw(:fnmatch); +use Getopt::Long qw(:config gnu_compat bundling no_ignore_case); +use Text::ParseWords; + +###-------------------------------------------------------------------------- +### Miscellaneous utilities. + +(our $QUIS = $0) =~ s:.*/::; +our $VERSION = "1.0.0"; + +sub fail ($) { + my ($msg) = @_; + ## Report a fatal error MSG and exit. + + print STDERR "$QUIS: $msg\n"; + exit 1; +} + +sub literalp ($) { + my ($s) = @_; + ## Answer whether the string S is free of metacharacters. + + return $s !~ /[\\?*[]/; +} + +###-------------------------------------------------------------------------- +### Configuration sets. + +sub cset_new () { + ## Return a new configuration set. + + return { "%meta" => { allstarp => 1, ixlist => [], ixmap => {} } }; +} + +sub cset_indices ($$) { + my ($cset, $what) = @_; + ## Return the list of literal indices in the configuration set CSET. If an + ## explicit `indices' tag is defined, then use its value (split at + ## whitespace). If there are explicit literal indices, return them (in the + ## correct order). If all indices are `*', return a single `default' item. + ## Otherwise report an error. + + if (defined (my $it = $cset->{indices}{"*"})) { + return shellwords $it; + } else { + my $meta = $cset->{"%meta"}; + $meta->{allstarp} and return "default"; + return @{$meta->{ixlist}} if @{$meta->{ixlist}}; + fail "no literal indices for `$what'"; + } +} + +sub cset_store ($$$$) { + my ($cset, $tag, $ix, $value) = @_; + ## Store VALUE in the configuration set CSET as the value for TAG with + ## index pattern IX. + + my $meta = $cset->{"%meta"}; + $ix eq "*" or $meta->{allstarp} = 0; + if (!$meta->{ixmap}{$ix} && literalp $ix) { + $meta->{ixmap}{$ix} = 1; + push @{$meta->{ixlist}}, $ix; + } + $cset->{$tag}{$ix} = $value; +} + +sub cset_expand (\@$$); +%PAINT = (); + +sub cset_lookup (\@$$;$) { + my ($cset, $tag, $ix, $mustp) = @_; + ## Look up TAG in the CSETs using index IX. Return the value corresponding + ## to the most specific match to IX in the earliest configuration set in + ## the list. If no set contains a matching value at all, then the + ## behaviour depends on MUSTP: if true, report an error; if false, return + ## undef. + + if ($PAINT{$tag}) { fail "recursive expansion of `\%${tag}[$ix]'"; } + my $val = undef; + CSET: for my $cs (@$cset) { + defined (my $tset = $cs->{$tag}) or next CSET; + if (defined (my $it = $tset->{$ix})) { $val = $it; last CSET; }; + my $pat = undef; + PAT: while (my ($p, $v) = each %$tset) { + fnmatch $p, $ix or next PAT; + unless (defined($pat) && fnmatch($p, $pat)) { $val = $v; $pat = $p; } + } + last CSET if defined $val; + } + if (defined $val) { + $PAINT{$tag} = 1; + my $exp = cset_expand @$cset, $ix, $val; + $PAINT{$tag} = 0; + return $exp; + } elsif ($mustp) { fail "variable `$tag\[$ix]' undefined"; } + else { return undef; } +} + +sub cset_fetch (\%\@$$@) { + my ($a, $cset, $mustp, $ix, @tag) = @_; + ## Populate the hash A with values retrieved from the CSETs. Each TAG is + ## looked up with index IX, and if a value is found, it is stored in A with + ## key TAG. If MUSTP is true, then an error is reported unless a value is + ## found for every TAG. + + for my $tag (@tag) { + my $v = cset_lookup @$cset, $tag, $ix, $mustp; + $a->{$tag} = $v if defined $v; + } +} + +sub cset_expand (\@$$) { + my ($cset, $ix, $s) = @_; + ## Expand placeholders %TAG or %{TAG} in the string S, relative to the + ## CSETs and the index IX. + + $s =~ s{ + % (?: (?P\w+) + | \{ (?P\w+) \} ) + }{ + cset_lookup(@$cset, $+{NAME}, $ix, 1) + }xeg; + return $s; +} + +###-------------------------------------------------------------------------- +### Parsing. + +our %DEFAULT = %{+cset_new}; # Default assignments. +our %CSET = (); # Map of distro configuration sets. +our @SUB = (); # List of subscriptions. + +sub parse ($) { + my ($fn) = @_; + ## Parse the file named by FN and add definitions to the tables %DEFAULT, + ## %CSET and @SUB. + + ## Open the file and prepare to read. + open my $fh, "<", $fn or fail "open `$fn': $!"; + my $ln = 0; + + ## Report a syntax error, citing the offending file and line. + sub syntax { fail "$fn:$ln: $_[0]"; } + + ## Report an error about an indented line with no stanza header. + sub nomode { syntax "missing stanza header" }; + my $mode = \&nomode; + + ## Parse an assignment LINE and store it in CSET. + sub assign { + my ($cset, $line) = @_; + $line =~ m{ + ^ \s* + (?P \w+) + (?: \[ (?P [^\]]+) \] )? + \s* = \s* + (?P | \S | \S.*\S) + \s* $ + }x or syntax "invalid assignment"; + cset_store $cset, $+{TAG}, $+{IX} // "*", $+{VALUE}; + } + + ## Parse a subscription LINE and store it in @SUB. + sub subscribe { + my ($line) = @_; + my @w = shellwords $line; + my @dist = (); + while (my $w = shift @w) { last if $w eq ":"; push @dist, $w; } + @w and @dist or syntax "empty distribution or release list"; + push @SUB, [\@dist, \@w]; + } + + for (;;) { + + ## Read a line. If it's empty or a comment then ignore it. + defined (my $line = readline $fh) + or last; + $ln++; + next if $line =~ /^\s*($|\#)/; + chomp $line; + + ## If the line begins with whitespace then process it according to the + ## prevailing mode. + if ($line =~ /^\s/) { + $mode->($line); + next; + } + + ## Split the header line into tokens and determine an action. + my @w = shellwords $line; + $mode = \&nomode; + if ($w[0] eq "distribution") { + @w == 2 or syntax "usage: distribution NAME"; + my $cset = $CSET{$w[1]} //= cset_new; + $mode = sub { assign $cset, @_ }; + } elsif ($w[0] eq "default") { + @w == 1 or syntax "usage: default"; + $mode = sub { assign \%DEFAULT, @_ }; + } elsif ($w[0] eq "subscribe") { + @w == 1 or syntax "usage: subscribe"; + $mode = \&subscribe; + } else { + syntax "unknown toplevel directive `$w[0]'"; + } + } + + ## Done. Make sure we read everything. + close $fh or die "read `$fn': $!"; +} + +###-------------------------------------------------------------------------- +### Main program. + +our $USAGE = "usage: $QUIS FILE|DIR ..."; +sub version { print "$QUIS, version $VERSION\n"; } +sub help { + print < sub { version; help; exit; }, + 'version|v' => sub { version; exit; }) + and @ARGV >= 1 + or do { print STDERR $USAGE, "\n"; exit 1; }; + +## Read the input files. +for my $fn (@ARGV) { + if (-d $fn) { + opendir my $dh, $fn or fail "opendir `$fn': $!"; + my @f = (); + FILE: while (my $f = readdir $dh) { + $f =~ /^[-\w.]+$/ or next FILE; + $f = "$fn/$f"; + -f $f or next FILE; + push @f, $f; + } + closedir $dh; + for my $f (sort @f) { parse $f; } + } else { + parse $fn; + } +} + +## Start writing output. +print <[0]}; + my @rel = @{$pair->[1]}; + + ## Write a stanza for each distribution. + for my $dist (@dist) { + + ## Find the configuration set for the distribution. + defined (my $cset = $CSET{$dist}) + or fail "unknown distribution `$dist'"; + my @ix = cset_indices $cset, $dist; + + ## Print a banner to break up the monotony. + my %a = (); + cset_fetch %a, @{[$cset, \%DEFAULT]}, 0, "default", qw(banner); + print "###", "-" x 74, "\n"; + print "### ", $a{banner}, "\n" if exists $a{banner}; + + ## Write a paragraph for each release. + for my $rel (@rel) { + + ## Write a header. + print "\n## $rel\n"; + + ## Prepare a list of configuration sections to provide variables for + ## expansion. + my @cset = ({ RELEASE => { "*" => $rel } }, $cset, \%DEFAULT); + + ## Work through each index. + IX: for my $ix (@ix) { + + ## Fetch properties from the configuration set. + %a = (options => undef, + release => $rel, + releases => "*", + types => "deb deb-src"); + cset_fetch %a, @cset, 1, $ix, qw(uri components); + cset_fetch %a, @cset, 0, $ix, qw(types options release releases); + + ## Check that this release matches the index. + my $matchp = 0; + for my $rpat (shellwords $a{releases}) { + $matchp = 1, last if fnmatch $rpat, $rel; + } + next IX unless $matchp; + + ## Build an output line. + my $out = ""; + defined (my $opt = $a{options}) and $out .= "[ $opt ] "; + $out .= "$a{uri} $a{release} $a{components}"; + + ## Canonify whitespace. + $out =~ s/^\s+//; $out =~ s/\s+$//; $out =~ s/\s+/ /; + + ## Write out the necessary + print "$_ $out\n" for shellwords $a{types}; + } + } + print "\n"; + } +} + +## Write a trailer. +print "###----- That's all, folks ", "-" x 50, "\n"; +print "### GENERATED by $QUIS: NO NOT EDIT!\n"; + +###-------------------------------------------------------------------------- +### Documentation. + +=head1 NAME + +mkaptsrc - generate APT `sources.list' files + +=head1 SYNOPSIS + +B I|I... + +=head1 DESCRIPTION + +The B progrem generates an APT F file from a +collection of configuration files. It allows a site to use a single master +file defining all (or most) of the available archives, while allowing each +individiual host to describe succinctly which archives it actually wants to +track. + +The command line arguments are a list of one or more filenames and/or +directories. The program reads the files one by one, in order; a directory +stands for all of the regular files it contains whose names consist only of +alphanumeric characters, dots C<.>, underscores C<_>, and hyphens C<->, in +ascending lexicographic order. (Nested subdirectories are ignored.) Later +files can override settings from earlier ones. + +=head2 Command-line syntax + +The following command-line options are recognized. + +=over + +=item B<-h>, B<--help> + +Print help about the program to standard output, and exit. + +=item B<-v>, B<--version> + +Print B's version number to standard output, and exit. + +=back + +=head2 Configuration syntax + +The configuration files are split into stanze. Each stanza begins with an +unindented header line, followed by zero or more indented body lines. Blank +lines (containing only whitespace) and comments (whose first non-whitespace +character is C<#>) are ignored E and in particular are not considered +when determining the boundaries of stanze. It is not possible to split a +stanza between two files. + +A I consists of a line + +=over + +B I + +=back + +followed by a number of indented assignments + +=over + +I = I + +=back + +or + +=over + +IB<[>IB<]> = I + +=back + +Here, I is a name for this distribution; this name is entirely internal +to the configuration and has no external meaning. Several stanze may use the +same I: the effect is the same as a single big stanza containing all of +the assignments in order. + +Each assignment line sets the value of a I for the distribution; if the +I has already been assigned a value then the old value is forgotten. +The optional I may be used to assign different values to the same tag +according to different I, distinguished by glob patterns: see the +description below. Omitting the I is equivalent to using the wildcard +pattern C<*>. + +A I consists of a line + +=over + +B + +=back + +followed by assignments as for a distribution stanza. Again, there may be +many default stanze, and the effect is the same as a single big default +stanza containing all of the assignments in order. During output, tags are +looked up first in the relevant distribution, and if there no matching +assignments then the B assignments are searched. + +A I consists of a line + +=over + +B + +=back + +followed by indented subscription lines + +=over + +I [I ...] B<:> I [I ...] + +=back + +Such a line is equivalent to a sequence of lines + +=over + +I B<:> I [I ...] + +=back + +one for each I, in order. + +It is permitted for several lines to name the same I, though currently +the behaviour is not good: they are treated entirely independently. The +author is not sure what the correct behaviour ought to be. + +=head2 Tag lookup and value expansion + +The output of B is largely constructed by looking up tags and using +their values. A tag is always looked up in a particular I and +with reference to a particular I. Contexts are named with an +I. The resulting value is the last assignment in the distribution's +stanze whose tag is equal to the tag being looked up, and whose pattern is +either absent or matches the context index. If there is no matching +assignment, then the default assignments are checked, and again the last +match is used. If there is no default assignment either, then the lookup +fails; this might or might not be an error. + +Once the value has been found, it is I before use. Any +placeholders of the form B<%>I or B<%{>IB<}> (the latter may be +used to distinguish the I name from any immediately following text) are +replaced by the (expanded) value of the I, using the same distribution +and context as the original lookup. It is a fatal error for a lookup of a +tag to fail during expansion. Recursive expansion is forbidden. + +There are some special tags given values by B. Their names are +written in all upper-case. + +=head2 Output + +The output is always written to stdout. It begins with a header comment +(which you can't modify), including a warning that the file is generated and +shouldn't be edited. + +The output is split into sections, one for each I in the subcription +stanze. Each section begins with a comment banner, whose text is the result +of looking up the tag B in the distribution, using the context index +B; if the lookup fails then no banner text is added. + +The distribution section is split into paragraphs, one for each I +listed in the subscription line, and headed with a comment naming the +I. The contents of the paragraph are determined by assignments in +the distribution stanza for I. + +The set of context indices for the paragraph is determined, as follows. + +=over + +=item * + +The tag B is looked up in the distribution I. This lookup is +special in three ways: firstly, lookup will I fall back to the +B assignments; secondly, only assignments with no pattern (or, +equivalently, with pattern C<*>) are examined; and, thirdly, the result is +I subject to expansion. If a value is found, then the context indices +are precisely the space-separated words of the value. + +=item * + +If there assignments in the distribution I whose patterns are +I E i.e., contain no metacharacters C<*>, C, C<[>, or +C<\\> E then the context indices are precisely these literal patterns, +in the order in which they first appeared. + +=item * + +If all of the assignments for the distribution I have no pattern (or, +equivalently, have pattern C<*>), then there is exactly one context index +B. + +=item * + +Otherwise the situation is a fatal error. You should resolve this unlikely +situation by setting an explicit B value. + +=back + +The contexts are now processed in turn. Each lookup described below happens +in the distribution I, with respect to the context being processed. +Furthermore, the special tag B is given the value I. + +The tag B is looked up, and split into a space-separated sequence +of glob patterns. If the I doesn't match any of these patterns then +the context is ignored. (If the lookup fails, the context is always used, +as if the value had been C<*>.) + +Finally, a sequence of lines is written, of the form + +=over + +I S I B<]>> I I I + +=back + +one for each word in the value of B, defaulting to B B. +Other pieces correspond to the values of tags to be looked up: I +defaults to the name provided in the B stanza; if I is +omitted then there will be no S I B<]>> piece; it is a fatal +error if other lookups fail. + +=head1 EXAMPLES + +The package repository for the official Linux Spotify client can be described +as follows. + + distribution spotify + banner = Spotify client for Linux. + uri = http://repository.spotify.com/ + components = non-free + types = deb + + subscribe + spotify : stable + +This produces the output + + ###------------------------------------------------------------ + ### Spotify client for Linux. + + ## stable + deb http://repository.spotify.com/ stable non-free + +As a more complex example, I describe the official Debian package archive as +follows. + + default + debmirror = http://mirror.distorted.org.uk + debsecurity = http://security.debian.org + + distribution debian + banner = Debian GNU/Linux. + uri[base] = %debmirror/debian/ + uri[security-local] = %debmirror/debian-security/ + uri[security-upstream] = %debsecurity/debian-security/ + release[security-*] = %RELEASE/updates + releases[security-*] = oldstable stable testing + components = main non-free contrib + components[security-*] = main + + subscribe + debian : stable testing unstable + +This arranges to use my local mirror for both the main archive and for +security updates, but I to use the upstream archive for security +updates which I might not have mirrored yet. Setting B +copes with the fact that there are no separate security releases for the +B release. + +On machines which are far away from my mirror, I override these settings by +writing + + distribution debian + debmirror = http://ftp.uk.debian.org + indices = base security-upstream + +in a host-local file (which has the effect of disabling the B +context implicitly defined in the base stanza. + +=head1 BUGS + +Redefinition of subscriptions currently isn't well behaved. + +=head1 SEE ALSO + +L + +=head1 AUTHOR + +Mark Wooding + +=cut + +###----- That's all, folks -------------------------------------------------- -- 2.11.0