mkaptsrc: Perl's scoping for `my' variables is bobbins.
[distorted-bits] / mkaptsrc
CommitLineData
2920e7ba
MW
1#! /usr/bin/perl
2###
3### Construct an APT `sources.list' file
4###
5### (c) 2012 Mark Wooding
6###
7
8###----- Licensing notice ---------------------------------------------------
9###
10### This program is free software; you can redistribute it and/or modify
11### it under the terms of the GNU General Public License as published by
12### the Free Software Foundation; either version 2 of the License, or
13### (at your option) any later version.
14###
15### This program is distributed in the hope that it will be useful,
16### but WITHOUT ANY WARRANTY; without even the implied warranty of
17### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18### GNU General Public License for more details.
19###
20### You should have received a copy of the GNU General Public License
21### along with this program; if not, write to the Free Software Foundation,
22### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23
24use Data::Dumper;
25use File::FnMatch qw(:fnmatch);
26use Getopt::Long qw(:config gnu_compat bundling no_ignore_case);
27use Text::ParseWords;
28
29###--------------------------------------------------------------------------
30### Miscellaneous utilities.
31
32(our $QUIS = $0) =~ s:.*/::;
33our $VERSION = "1.0.0";
34
35sub fail ($) {
36 my ($msg) = @_;
37 ## Report a fatal error MSG and exit.
38
39 print STDERR "$QUIS: $msg\n";
40 exit 1;
41}
42
43sub literalp ($) {
44 my ($s) = @_;
45 ## Answer whether the string S is free of metacharacters.
46
47 return $s !~ /[\\?*[]/;
48}
49
50###--------------------------------------------------------------------------
51### Configuration sets.
52
53sub cset_new () {
54 ## Return a new configuration set.
55
56 return { "%meta" => { allstarp => 1, ixlist => [], ixmap => {} } };
57}
58
59sub cset_indices ($$) {
60 my ($cset, $what) = @_;
61 ## Return the list of literal indices in the configuration set CSET. If an
62 ## explicit `indices' tag is defined, then use its value (split at
63 ## whitespace). If there are explicit literal indices, return them (in the
64 ## correct order). If all indices are `*', return a single `default' item.
65 ## Otherwise report an error.
66
67 if (defined (my $it = $cset->{indices}{"*"})) {
68 return shellwords $it;
69 } else {
70 my $meta = $cset->{"%meta"};
71 $meta->{allstarp} and return "default";
72 return @{$meta->{ixlist}} if @{$meta->{ixlist}};
73 fail "no literal indices for `$what'";
74 }
75}
76
77sub cset_store ($$$$) {
78 my ($cset, $tag, $ix, $value) = @_;
79 ## Store VALUE in the configuration set CSET as the value for TAG with
80 ## index pattern IX.
81
82 my $meta = $cset->{"%meta"};
83 $ix eq "*" or $meta->{allstarp} = 0;
84 if (!$meta->{ixmap}{$ix} && literalp $ix) {
85 $meta->{ixmap}{$ix} = 1;
86 push @{$meta->{ixlist}}, $ix;
87 }
88 $cset->{$tag}{$ix} = $value;
89}
90
91sub cset_expand (\@$$);
92%PAINT = ();
93
94sub cset_lookup (\@$$;$) {
95 my ($cset, $tag, $ix, $mustp) = @_;
96 ## Look up TAG in the CSETs using index IX. Return the value corresponding
97 ## to the most specific match to IX in the earliest configuration set in
98 ## the list. If no set contains a matching value at all, then the
99 ## behaviour depends on MUSTP: if true, report an error; if false, return
100 ## undef.
101
102 if ($PAINT{$tag}) { fail "recursive expansion of `\%${tag}[$ix]'"; }
103 my $val = undef;
104 CSET: for my $cs (@$cset) {
105 defined (my $tset = $cs->{$tag}) or next CSET;
106 if (defined (my $it = $tset->{$ix})) { $val = $it; last CSET; };
107 my $pat = undef;
108 PAT: while (my ($p, $v) = each %$tset) {
109 fnmatch $p, $ix or next PAT;
110 unless (defined($pat) && fnmatch($p, $pat)) { $val = $v; $pat = $p; }
111 }
112 last CSET if defined $val;
113 }
114 if (defined $val) {
115 $PAINT{$tag} = 1;
116 my $exp = cset_expand @$cset, $ix, $val;
117 $PAINT{$tag} = 0;
118 return $exp;
119 } elsif ($mustp) { fail "variable `$tag\[$ix]' undefined"; }
120 else { return undef; }
121}
122
123sub cset_fetch (\%\@$$@) {
124 my ($a, $cset, $mustp, $ix, @tag) = @_;
125 ## Populate the hash A with values retrieved from the CSETs. Each TAG is
126 ## looked up with index IX, and if a value is found, it is stored in A with
127 ## key TAG. If MUSTP is true, then an error is reported unless a value is
128 ## found for every TAG.
129
130 for my $tag (@tag) {
131 my $v = cset_lookup @$cset, $tag, $ix, $mustp;
132 $a->{$tag} = $v if defined $v;
133 }
134}
135
136sub cset_expand (\@$$) {
137 my ($cset, $ix, $s) = @_;
138 ## Expand placeholders %TAG or %{TAG} in the string S, relative to the
139 ## CSETs and the index IX.
140
141 $s =~ s{
142 % (?: (?P<NAME>\w+)
143 | \{ (?P<NAME>\w+) \} )
144 }{
145 cset_lookup(@$cset, $+{NAME}, $ix, 1)
146 }xeg;
147 return $s;
148}
149
150###--------------------------------------------------------------------------
151### Parsing.
152
153our %DEFAULT = %{+cset_new}; # Default assignments.
154our %CSET = (); # Map of distro configuration sets.
155our @SUB = (); # List of subscriptions.
156
157sub parse ($) {
158 my ($fn) = @_;
159 ## Parse the file named by FN and add definitions to the tables %DEFAULT,
160 ## %CSET and @SUB.
161
162 ## Open the file and prepare to read.
163 open my $fh, "<", $fn or fail "open `$fn': $!";
164 my $ln = 0;
165
166 ## Report a syntax error, citing the offending file and line.
167 sub syntax { fail "$fn:$ln: $_[0]"; }
168
169 ## Report an error about an indented line with no stanza header.
170 sub nomode { syntax "missing stanza header" };
171 my $mode = \&nomode;
172
173 ## Parse an assignment LINE and store it in CSET.
174 sub assign {
175 my ($cset, $line) = @_;
176 $line =~ m{
177 ^ \s*
178 (?P<TAG> \w+)
179 (?: \[ (?P<IX> [^\]]+) \] )?
180 \s* = \s*
181 (?P<VALUE> | \S | \S.*\S)
182 \s* $
183 }x or syntax "invalid assignment";
184 cset_store $cset, $+{TAG}, $+{IX} // "*", $+{VALUE};
185 }
186
187 ## Parse a subscription LINE and store it in @SUB.
188 sub subscribe {
189 my ($line) = @_;
190 my @w = shellwords $line;
191 my @dist = ();
192 while (my $w = shift @w) { last if $w eq ":"; push @dist, $w; }
193 @w and @dist or syntax "empty distribution or release list";
194 push @SUB, [\@dist, \@w];
195 }
196
197 for (;;) {
198
199 ## Read a line. If it's empty or a comment then ignore it.
200 defined (my $line = readline $fh)
201 or last;
202 $ln++;
203 next if $line =~ /^\s*($|\#)/;
204 chomp $line;
205
206 ## If the line begins with whitespace then process it according to the
207 ## prevailing mode.
208 if ($line =~ /^\s/) {
209 $mode->($line);
210 next;
211 }
212
213 ## Split the header line into tokens and determine an action.
214 my @w = shellwords $line;
215 $mode = \&nomode;
216 if ($w[0] eq "distribution") {
217 @w == 2 or syntax "usage: distribution NAME";
218 my $cset = $CSET{$w[1]} //= cset_new;
219 $mode = sub { assign $cset, @_ };
220 } elsif ($w[0] eq "default") {
221 @w == 1 or syntax "usage: default";
222 $mode = sub { assign \%DEFAULT, @_ };
223 } elsif ($w[0] eq "subscribe") {
224 @w == 1 or syntax "usage: subscribe";
225 $mode = \&subscribe;
226 } else {
227 syntax "unknown toplevel directive `$w[0]'";
228 }
229 }
230
231 ## Done. Make sure we read everything.
232 close $fh or die "read `$fn': $!";
233}
234
235###--------------------------------------------------------------------------
236### Main program.
237
238our $USAGE = "usage: $QUIS FILE|DIR ...";
239sub version { print "$QUIS, version $VERSION\n"; }
240sub help {
241 print <<EOF;
242$USAGE
243
244Options:
245 -h, --help Show this help text.
246 -v, --version Show the program version number.
247EOF
248}
249
250GetOptions('help|h|?' => sub { version; help; exit; },
251 'version|v' => sub { version; exit; })
252 and @ARGV >= 1
253 or do { print STDERR $USAGE, "\n"; exit 1; };
254
255## Read the input files.
256for my $fn (@ARGV) {
257 if (-d $fn) {
258 opendir my $dh, $fn or fail "opendir `$fn': $!";
259 my @f = ();
260 FILE: while (my $f = readdir $dh) {
261 $f =~ /^[-\w.]+$/ or next FILE;
262 $f = "$fn/$f";
263 -f $f or next FILE;
264 push @f, $f;
265 }
266 closedir $dh;
267 for my $f (sort @f) { parse $f; }
268 } else {
269 parse $fn;
270 }
271}
272
273## Start writing output.
274print <<EOF;
275### -*-conf-*- GENERATED by $QUIS: DO NOT EDIT!
276###
277### Package sources.
278
279EOF
280
281## Work through the subscription list.
282for my $pair (@SUB) {
283 my @dist = @{$pair->[0]};
284 my @rel = @{$pair->[1]};
285
286 ## Write a stanza for each distribution.
287 for my $dist (@dist) {
288
289 ## Find the configuration set for the distribution.
290 defined (my $cset = $CSET{$dist})
291 or fail "unknown distribution `$dist'";
292 my @ix = cset_indices $cset, $dist;
293
294 ## Print a banner to break up the monotony.
295 my %a = ();
296 cset_fetch %a, @{[$cset, \%DEFAULT]}, 0, "default", qw(banner);
297 print "###", "-" x 74, "\n";
298 print "### ", $a{banner}, "\n" if exists $a{banner};
299
300 ## Write a paragraph for each release.
301 for my $rel (@rel) {
302
303 ## Write a header.
304 print "\n## $rel\n";
305
306 ## Prepare a list of configuration sections to provide variables for
307 ## expansion.
308 my @cset = ({ RELEASE => { "*" => $rel } }, $cset, \%DEFAULT);
309
310 ## Work through each index.
311 IX: for my $ix (@ix) {
312
313 ## Fetch properties from the configuration set.
314 %a = (options => undef,
315 release => $rel,
316 releases => "*",
317 types => "deb deb-src");
318 cset_fetch %a, @cset, 1, $ix, qw(uri components);
319 cset_fetch %a, @cset, 0, $ix, qw(types options release releases);
320
321 ## Check that this release matches the index.
322 my $matchp = 0;
323 for my $rpat (shellwords $a{releases}) {
324 $matchp = 1, last if fnmatch $rpat, $rel;
325 }
326 next IX unless $matchp;
327
328 ## Build an output line.
329 my $out = "";
a7ee99aa 330 if (defined (my $opt = $a{options})) { $out .= "[ $opt ] "; }
2920e7ba
MW
331 $out .= "$a{uri} $a{release} $a{components}";
332
333 ## Canonify whitespace.
334 $out =~ s/^\s+//; $out =~ s/\s+$//; $out =~ s/\s+/ /;
335
336 ## Write out the necessary
337 print "$_ $out\n" for shellwords $a{types};
338 }
339 }
340 print "\n";
341 }
342}
343
344## Write a trailer.
345print "###----- That's all, folks ", "-" x 50, "\n";
346print "### GENERATED by $QUIS: NO NOT EDIT!\n";
347
348###--------------------------------------------------------------------------
349### Documentation.
350
351=head1 NAME
352
353mkaptsrc - generate APT `sources.list' files
354
355=head1 SYNOPSIS
356
357B<mkaptsrc> I<file>|I<dir>...
358
359=head1 DESCRIPTION
360
361The B<mkaptsrc> progrem generates an APT F<sources.list> file from a
362collection of configuration files. It allows a site to use a single master
363file defining all (or most) of the available archives, while allowing each
364individiual host to describe succinctly which archives it actually wants to
365track.
366
367The command line arguments are a list of one or more filenames and/or
368directories. The program reads the files one by one, in order; a directory
369stands for all of the regular files it contains whose names consist only of
370alphanumeric characters, dots C<.>, underscores C<_>, and hyphens C<->, in
371ascending lexicographic order. (Nested subdirectories are ignored.) Later
372files can override settings from earlier ones.
373
374=head2 Command-line syntax
375
376The following command-line options are recognized.
377
378=over
379
380=item B<-h>, B<--help>
381
382Print help about the program to standard output, and exit.
383
384=item B<-v>, B<--version>
385
386Print B<mkaptsrc>'s version number to standard output, and exit.
387
388=back
389
390=head2 Configuration syntax
391
392The configuration files are split into stanze. Each stanza begins with an
393unindented header line, followed by zero or more indented body lines. Blank
394lines (containing only whitespace) and comments (whose first non-whitespace
395character is C<#>) are ignored E<ndash> and in particular are not considered
396when determining the boundaries of stanze. It is not possible to split a
397stanza between two files.
398
399A I<distribution stanza> consists of a line
400
401=over
402
403B<distribution> I<dist>
404
405=back
406
407followed by a number of indented assignments
408
409=over
410
411I<tag> = I<value>
412
413=back
414
415or
416
417=over
418
419I<tag>B<[>I<pat>B<]> = I<value>
420
421=back
422
423Here, I<dist> is a name for this distribution; this name is entirely internal
424to the configuration and has no external meaning. Several stanze may use the
425same I<dist>: the effect is the same as a single big stanza containing all of
426the assignments in order.
427
428Each assignment line sets the value of a I<tag> for the distribution; if the
429I<tag> has already been assigned a value then the old value is forgotten.
430The optional I<pat> may be used to assign different values to the same tag
431according to different I<contexts>, distinguished by glob patterns: see the
432description below. Omitting the I<pat> is equivalent to using the wildcard
433pattern C<*>.
434
435A I<default stanza> consists of a line
436
437=over
438
439B<defaults>
440
441=back
442
443followed by assignments as for a distribution stanza. Again, there may be
444many default stanze, and the effect is the same as a single big default
445stanza containing all of the assignments in order. During output, tags are
446looked up first in the relevant distribution, and if there no matching
447assignments then the B<defaults> assignments are searched.
448
449A I<subscription stanza> consists of a line
450
451=over
452
453B<subscribe>
454
455=back
456
457followed by indented subscription lines
458
459=over
460
461I<dist> [I<dist> ...] B<:> I<release> [I<release> ...]
462
463=back
464
465Such a line is equivalent to a sequence of lines
466
467=over
468
469I<dist> B<:> I<release> [I<release> ...]
470
471=back
472
473one for each I<dist>, in order.
474
475It is permitted for several lines to name the same I<dist>, though currently
476the behaviour is not good: they are treated entirely independently. The
477author is not sure what the correct behaviour ought to be.
478
479=head2 Tag lookup and value expansion
480
481The output of B<mkaptsrc> is largely constructed by looking up tags and using
482their values. A tag is always looked up in a particular I<distribution> and
483with reference to a particular I<context>. Contexts are named with an
484I<index>. The resulting value is the last assignment in the distribution's
485stanze whose tag is equal to the tag being looked up, and whose pattern is
486either absent or matches the context index. If there is no matching
487assignment, then the default assignments are checked, and again the last
488match is used. If there is no default assignment either, then the lookup
489fails; this might or might not be an error.
490
491Once the value has been found, it is I<expanded> before use. Any
492placeholders of the form B<%>I<tag> or B<%{>I<tag>B<}> (the latter may be
493used to distinguish the I<tag> name from any immediately following text) are
494replaced by the (expanded) value of the I<tag>, using the same distribution
495and context as the original lookup. It is a fatal error for a lookup of a
496tag to fail during expansion. Recursive expansion is forbidden.
497
498There are some special tags given values by B<mkaptsrc>. Their names are
499written in all upper-case.
500
501=head2 Output
502
503The output is always written to stdout. It begins with a header comment
504(which you can't modify), including a warning that the file is generated and
505shouldn't be edited.
506
507The output is split into sections, one for each I<dist> in the subcription
508stanze. Each section begins with a comment banner, whose text is the result
509of looking up the tag B<banner> in the distribution, using the context index
510B<default>; if the lookup fails then no banner text is added.
511
512The distribution section is split into paragraphs, one for each I<release>
513listed in the subscription line, and headed with a comment naming the
514I<release>. The contents of the paragraph are determined by assignments in
515the distribution stanza for I<dist>.
516
517The set of context indices for the paragraph is determined, as follows.
518
519=over
520
521=item *
522
523The tag B<indices> is looked up in the distribution I<dist>. This lookup is
524special in three ways: firstly, lookup will I<not> fall back to the
525B<defaults> assignments; secondly, only assignments with no pattern (or,
526equivalently, with pattern C<*>) are examined; and, thirdly, the result is
527I<not> subject to expansion. If a value is found, then the context indices
528are precisely the space-separated words of the value.
529
530=item *
531
532If there assignments in the distribution I<dist> whose patterns are
533I<literal> E<ndash> i.e., contain no metacharacters C<*>, C<?>, C<[>, or
534C<\\> E<ndash> then the context indices are precisely these literal patterns,
535in the order in which they first appeared.
536
537=item *
538
539If all of the assignments for the distribution I<dist> have no pattern (or,
540equivalently, have pattern C<*>), then there is exactly one context index
541B<default>.
542
543=item *
544
545Otherwise the situation is a fatal error. You should resolve this unlikely
546situation by setting an explicit B<indices> value.
547
548=back
549
550The contexts are now processed in turn. Each lookup described below happens
551in the distribution I<dist>, with respect to the context being processed.
552Furthermore, the special tag B<RELEASE> is given the value I<release>.
553
554The tag B<releases> is looked up, and split into a space-separated sequence
555of glob patterns. If the I<release> doesn't match any of these patterns then
556the context is ignored. (If the lookup fails, the context is always used,
557as if the value had been C<*>.)
558
559Finally, a sequence of lines is written, of the form
560
561=over
562
563I<type> S<B<[> I<options> B<]>> I<uri> I<release> I<components>
564
565=back
566
567one for each word in the value of B<types>, defaulting to B<deb> B<deb-src>.
568Other pieces correspond to the values of tags to be looked up: I<release>
569defaults to the name provided in the B<subscribe> stanza; if I<options> is
570omitted then there will be no S<B<[> I<options> B<]>> piece; it is a fatal
571error if other lookups fail.
572
573=head1 EXAMPLES
574
575The package repository for the official Linux Spotify client can be described
576as follows.
577
578 distribution spotify
579 banner = Spotify client for Linux.
580 uri = http://repository.spotify.com/
581 components = non-free
582 types = deb
583
584 subscribe
585 spotify : stable
586
587This produces the output
588
589 ###------------------------------------------------------------
590 ### Spotify client for Linux.
591
592 ## stable
593 deb http://repository.spotify.com/ stable non-free
594
595As a more complex example, I describe the official Debian package archive as
596follows.
597
598 default
599 debmirror = http://mirror.distorted.org.uk
600 debsecurity = http://security.debian.org
601
602 distribution debian
603 banner = Debian GNU/Linux.
604 uri[base] = %debmirror/debian/
605 uri[security-local] = %debmirror/debian-security/
606 uri[security-upstream] = %debsecurity/debian-security/
607 release[security-*] = %RELEASE/updates
608 releases[security-*] = oldstable stable testing
609 components = main non-free contrib
610 components[security-*] = main
611
612 subscribe
613 debian : stable testing unstable
614
615This arranges to use my local mirror for both the main archive and for
616security updates, but I<also> to use the upstream archive for security
617updates which I might not have mirrored yet. Setting B<releases[security-*]>
618copes with the fact that there are no separate security releases for the
619B<unstable> release.
620
621On machines which are far away from my mirror, I override these settings by
622writing
623
624 distribution debian
625 debmirror = http://ftp.uk.debian.org
626 indices = base security-upstream
627
628in a host-local file (which has the effect of disabling the B<security-local>
629context implicitly defined in the base stanza.
630
631=head1 BUGS
632
633Redefinition of subscriptions currently isn't well behaved.
634
635=head1 SEE ALSO
636
637L<sources.list(5)>
638
639=head1 AUTHOR
640
641Mark Wooding <mdw@distorted.org.uk>
642
643=cut
644
645###----- That's all, folks --------------------------------------------------