mkaptsrc: Use `local' for tracking paintedness of tags.
[distorted-bits] / mkaptsrc
CommitLineData
2920e7ba
MW
1#! /usr/bin/perl
2###
3### Construct an APT `sources.list' file
4###
5### (c) 2012 Mark Wooding
6###
7
8###----- Licensing notice ---------------------------------------------------
9###
10### This program is free software; you can redistribute it and/or modify
11### it under the terms of the GNU General Public License as published by
12### the Free Software Foundation; either version 2 of the License, or
13### (at your option) any later version.
14###
15### This program is distributed in the hope that it will be useful,
16### but WITHOUT ANY WARRANTY; without even the implied warranty of
17### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18### GNU General Public License for more details.
19###
20### You should have received a copy of the GNU General Public License
21### along with this program; if not, write to the Free Software Foundation,
22### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23
24use Data::Dumper;
25use File::FnMatch qw(:fnmatch);
26use Getopt::Long qw(:config gnu_compat bundling no_ignore_case);
27use Text::ParseWords;
28
29###--------------------------------------------------------------------------
30### Miscellaneous utilities.
31
32(our $QUIS = $0) =~ s:.*/::;
33our $VERSION = "1.0.0";
34
35sub fail ($) {
36 my ($msg) = @_;
37 ## Report a fatal error MSG and exit.
38
39 print STDERR "$QUIS: $msg\n";
40 exit 1;
41}
42
43sub literalp ($) {
44 my ($s) = @_;
45 ## Answer whether the string S is free of metacharacters.
46
47 return $s !~ /[\\?*[]/;
48}
49
50###--------------------------------------------------------------------------
51### Configuration sets.
52
53sub cset_new () {
54 ## Return a new configuration set.
55
56 return { "%meta" => { allstarp => 1, ixlist => [], ixmap => {} } };
57}
58
59sub cset_indices ($$) {
60 my ($cset, $what) = @_;
61 ## Return the list of literal indices in the configuration set CSET. If an
62 ## explicit `indices' tag is defined, then use its value (split at
63 ## whitespace). If there are explicit literal indices, return them (in the
64 ## correct order). If all indices are `*', return a single `default' item.
65 ## Otherwise report an error.
66
67 if (defined (my $it = $cset->{indices}{"*"})) {
68 return shellwords $it;
69 } else {
70 my $meta = $cset->{"%meta"};
71 $meta->{allstarp} and return "default";
72 return @{$meta->{ixlist}} if @{$meta->{ixlist}};
73 fail "no literal indices for `$what'";
74 }
75}
76
77sub cset_store ($$$$) {
78 my ($cset, $tag, $ix, $value) = @_;
79 ## Store VALUE in the configuration set CSET as the value for TAG with
80 ## index pattern IX.
81
82 my $meta = $cset->{"%meta"};
83 $ix eq "*" or $meta->{allstarp} = 0;
84 if (!$meta->{ixmap}{$ix} && literalp $ix) {
85 $meta->{ixmap}{$ix} = 1;
86 push @{$meta->{ixlist}}, $ix;
87 }
88 $cset->{$tag}{$ix} = $value;
89}
90
91sub cset_expand (\@$$);
92%PAINT = ();
93
94sub cset_lookup (\@$$;$) {
95 my ($cset, $tag, $ix, $mustp) = @_;
96 ## Look up TAG in the CSETs using index IX. Return the value corresponding
97 ## to the most specific match to IX in the earliest configuration set in
98 ## the list. If no set contains a matching value at all, then the
99 ## behaviour depends on MUSTP: if true, report an error; if false, return
100 ## undef.
101
102 if ($PAINT{$tag}) { fail "recursive expansion of `\%${tag}[$ix]'"; }
103 my $val = undef;
104 CSET: for my $cs (@$cset) {
105 defined (my $tset = $cs->{$tag}) or next CSET;
106 if (defined (my $it = $tset->{$ix})) { $val = $it; last CSET; };
107 my $pat = undef;
108 PAT: while (my ($p, $v) = each %$tset) {
109 fnmatch $p, $ix or next PAT;
110 unless (defined($pat) && fnmatch($p, $pat)) { $val = $v; $pat = $p; }
111 }
112 last CSET if defined $val;
113 }
114 if (defined $val) {
b8726cff 115 local $PAINT{$tag} = 1;
2920e7ba 116 my $exp = cset_expand @$cset, $ix, $val;
2920e7ba
MW
117 return $exp;
118 } elsif ($mustp) { fail "variable `$tag\[$ix]' undefined"; }
119 else { return undef; }
120}
121
122sub cset_fetch (\%\@$$@) {
123 my ($a, $cset, $mustp, $ix, @tag) = @_;
124 ## Populate the hash A with values retrieved from the CSETs. Each TAG is
125 ## looked up with index IX, and if a value is found, it is stored in A with
126 ## key TAG. If MUSTP is true, then an error is reported unless a value is
127 ## found for every TAG.
128
129 for my $tag (@tag) {
130 my $v = cset_lookup @$cset, $tag, $ix, $mustp;
131 $a->{$tag} = $v if defined $v;
132 }
133}
134
135sub cset_expand (\@$$) {
136 my ($cset, $ix, $s) = @_;
137 ## Expand placeholders %TAG or %{TAG} in the string S, relative to the
138 ## CSETs and the index IX.
139
140 $s =~ s{
141 % (?: (?P<NAME>\w+)
142 | \{ (?P<NAME>\w+) \} )
143 }{
144 cset_lookup(@$cset, $+{NAME}, $ix, 1)
145 }xeg;
146 return $s;
147}
148
149###--------------------------------------------------------------------------
150### Parsing.
151
152our %DEFAULT = %{+cset_new}; # Default assignments.
153our %CSET = (); # Map of distro configuration sets.
154our @SUB = (); # List of subscriptions.
155
156sub parse ($) {
157 my ($fn) = @_;
158 ## Parse the file named by FN and add definitions to the tables %DEFAULT,
159 ## %CSET and @SUB.
160
161 ## Open the file and prepare to read.
162 open my $fh, "<", $fn or fail "open `$fn': $!";
163 my $ln = 0;
164
165 ## Report a syntax error, citing the offending file and line.
34e50710 166 my $syntax = sub { fail "$fn:$ln: $_[0]" };
2920e7ba
MW
167
168 ## Report an error about an indented line with no stanza header.
34e50710
MW
169 my $nomode = sub { $syntax->("missing stanza header") };
170 my $mode = $nomode;
2920e7ba
MW
171
172 ## Parse an assignment LINE and store it in CSET.
34e50710 173 my $assign = sub {
2920e7ba
MW
174 my ($cset, $line) = @_;
175 $line =~ m{
176 ^ \s*
177 (?P<TAG> \w+)
178 (?: \[ (?P<IX> [^\]]+) \] )?
179 \s* = \s*
180 (?P<VALUE> | \S | \S.*\S)
181 \s* $
34e50710 182 }x or $syntax->("invalid assignment");
2920e7ba 183 cset_store $cset, $+{TAG}, $+{IX} // "*", $+{VALUE};
34e50710 184 };
2920e7ba
MW
185
186 ## Parse a subscription LINE and store it in @SUB.
34e50710 187 my $subscribe = sub {
2920e7ba
MW
188 my ($line) = @_;
189 my @w = shellwords $line;
190 my @dist = ();
191 while (my $w = shift @w) { last if $w eq ":"; push @dist, $w; }
34e50710 192 @w and @dist or $syntax->("empty distribution or release list");
2920e7ba 193 push @SUB, [\@dist, \@w];
34e50710 194 };
2920e7ba
MW
195
196 for (;;) {
197
198 ## Read a line. If it's empty or a comment then ignore it.
199 defined (my $line = readline $fh)
200 or last;
201 $ln++;
202 next if $line =~ /^\s*($|\#)/;
203 chomp $line;
204
205 ## If the line begins with whitespace then process it according to the
206 ## prevailing mode.
207 if ($line =~ /^\s/) {
208 $mode->($line);
209 next;
210 }
211
212 ## Split the header line into tokens and determine an action.
213 my @w = shellwords $line;
34e50710 214 $mode = $nomode;
2920e7ba 215 if ($w[0] eq "distribution") {
34e50710 216 @w == 2 or $syntax->("usage: distribution NAME");
2920e7ba 217 my $cset = $CSET{$w[1]} //= cset_new;
34e50710 218 $mode = sub { $assign->($cset, @_) };
2920e7ba 219 } elsif ($w[0] eq "default") {
34e50710
MW
220 @w == 1 or $syntax->("usage: default");
221 $mode = sub { $assign->(\%DEFAULT, @_) };
2920e7ba 222 } elsif ($w[0] eq "subscribe") {
34e50710
MW
223 @w == 1 or $syntax->("usage: subscribe");
224 $mode = $subscribe;
2920e7ba 225 } else {
34e50710 226 $syntax->("unknown toplevel directive `$w[0]'");
2920e7ba
MW
227 }
228 }
229
230 ## Done. Make sure we read everything.
231 close $fh or die "read `$fn': $!";
232}
233
234###--------------------------------------------------------------------------
235### Main program.
236
237our $USAGE = "usage: $QUIS FILE|DIR ...";
238sub version { print "$QUIS, version $VERSION\n"; }
239sub help {
240 print <<EOF;
241$USAGE
242
243Options:
244 -h, --help Show this help text.
245 -v, --version Show the program version number.
246EOF
247}
248
249GetOptions('help|h|?' => sub { version; help; exit; },
250 'version|v' => sub { version; exit; })
251 and @ARGV >= 1
252 or do { print STDERR $USAGE, "\n"; exit 1; };
253
254## Read the input files.
255for my $fn (@ARGV) {
256 if (-d $fn) {
257 opendir my $dh, $fn or fail "opendir `$fn': $!";
258 my @f = ();
259 FILE: while (my $f = readdir $dh) {
260 $f =~ /^[-\w.]+$/ or next FILE;
261 $f = "$fn/$f";
262 -f $f or next FILE;
263 push @f, $f;
264 }
265 closedir $dh;
266 for my $f (sort @f) { parse $f; }
267 } else {
268 parse $fn;
269 }
270}
271
272## Start writing output.
273print <<EOF;
274### -*-conf-*- GENERATED by $QUIS: DO NOT EDIT!
275###
276### Package sources.
277
278EOF
279
280## Work through the subscription list.
281for my $pair (@SUB) {
282 my @dist = @{$pair->[0]};
283 my @rel = @{$pair->[1]};
284
285 ## Write a stanza for each distribution.
286 for my $dist (@dist) {
287
288 ## Find the configuration set for the distribution.
289 defined (my $cset = $CSET{$dist})
290 or fail "unknown distribution `$dist'";
291 my @ix = cset_indices $cset, $dist;
292
293 ## Print a banner to break up the monotony.
294 my %a = ();
295 cset_fetch %a, @{[$cset, \%DEFAULT]}, 0, "default", qw(banner);
296 print "###", "-" x 74, "\n";
297 print "### ", $a{banner}, "\n" if exists $a{banner};
298
299 ## Write a paragraph for each release.
300 for my $rel (@rel) {
301
302 ## Write a header.
303 print "\n## $rel\n";
304
305 ## Prepare a list of configuration sections to provide variables for
306 ## expansion.
307 my @cset = ({ RELEASE => { "*" => $rel } }, $cset, \%DEFAULT);
308
309 ## Work through each index.
310 IX: for my $ix (@ix) {
311
312 ## Fetch properties from the configuration set.
313 %a = (options => undef,
314 release => $rel,
315 releases => "*",
316 types => "deb deb-src");
317 cset_fetch %a, @cset, 1, $ix, qw(uri components);
318 cset_fetch %a, @cset, 0, $ix, qw(types options release releases);
319
320 ## Check that this release matches the index.
321 my $matchp = 0;
322 for my $rpat (shellwords $a{releases}) {
323 $matchp = 1, last if fnmatch $rpat, $rel;
324 }
325 next IX unless $matchp;
326
327 ## Build an output line.
328 my $out = "";
a7ee99aa 329 if (defined (my $opt = $a{options})) { $out .= "[ $opt ] "; }
2920e7ba
MW
330 $out .= "$a{uri} $a{release} $a{components}";
331
332 ## Canonify whitespace.
333 $out =~ s/^\s+//; $out =~ s/\s+$//; $out =~ s/\s+/ /;
334
335 ## Write out the necessary
336 print "$_ $out\n" for shellwords $a{types};
337 }
338 }
339 print "\n";
340 }
341}
342
343## Write a trailer.
344print "###----- That's all, folks ", "-" x 50, "\n";
345print "### GENERATED by $QUIS: NO NOT EDIT!\n";
346
347###--------------------------------------------------------------------------
348### Documentation.
349
350=head1 NAME
351
352mkaptsrc - generate APT `sources.list' files
353
354=head1 SYNOPSIS
355
356B<mkaptsrc> I<file>|I<dir>...
357
358=head1 DESCRIPTION
359
360The B<mkaptsrc> progrem generates an APT F<sources.list> file from a
361collection of configuration files. It allows a site to use a single master
362file defining all (or most) of the available archives, while allowing each
363individiual host to describe succinctly which archives it actually wants to
364track.
365
366The command line arguments are a list of one or more filenames and/or
367directories. The program reads the files one by one, in order; a directory
368stands for all of the regular files it contains whose names consist only of
369alphanumeric characters, dots C<.>, underscores C<_>, and hyphens C<->, in
370ascending lexicographic order. (Nested subdirectories are ignored.) Later
371files can override settings from earlier ones.
372
373=head2 Command-line syntax
374
375The following command-line options are recognized.
376
377=over
378
379=item B<-h>, B<--help>
380
381Print help about the program to standard output, and exit.
382
383=item B<-v>, B<--version>
384
385Print B<mkaptsrc>'s version number to standard output, and exit.
386
387=back
388
389=head2 Configuration syntax
390
391The configuration files are split into stanze. Each stanza begins with an
392unindented header line, followed by zero or more indented body lines. Blank
393lines (containing only whitespace) and comments (whose first non-whitespace
394character is C<#>) are ignored E<ndash> and in particular are not considered
395when determining the boundaries of stanze. It is not possible to split a
396stanza between two files.
397
398A I<distribution stanza> consists of a line
399
400=over
401
402B<distribution> I<dist>
403
404=back
405
406followed by a number of indented assignments
407
408=over
409
410I<tag> = I<value>
411
412=back
413
414or
415
416=over
417
418I<tag>B<[>I<pat>B<]> = I<value>
419
420=back
421
422Here, I<dist> is a name for this distribution; this name is entirely internal
423to the configuration and has no external meaning. Several stanze may use the
424same I<dist>: the effect is the same as a single big stanza containing all of
425the assignments in order.
426
427Each assignment line sets the value of a I<tag> for the distribution; if the
428I<tag> has already been assigned a value then the old value is forgotten.
429The optional I<pat> may be used to assign different values to the same tag
430according to different I<contexts>, distinguished by glob patterns: see the
431description below. Omitting the I<pat> is equivalent to using the wildcard
432pattern C<*>.
433
434A I<default stanza> consists of a line
435
436=over
437
438B<defaults>
439
440=back
441
442followed by assignments as for a distribution stanza. Again, there may be
443many default stanze, and the effect is the same as a single big default
444stanza containing all of the assignments in order. During output, tags are
445looked up first in the relevant distribution, and if there no matching
446assignments then the B<defaults> assignments are searched.
447
448A I<subscription stanza> consists of a line
449
450=over
451
452B<subscribe>
453
454=back
455
456followed by indented subscription lines
457
458=over
459
460I<dist> [I<dist> ...] B<:> I<release> [I<release> ...]
461
462=back
463
464Such a line is equivalent to a sequence of lines
465
466=over
467
468I<dist> B<:> I<release> [I<release> ...]
469
470=back
471
472one for each I<dist>, in order.
473
474It is permitted for several lines to name the same I<dist>, though currently
475the behaviour is not good: they are treated entirely independently. The
476author is not sure what the correct behaviour ought to be.
477
478=head2 Tag lookup and value expansion
479
480The output of B<mkaptsrc> is largely constructed by looking up tags and using
481their values. A tag is always looked up in a particular I<distribution> and
482with reference to a particular I<context>. Contexts are named with an
483I<index>. The resulting value is the last assignment in the distribution's
484stanze whose tag is equal to the tag being looked up, and whose pattern is
485either absent or matches the context index. If there is no matching
486assignment, then the default assignments are checked, and again the last
487match is used. If there is no default assignment either, then the lookup
488fails; this might or might not be an error.
489
490Once the value has been found, it is I<expanded> before use. Any
491placeholders of the form B<%>I<tag> or B<%{>I<tag>B<}> (the latter may be
492used to distinguish the I<tag> name from any immediately following text) are
493replaced by the (expanded) value of the I<tag>, using the same distribution
494and context as the original lookup. It is a fatal error for a lookup of a
495tag to fail during expansion. Recursive expansion is forbidden.
496
497There are some special tags given values by B<mkaptsrc>. Their names are
498written in all upper-case.
499
500=head2 Output
501
502The output is always written to stdout. It begins with a header comment
503(which you can't modify), including a warning that the file is generated and
504shouldn't be edited.
505
506The output is split into sections, one for each I<dist> in the subcription
507stanze. Each section begins with a comment banner, whose text is the result
508of looking up the tag B<banner> in the distribution, using the context index
509B<default>; if the lookup fails then no banner text is added.
510
511The distribution section is split into paragraphs, one for each I<release>
512listed in the subscription line, and headed with a comment naming the
513I<release>. The contents of the paragraph are determined by assignments in
514the distribution stanza for I<dist>.
515
516The set of context indices for the paragraph is determined, as follows.
517
518=over
519
520=item *
521
522The tag B<indices> is looked up in the distribution I<dist>. This lookup is
523special in three ways: firstly, lookup will I<not> fall back to the
524B<defaults> assignments; secondly, only assignments with no pattern (or,
525equivalently, with pattern C<*>) are examined; and, thirdly, the result is
526I<not> subject to expansion. If a value is found, then the context indices
527are precisely the space-separated words of the value.
528
529=item *
530
531If there assignments in the distribution I<dist> whose patterns are
532I<literal> E<ndash> i.e., contain no metacharacters C<*>, C<?>, C<[>, or
533C<\\> E<ndash> then the context indices are precisely these literal patterns,
534in the order in which they first appeared.
535
536=item *
537
538If all of the assignments for the distribution I<dist> have no pattern (or,
539equivalently, have pattern C<*>), then there is exactly one context index
540B<default>.
541
542=item *
543
544Otherwise the situation is a fatal error. You should resolve this unlikely
545situation by setting an explicit B<indices> value.
546
547=back
548
549The contexts are now processed in turn. Each lookup described below happens
550in the distribution I<dist>, with respect to the context being processed.
551Furthermore, the special tag B<RELEASE> is given the value I<release>.
552
553The tag B<releases> is looked up, and split into a space-separated sequence
554of glob patterns. If the I<release> doesn't match any of these patterns then
555the context is ignored. (If the lookup fails, the context is always used,
556as if the value had been C<*>.)
557
558Finally, a sequence of lines is written, of the form
559
560=over
561
562I<type> S<B<[> I<options> B<]>> I<uri> I<release> I<components>
563
564=back
565
566one for each word in the value of B<types>, defaulting to B<deb> B<deb-src>.
567Other pieces correspond to the values of tags to be looked up: I<release>
568defaults to the name provided in the B<subscribe> stanza; if I<options> is
569omitted then there will be no S<B<[> I<options> B<]>> piece; it is a fatal
570error if other lookups fail.
571
572=head1 EXAMPLES
573
574The package repository for the official Linux Spotify client can be described
575as follows.
576
577 distribution spotify
578 banner = Spotify client for Linux.
579 uri = http://repository.spotify.com/
580 components = non-free
581 types = deb
582
583 subscribe
584 spotify : stable
585
586This produces the output
587
588 ###------------------------------------------------------------
589 ### Spotify client for Linux.
590
591 ## stable
592 deb http://repository.spotify.com/ stable non-free
593
594As a more complex example, I describe the official Debian package archive as
595follows.
596
597 default
598 debmirror = http://mirror.distorted.org.uk
599 debsecurity = http://security.debian.org
600
601 distribution debian
602 banner = Debian GNU/Linux.
603 uri[base] = %debmirror/debian/
604 uri[security-local] = %debmirror/debian-security/
605 uri[security-upstream] = %debsecurity/debian-security/
606 release[security-*] = %RELEASE/updates
607 releases[security-*] = oldstable stable testing
608 components = main non-free contrib
609 components[security-*] = main
610
611 subscribe
612 debian : stable testing unstable
613
614This arranges to use my local mirror for both the main archive and for
615security updates, but I<also> to use the upstream archive for security
616updates which I might not have mirrored yet. Setting B<releases[security-*]>
617copes with the fact that there are no separate security releases for the
618B<unstable> release.
619
620On machines which are far away from my mirror, I override these settings by
621writing
622
623 distribution debian
624 debmirror = http://ftp.uk.debian.org
625 indices = base security-upstream
626
627in a host-local file (which has the effect of disabling the B<security-local>
628context implicitly defined in the base stanza.
629
630=head1 BUGS
631
632Redefinition of subscriptions currently isn't well behaved.
633
634=head1 SEE ALSO
635
636L<sources.list(5)>
637
638=head1 AUTHOR
639
640Mark Wooding <mdw@distorted.org.uk>
641
642=cut
643
644###----- That's all, folks --------------------------------------------------