More improvements to URL recognizer.
[sw-tools] / perl / SWMan.pm
1 # -*-perl-*-
2 #
3 # $Id: SWMan.pm,v 1.3 1999/08/19 12:11:10 mdw Exp $
4 #
5 # Display and other fiddling of manual pages
6 #
7 # (c) 1999 EBI
8 #
9
10 #----- Licensing notice -----------------------------------------------------
11 #
12 # This file is part of sw-tools.
13 #
14 # sw-tools is free software; you can redistribute it and/or modify
15 # it under the terms of the GNU General Public License as published by
16 # the Free Software Foundation; either version 2 of the License, or
17 # (at your option) any later version.
18 #
19 # sw-tools is distributed in the hope that it will be useful,
20 # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 # GNU General Public License for more details.
23 #
24 # You should have received a copy of the GNU General Public License
25 # along with sw-tools; if not, write to the Free Software Foundation,
26 # Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
27
28 #----- Revision history -----------------------------------------------------
29 #
30 # $Log: SWMan.pm,v $
31 # Revision 1.3 1999/08/19 12:11:10 mdw
32 # More improvements to URL recognizer.
33 #
34 # Revision 1.2 1999/08/18 17:10:07 mdw
35 # Slight improvements to URL and email address parsing.
36 #
37 # Revision 1.1 1999/07/30 18:46:37 mdw
38 # New CGI script for browsing installed software and documentation.
39 #
40
41 #----- Package preamble -----------------------------------------------------
42
43 package SWMan;
44
45 use IO;
46 use POSIX;
47 use DirHandle;
48 use Exporter;
49
50 use SWConfig;
51 use SWCGI qw(:DEFAULT :layout);
52
53 @ISA = qw(Exporter);
54 @EXPORT_OK = qw(subst urlsubst check);
55
56 #----- Useful functions -----------------------------------------------------
57
58 %mandb = ();
59
60 # --- @mans(SECTION)@ ---
61 #
62 # Returns a reference to a list of manual pages in the given section.
63
64 sub mans($) {
65 my ($sec) = @_;
66 $mandb{$sec} and return $mandb{sec};
67
68 my $d = DirHandle->new("$C{prefix}/man/man$sec") or return undef;
69 my @f;
70 while (my $f = $d->read()) {
71 push(@f, $f);
72 }
73 $mandb{$sec} = \@f;
74 return \@f;
75 }
76
77 # --- @check(NAME, SECTION)@ ---
78 #
79 # See whether there's a manpage called NAME with section SECTION.
80
81 sub check($$) {
82 my $pre = "$C{prefix}/man/man";
83 my ($man, $sec) = @_;
84 my $f;
85
86 # --- Quick check for obvious things ---
87
88 my ($base) = ($sec =~ /^(\d+)/);
89 $f = "$pre$base/$man.$sec";
90 -r $f and return $f; $f .= ".gz"; -r $f and return $f;
91
92 # --- Snarf the appropriate filename list ---
93
94 my $fs = mans($base) or return undef;
95 foreach my $f (@$fs) {
96 $f =~ /^$man\.$sec\w+(\.gz)?$/ and return "$C{prefix}/man/man$base/$f";
97 }
98 return undef;
99 }
100
101 # --- @subst(STRING, NAME, SECTION)@ ---
102 #
103 # If NAME(SECTION) is a manual page, return the STRING appropriately wrapped
104 # in an anchor element; otherwise return it unmolested.
105
106 sub subst($$$) {
107 my ($s, $n, $sec) = @_;
108 check($n, $sec) and
109 return "<a href=\"$ref?act=man&man=$n&sec=$sec\">$s</a>";
110 return "$s";
111 }
112
113 # --- @urlsubst(URL, STRING)@ ---
114 #
115 # Substitutes in a URL reference. The important bit is that embedded `&'
116 # characters are un-entitied from `&amp;'. This doesn't seem to upset
117 # Netscape or Lynx as much as I'd expect (or, in fact, at all), but it's
118 # slightly untidy.
119
120 sub urlsubst($$) {
121 my ($url, $name) = @_;
122 $url =~ s/\&amp\;/&/;
123 return "<a href=\"$url\">$name</a>";
124 }
125
126 # --- @sections()@ ---
127 #
128 # Return a list of manual sections.
129
130 @sectionlist = ();
131
132 sub sections() {
133 return @sectionlist if @sectionlist;
134 my @s = ();
135 my $d = DirHandle->new("$C{prefix}/man") or
136 barf("couldn't open man directory: $!");
137 while ($f = $d->read()) {
138 next if $f !~ /^man/ || !-d "$C{prefix}/man/$f";
139 push(@s, $');
140 }
141 return (@sectionlist = sort(@s));
142 }
143
144 #----- Display a quick section index ----------------------------------------
145
146 sub quickie {
147 print "Quick section index:\n";
148 foreach $s (sections()) {
149 print "<a href=\"$ref?act=man&sec=$s\">$s</a>\n";
150 }
151 }
152
153 #----- Display indices for manual sections ----------------------------------
154
155 sub dosection($) {
156 my ($sec) = @_;
157 my @m = ();
158
159 barf("illegal section `$sec'") if $sec =~ m:/:;
160
161 # --- Snarf the list of manual pages in this section ---
162
163 {
164 my $d = DirHandle->new("$C{prefix}/man/man$sec") or
165 barf("couldn't read directory `$C{prefix}/man/man$sec': $!");
166 while (my $f = $d->read()) {
167 my ($man, $sec) = split(/\./, $f, 3);
168 push(@m, "$man($sec)") if $sec;
169 }
170 }
171
172 # --- Sort and emit the index ---
173
174 print("<h4>Section $sec</h4>\n<table>");
175
176 {
177 my $col = 0;
178 foreach my $m (sort(@m)) {
179 my ($man, $sec) = $m =~ /^(.*)\((.*)\)$/;
180 $col or print("<tr>\n");
181 print("<td><a href=\"$ref?act=man&man=$man&sec=$sec\">$m</a>\n");
182 $col = ($col + 1) % 5;
183 }
184 }
185
186 print("</table>\n");
187 }
188
189 sub section {
190 my $sec = $Q{"sec"};
191 header("Index of manual section $sec");
192 quickie(); print "<hr>\n";
193 dosection($sec);
194 print "<hr>\n"; quickie();;
195 footer();
196 }
197
198 sub index {
199 header("Manual page index");
200 print("<h3>Manual page index</h3>\n");
201 foreach my $s (sections()) { dosection($s); }
202 footer();
203 }
204
205 #----- Display a manual page ------------------------------------------------
206
207 sub man {
208 my ($man, $sec) = ($Q{"man"}, $Q{"sec"});
209
210 $sec or &index(), return;
211 $man or &section(), return;
212
213 my $file = check($man, $sec) or
214 barf("no manual page $man($sec)");
215 barf("illegal filename `$file'") if $file =~ m:\./:;
216
217 # --- Read the manual page ---
218
219 my $p = IO::Pipe->new();
220 my $kid = fork();
221 defined($kid) or barf("fork failed: $!");
222 if ($kid == 0) {
223 $p->writer();
224 dup2($p->fileno(), 1);
225 chdir("$C{prefix}/man");
226 if ($file =~ /\.gz$/) {
227 $pp = IO::Pipe->new;
228 $kkid = fork();
229 defined($kid) or exit(127);
230 if ($kkid == 0) {
231 $pp->writer();
232 dup2($pp->fileno, 1);
233 exec("gzip", "-dc", $file);
234 exit(127);
235 }
236 exec("nroff", "-man");
237 } else {
238 exec("nroff", "-man", $file);
239 }
240 exit(127);
241 }
242 $p->reader();
243
244 # --- Spit out the manual page now ---
245
246 header("Manual page $Q{man}($Q{sec})");
247 quickie(); print "<hr>\n";
248 print "<pre>\n";
249 while (my $line = $p->getline()) {
250 chomp $line;
251
252 # --- Grind through the line turning it into HTML ---
253
254 {
255 my $state = "";
256 my $l = "";
257
258 for (my $i = 0; $i < length($line); $i++) {
259 my $ch = substr($line, $i, 1);
260 my $nstate = "";
261
262 # --- Sort out overstriking ---
263
264 if (substr($line, $i + 1, 1) eq "\b") {
265 my ($italic, $bold) = (0, 0);
266 $ch eq "_" and $italic = 1;
267 $ch eq substr($line, $i + 2, 1) and $bold = 1;
268 $ch = substr($line, $i + 2, 1);
269 while (substr($line, $i + 1, 1) eq "\b") { $i += 2; }
270 if ($italic && $bold) {
271 $nstate = $state ? $state : "b";
272 } elsif ($italic) {
273 $nstate = "i";
274 } elsif ($bold) {
275 $nstate = "b";
276 }
277 }
278 $state ne $nstate and
279 $l .= ($state && "</$state>") . ($nstate && "<$nstate>");
280 $state = $nstate;
281
282 # --- Translate the character if it's magical ---
283
284 $ch eq "&" and $ch = "&amp;";
285 $ch eq "<" and $ch = "&lt;<";
286 $ch eq ">" and $ch = ">&gt;";
287 $l .= $ch;
288 }
289 $state and $l .= "</$state>";
290
291 # --- Now find manual references in there ---
292 #
293 # I don't use /x regexps very often, but I think this is a good excuse.
294
295 $l =~ s! ((?:\<[bi]\>)*) # Leading highlighting tags
296 ([-_.\w]+) # Various plausible manual name chars
297 ((?:\</[bi]\>)* # Closing highlighting tags
298 (?:\<[bi]\>)* # And opening ones again
299 \( # An open parenthesis
300 (?:\<[bi]\>)*) # More opening highlights
301 (\d+\w*) # The section number
302 ((?:\</[bi]\>)* # Close highlights
303 \) # Close parens
304 (?:\</[bi]\>)*) # Finally more closing tags
305 ! subst($&, $2, $4) !egx;
306
307 # --- And email and hypertext references too ---
308
309 $l =~ s! ((?:\<[bi]\>)*) # Leading highlighting
310 ( \b (?: http s? | ftp | file | news ) # A protocol name
311 : # The important and obvious bit
312 [^]<>)\s<>\'\"]+ # Most characters are allowed
313 [^]<>).,\s<>\'\"]) # Don't end on punctuation
314 ((?:\</[bi]\>)*) # Closing tags, optional
315 !urlsubst($2, $&)!egx;
316
317 $l =~ s! ( (?:\<[bi]\>)* (?:\bmailto:)? )
318 ( [^\s()<>;:{}&<>,.\`\'\"] [^\s()<>;:{}&<>\`\'\"]* \@
319 [^\s()<>;:{&}<>\'\"]* [^\s()<>;:{}&<>.,\'\"])
320 ((?:\</[bi]\>)*)
321 !<a href="mailto:$2">$&</a>!gx;
322
323 # --- Fix up the HTML ---
324
325 $l =~ s/\&lt\;\</&lt;/g;
326 $l =~ s/\>\&gt\;/&gt;/g;
327
328 # --- Done! ---
329
330 print $l, "\n";
331 }
332 }
333
334 # --- Done all of that ---
335
336 print "</pre>\n";
337 $p->close();
338 waitpid($kid, 0);
339 barf("nroff failed (exit status $?)") if $?;
340 print "<hr>\n"; quickie();;
341 footer();
342 }
343
344 #----- Register actions -----------------------------------------------------
345
346 $main::ACT{"man"} = \&man;
347
348 #----- That's all, folks ----------------------------------------------------
349
350 1;