From fae2108b8a8d45ebef4d49fcb0964453c2be32a7 Mon Sep 17 00:00:00 2001 From: mdw Date: Thu, 19 Aug 1999 12:11:10 +0000 Subject: [PATCH] More improvements to URL recognizer. --- perl/SWDoc.pm | 43 ++++++++++++++++++++++++++++++++++++------- perl/SWInfo.pm | 35 +++++++++++++++++++++++++++++------ perl/SWMan.pm | 45 +++++++++++++++++++++++++++++++++------------ 3 files changed, 98 insertions(+), 25 deletions(-) diff --git a/perl/SWDoc.pm b/perl/SWDoc.pm index d519b11..aeedb04 100644 --- a/perl/SWDoc.pm +++ b/perl/SWDoc.pm @@ -1,6 +1,6 @@ # -*-perl-*- # -# $Id: SWDoc.pm,v 1.2 1999/08/18 17:10:07 mdw Exp $ +# $Id: SWDoc.pm,v 1.3 1999/08/19 12:11:09 mdw Exp $ # # Display documentation files # @@ -28,6 +28,9 @@ #----- Revision history ----------------------------------------------------- # # $Log: SWDoc.pm,v $ +# Revision 1.3 1999/08/19 12:11:09 mdw +# More improvements to URL recognizer. +# # Revision 1.2 1999/08/18 17:10:07 mdw # Slight improvements to URL and email address parsing. # @@ -59,12 +62,38 @@ sub doc { while (my $line = $fh->getline()) { last if $line =~ / /; $line =~ s/\&/&/g; - $line =~ s/\/>/g; - $line =~ s!\b(https?|ftp|file|news):[^]&)\s]*[^]&).,\s\']!$&!g; - $line =~ s!info:([^]&)\s]*[^]&).,\s\'\"])!$&!g; - $line =~ s!(?:\bmailto:)?([^\s()&;:{}.,\`\"][^\s()&;:{}\`\"]*\@[^\s()&;:{}\'\"]*[^\s()&;:{}.,\'\"])!$&!g; - $line =~ s!([-_.\w]+)\((\d+\w*)\)!SWMan::subst("$1($2)", $1, $2)!eg; + $line =~ s/\/>>/g; + + # --- Spot URLs (except `mailto') --- + + $line =~ s! \b (http s? | ftp | file | news) : + [^])\s<>]* [^]<>&).,\s\'] + !SWMan::urlsubst($&, $&)!egx; + + # --- Spot email addresses (including `mailto' URLs) --- + + $line =~ s! (?:\bmailto:)? + ([^\s()&;:<>&{}.,\`\'\"] [^\s()&;:<>&{}\`\'\"]* + \@ + [^\s()&;:{}<>&\'\"]* [^\s()&;:{}<>&.,\'\"]) + !$&!gx; + + # --- Spot info references --- + + $line =~ s! \b info: ([^]&)\s<>]* [^]&).,\s<>\'\"]) + !$&!gx; + + # --- Spot manpage references --- + + $line =~ s! ([-_.\w]+) \( (\d+\w*) \) + !SWMan::subst("$1($2)", $1, $2)!egx; + + # --- Finally fix up the HTML properly --- + + $line =~ s/\<\;\\>\;/>/g; + print $line; } print "\n"; diff --git a/perl/SWInfo.pm b/perl/SWInfo.pm index 276ac05..42c4119 100644 --- a/perl/SWInfo.pm +++ b/perl/SWInfo.pm @@ -1,6 +1,6 @@ # -*-perl-*- # -# $Id: SWInfo.pm,v 1.2 1999/08/18 17:10:07 mdw Exp $ +# $Id: SWInfo.pm,v 1.3 1999/08/19 12:11:10 mdw Exp $ # # Read and output GNU Info files # @@ -28,6 +28,9 @@ #----- Revision history ----------------------------------------------------- # # $Log: SWInfo.pm,v $ +# Revision 1.3 1999/08/19 12:11:10 mdw +# More improvements to URL recognizer. +# # Revision 1.2 1999/08/18 17:10:07 mdw # Slight improvements to URL and email address parsing. # @@ -109,8 +112,8 @@ sub info { # --- Now translate the node into HTML, first line first --- $n =~ s/\&/&/; - $n =~ s/\/>/; + $n =~ s/\/>>/; $n =~ s/\A( [^\n]* Next:\ *) ([^,\n]*) / $1 . subst($2, $file, $i) /eix; $n =~ s/\A( [^\n]* Prev:\ *) ([^,\n]*) / $1 . subst($2, $file, $i) /eix; $n =~ s/\A( [^\n]* Up:\ *) ([^,\n]*) / $1 . subst($2, $file, $i) /eix; @@ -144,9 +147,29 @@ sub info { } $out .= $n; - $out =~ s!\b(https?|ftp|file|news):[^]&)\s]*[^]&).,\s\'\"]!$&!g; - $out =~ s!(?:\bmailto:)?([^\s()&;:{}.,\`\"][^\s()&;:{}\`\"]*\@[^\s()&;:{}\'\"]*[^\s()&;:{}.,\'\"])!$&!g; - $out =~ s!([-_.\w]+)\((\d+\w*)\)!SWMan::subst("$1($2)", $1, $2)!eg; + # --- Spot URLs (except `mailto') --- + + $out =~ s! \b (http s? | ftp | file | news) : + [^]<>)\s]* [^]<>).,\s\'] + !urlsubst($&, $&)!egx; + + # --- Spot email addresses (including `mailto' URLs) --- + + $out =~ s! (?:\bmailto:)? + ([^\s()<>&;:{}.,\`\'\"] [^\s()<>&;:{}\`\'\"]* + \@ + [^\s()<>&;:{}\'\"]* [^\s()<>&;:{}.,\'\"]) + !$&!gx; + + # --- Spot manpage references --- + + $out =~ s! ([-_.\w]+) \( (\d+\w*) \) + !SWMan::subst("$1($2)", $1, $2)!egx; + + # --- Fix up the HTML --- + + $out =~ s/\<\\>/>/g; header("Info: ($file)$node"); print("
\n$out
\n"); diff --git a/perl/SWMan.pm b/perl/SWMan.pm index b27fca8..e799148 100644 --- a/perl/SWMan.pm +++ b/perl/SWMan.pm @@ -1,6 +1,6 @@ # -*-perl-*- # -# $Id: SWMan.pm,v 1.2 1999/08/18 17:10:07 mdw Exp $ +# $Id: SWMan.pm,v 1.3 1999/08/19 12:11:10 mdw Exp $ # # Display and other fiddling of manual pages # @@ -28,6 +28,9 @@ #----- Revision history ----------------------------------------------------- # # $Log: SWMan.pm,v $ +# Revision 1.3 1999/08/19 12:11:10 mdw +# More improvements to URL recognizer. +# # Revision 1.2 1999/08/18 17:10:07 mdw # Slight improvements to URL and email address parsing. # @@ -48,7 +51,7 @@ use SWConfig; use SWCGI qw(:DEFAULT :layout); @ISA = qw(Exporter); -@EXPORT_OK = qw(subst check); +@EXPORT_OK = qw(subst urlsubst check); #----- Useful functions ----------------------------------------------------- @@ -107,6 +110,19 @@ sub subst($$$) { return "$s"; } +# --- @urlsubst(URL, STRING)@ --- +# +# Substitutes in a URL reference. The important bit is that embedded `&' +# characters are un-entitied from `&'. This doesn't seem to upset +# Netscape or Lynx as much as I'd expect (or, in fact, at all), but it's +# slightly untidy. + +sub urlsubst($$) { + my ($url, $name) = @_; + $url =~ s/\&\;/&/; + return "$name"; +} + # --- @sections()@ --- # # Return a list of manual sections. @@ -266,8 +282,8 @@ sub man { # --- Translate the character if it's magical --- $ch eq "&" and $ch = "&"; - $ch eq "<" and $ch = "<"; - $ch eq ">" and $ch = ">"; + $ch eq "<" and $ch = "<<"; + $ch eq ">" and $ch = ">>"; $l .= $ch; } $state and $l .= ""; @@ -291,18 +307,23 @@ sub man { # --- And email and hypertext references too --- $l =~ s! ((?:\<[bi]\>)*) # Leading highlighting - ((?:https?|ftp|file|news) # A protocol name + ( \b (?: http s? | ftp | file | news ) # A protocol name : # The important and obvious bit - [^]&)\s]+ # Most characters are allowed - [^]&).,\s\'\"]) # Don't end on punctuation + [^]<>)\s<>\'\"]+ # Most characters are allowed + [^]<>).,\s<>\'\"]) # Don't end on punctuation ((?:\)*) # Closing tags, optional - !$1$&$3!gx; + !urlsubst($2, $&)!egx; - $l =~ s! ((?:\<[bi]\>)*(?:\bmailto:)?) - ( [^\s()&;:{}<>,.\`\"] [^\s()&;:{}<>\`\"]* \@ - [^\s()&;:{}<>\'\"]* [^\s()&;:{}<>.,\'\"]) + $l =~ s! ( (?:\<[bi]\>)* (?:\bmailto:)? ) + ( [^\s()<>;:{}&<>,.\`\'\"] [^\s()<>;:{}&<>\`\'\"]* \@ + [^\s()<>;:{&}<>\'\"]* [^\s()<>;:{}&<>.,\'\"]) ((?:\)*) - !$1$&$3!gx; + !$&!gx; + + # --- Fix up the HTML --- + + $l =~ s/\<\;\\>\;/>/g; # --- Done! --- -- 2.11.0