More improvements to URL recognizer.

author mdw <mdw>

Thu, 19 Aug 1999 12:11:10 +0000 (12:11 +0000)

committer mdw <mdw>

Thu, 19 Aug 1999 12:11:10 +0000 (12:11 +0000)
author mdw <mdw>
Thu, 19 Aug 1999 12:11:10 +0000 (12:11 +0000)
committer mdw <mdw>
Thu, 19 Aug 1999 12:11:10 +0000 (12:11 +0000)
diff --git a/perl/SWDoc.pm b/perl/SWDoc.pm

index d519b11..aeedb04 100644 (file)
--- a/perl/SWDoc.pm
+++ b/perl/SWDoc.pm
@@ -1,6 +1,6 @@
  # -*-perl-*-
  #
-# $Id: SWDoc.pm,v 1.2 1999/08/18 17:10:07 mdw Exp $
+# $Id: SWDoc.pm,v 1.3 1999/08/19 12:11:09 mdw Exp $
  #
  # Display documentation files
  #
@@ -28,6 +28,9 @@
  #----- Revision history -----------------------------------------------------
  #
  # $Log: SWDoc.pm,v $
+# Revision 1.3  1999/08/19 12:11:09  mdw
+# More improvements to URL recognizer.
+#
  # Revision 1.2  1999/08/18 17:10:07  mdw
  # Slight improvements to URL and email address parsing.
  #
@@ -59,12 +62,38 @@ sub doc {
    while (my $line = $fh->getline()) {
      last if $line =~ /\f/;
      $line =~ s/\&/&amp;/g;
-    $line =~ s/\</&lt;/g;
-    $line =~ s/\>/&gt;/g;
-    $line =~ s!\b(https?|ftp|file|news):[^]&)\s]*[^]&).,\s\']!<a href="$&">$&</a>!g;
-    $line =~ s!info:([^]&)\s]*[^]&).,\s\'\"])!<a href="$ref?act=info&file=$1&node=Top">$&</a>!g;
-    $line =~ s!(?:\bmailto:)?([^\s()&;:{}.,\`\"][^\s()&;:{}\`\"]*\@[^\s()&;:{}\'\"]*[^\s()&;:{}.,\'\"])!<a href="mailto:$1">$&</a>!g;
-    $line =~ s!([-_.\w]+)\((\d+\w*)\)!SWMan::subst("$1($2)", $1, $2)!eg;
+    $line =~ s/\</&lt;</g;
+    $line =~ s/\>/>&gt;/g;
+
+    # --- Spot URLs (except `mailto') ---
+
+    $line =~ s! \b (http s? | ftp | file | news) :
+               [^])\s<>]* [^]<>&).,\s\']
+             !SWMan::urlsubst($&, $&)!egx;
+
+    # --- Spot email addresses (including `mailto' URLs) ---
+
+    $line =~ s! (?:\bmailto:)?
+               ([^\s()&;:<>&{}.,\`\'\"] [^\s()&;:<>&{}\`\'\"]*
+                \@
+                [^\s()&;:{}<>&\'\"]* [^\s()&;:{}<>&.,\'\"])
+             !<a href="mailto:$1">$&</a>!gx;
+
+    # --- Spot info references ---
+
+    $line =~ s! \b info: ([^]&)\s<>]* [^]&).,\s<>\'\"])
+              !<a href="$ref?act=info&file=$1&node=Top">$&</a>!gx;
+
+    # --- Spot manpage references ---
+
+    $line =~ s! ([-_.\w]+) \( (\d+\w*) \)
+             !SWMan::subst("$1($2)", $1, $2)!egx;
+
+    # --- Finally fix up the HTML properly ---
+
+    $line =~ s/\&lt\;\</&lt;/g;
+    $line =~ s/\>\&gt\;/&gt;/g;
+
      print $line;
    }
    print "</pre>\n";
diff --git a/perl/SWInfo.pm b/perl/SWInfo.pm

index 276ac05..42c4119 100644 (file)
--- a/perl/SWInfo.pm
+++ b/perl/SWInfo.pm
@@ -1,6 +1,6 @@
  # -*-perl-*-
  #
-# $Id: SWInfo.pm,v 1.2 1999/08/18 17:10:07 mdw Exp $
+# $Id: SWInfo.pm,v 1.3 1999/08/19 12:11:10 mdw Exp $
  #
  # Read and output GNU Info files
  #
@@ -28,6 +28,9 @@
  #----- Revision history -----------------------------------------------------
  #
  # $Log: SWInfo.pm,v $
+# Revision 1.3  1999/08/19 12:11:10  mdw
+# More improvements to URL recognizer.
+#
  # Revision 1.2  1999/08/18 17:10:07  mdw
  # Slight improvements to URL and email address parsing.
  #
@@ -109,8 +112,8 @@ sub info {
    # --- Now translate the node into HTML, first line first ---
  
    $n =~ s/\&/&amp;/;
-  $n =~ s/\</&lt;/;
-  $n =~ s/\>/&gt;/;
+  $n =~ s/\</&lt;</;
+  $n =~ s/\>/>&gt;/;
    $n =~ s/\A( [^\n]* Next:\ *) ([^,\n]*) / $1 . subst($2, $file, $i) /eix;
    $n =~ s/\A( [^\n]* Prev:\ *) ([^,\n]*) / $1 . subst($2, $file, $i) /eix;
    $n =~ s/\A( [^\n]* Up:\ *) ([^,\n]*) / $1 . subst($2, $file, $i) /eix;
@@ -144,9 +147,29 @@ sub info {
    }
    $out .= $n;
  
-  $out =~ s!\b(https?|ftp|file|news):[^]&)\s]*[^]&).,\s\'\"]!<a href="$&">$&</a>!g;
-  $out =~ s!(?:\bmailto:)?([^\s()&;:{}.,\`\"][^\s()&;:{}\`\"]*\@[^\s()&;:{}\'\"]*[^\s()&;:{}.,\'\"])!<a href="mailto:$1">$&</a>!g;
-  $out =~ s!([-_.\w]+)\((\d+\w*)\)!SWMan::subst("$1($2)", $1, $2)!eg;
+  # --- Spot URLs (except `mailto') ---
+
+  $out =~ s! \b (http s? | ftp | file | news) :
+            [^]<>)\s]* [^]<>).,\s\']
+          !urlsubst($&, $&)!egx;
+
+  # --- Spot email addresses (including `mailto' URLs) ---
+
+  $out =~ s! (?:\bmailto:)?
+            ([^\s()<>&;:{}.,\`\'\"] [^\s()<>&;:{}\`\'\"]*
+             \@
+             [^\s()<>&;:{}\'\"]* [^\s()<>&;:{}.,\'\"])
+          !<a href="mailto:$1">$&</a>!gx;
+
+  # --- Spot manpage references ---
+
+  $out =~ s! ([-_.\w]+) \( (\d+\w*) \)
+          !SWMan::subst("$1($2)", $1, $2)!egx;
+
+  # --- Fix up the HTML ---
+
+  $out =~ s/\&lt;\</&lt;/g;
+  $out =~ s/\>\&gt;/&gt;/g;
  
    header("Info: ($file)$node");
    print("<pre>\n$out</pre>\n");
diff --git a/perl/SWMan.pm b/perl/SWMan.pm

index b27fca8..e799148 100644 (file)
--- a/perl/SWMan.pm
+++ b/perl/SWMan.pm
@@ -1,6 +1,6 @@
  # -*-perl-*-
  #
-# $Id: SWMan.pm,v 1.2 1999/08/18 17:10:07 mdw Exp $
+# $Id: SWMan.pm,v 1.3 1999/08/19 12:11:10 mdw Exp $
  #
  # Display and other fiddling of manual pages
  #
@@ -28,6 +28,9 @@
  #----- Revision history -----------------------------------------------------
  #
  # $Log: SWMan.pm,v $
+# Revision 1.3  1999/08/19 12:11:10  mdw
+# More improvements to URL recognizer.
+#
  # Revision 1.2  1999/08/18 17:10:07  mdw
  # Slight improvements to URL and email address parsing.
  #
@@ -48,7 +51,7 @@ use SWConfig;
  use SWCGI qw(:DEFAULT :layout);
  
  @ISA = qw(Exporter);
-@EXPORT_OK = qw(subst check);
+@EXPORT_OK = qw(subst urlsubst check);
  
  #----- Useful functions -----------------------------------------------------
  
@@ -107,6 +110,19 @@ sub subst($$$) {
    return "$s";
  }
  
+# --- @urlsubst(URL, STRING)@ ---
+#
+# Substitutes in a URL reference.  The important bit is that embedded `&'
+# characters are un-entitied from `&amp;'.  This doesn't seem to upset
+# Netscape or Lynx as much as I'd expect (or, in fact, at all), but it's
+# slightly untidy.
+
+sub urlsubst($$) {
+  my ($url, $name) = @_;
+  $url =~ s/\&amp\;/&/;
+  return "<a href=\"$url\">$name</a>";
+}
+
  # --- @sections()@ ---
  #
  # Return a list of manual sections.
@@ -266,8 +282,8 @@ sub man {
         # --- Translate the character if it's magical ---
  
         $ch eq "&" and $ch = "&amp;";
-       $ch eq "<" and $ch = "&lt;";
-       $ch eq ">" and $ch = "&gt;";
+       $ch eq "<" and $ch = "&lt;<";
+       $ch eq ">" and $ch = ">&gt;";
         $l .= $ch;
        }
        $state and $l .= "</$state>";
@@ -291,18 +307,23 @@ sub man {
        # --- And email and hypertext references too ---
  
        $l =~ s! ((?:\<[bi]\>)*)         # Leading highlighting
-              ((?:https?|ftp|file|news) # A protocol name
+              ( \b (?: http s? | ftp | file | news ) # A protocol name
                 :                       # The important and obvious bit
-               [^]&)\s]+               # Most characters are allowed
-               [^]&).,\s\'\"])         # Don't end on punctuation
+               [^]<>)\s<>\'\"]+        # Most characters are allowed
+               [^]<>).,\s<>\'\"])      # Don't end on punctuation
                ((?:\</[bi]\>)*)         # Closing tags, optional
-            !$1<a href="$2">$&</a>$3!gx;
+            !urlsubst($2, $&)!egx;
  
-      $l =~ s! ((?:\<[bi]\>)*(?:\bmailto:)?)
-               ( [^\s()&;:{}<>,.\`\"] [^\s()&;:{}<>\`\"]* \@
-                 [^\s()&;:{}<>\'\"]* [^\s()&;:{}<>.,\'\"])
+      $l =~ s! ( (?:\<[bi]\>)* (?:\bmailto:)? )
+               ( [^\s()<>;:{}&<>,.\`\'\"] [^\s()<>;:{}&<>\`\'\"]* \@
+                 [^\s()<>;:{&}<>\'\"]* [^\s()<>;:{}&<>.,\'\"])
                 ((?:\</[bi]\>)*)
-             !$1<a href="mailto:$2">$&</a>$3!gx;
+             !<a href="mailto:$2">$&</a>!gx;
+
+      # --- Fix up the HTML ---
+
+      $l =~ s/\&lt\;\</&lt;/g;
+      $l =~ s/\>\&gt\;/&gt;/g;
  
        # --- Done! ---
author	mdw <mdw>
	Thu, 19 Aug 1999 12:11:10 +0000 (12:11 +0000)
committer	mdw <mdw>
	Thu, 19 Aug 1999 12:11:10 +0000 (12:11 +0000)
perl/SWDoc.pm		patch \| blob \| blame \| history
perl/SWInfo.pm		patch \| blob \| blame \| history
perl/SWMan.pm		patch \| blob \| blame \| history