| 1 | # |
| 2 | # MdwOpt.pm |
| 3 | # |
| 4 | # Options parsing |
| 5 | # |
| 6 | # (c) 1996 Mark Wooding |
| 7 | # |
| 8 | |
| 9 | #----- Notices -------------------------------------------------------------- |
| 10 | # |
| 11 | # This program comes with no warranty, not even of any kind, unless |
| 12 | # someone other than the author offers to provide one. It may be used |
| 13 | # and distributed under the terms of the GNU General Public Licence, in |
| 14 | # the interests of promoting freely available software for Linux. |
| 15 | |
| 16 | package MdwOpt; |
| 17 | require 5.00; |
| 18 | require Exporter; |
| 19 | |
| 20 | @ISA=qw(Exporter); |
| 21 | @EXPORT=qw( ); |
| 22 | |
| 23 | #----- The code ------------------------------------------------------------- |
| 24 | |
| 25 | # --- MdwOpt::new --- |
| 26 | # |
| 27 | # Arguments: (scalar) shortopts == short options description |
| 28 | # (see below) longopts == long options description |
| 29 | # (array ref) arguments == pointer to argument list |
| 30 | # (array ref) flags == a number of flags you can set |
| 31 | # |
| 32 | # Returns: A `MdwOpt' object, which can be used to extract options from |
| 33 | # an array of argument strings. |
| 34 | # |
| 35 | # Use: Creates a `MdwOpt' object which contains all the information |
| 36 | # needed to parse a command line. The arguments are a bit |
| 37 | # complicated, so I'll explain them below. This implementation |
| 38 | # provides a similar level of flexibility to the C `mdwopt' |
| 39 | # routine, although the interface is rather different, since it |
| 40 | # takes advantage of some of Perl's object-oriented features. |
| 41 | # |
| 42 | # |
| 43 | # How options parsing appears to users |
| 44 | # |
| 45 | # A command line consists of a number of `words' (which may |
| 46 | # contain spaces, according to various shell quoting |
| 47 | # conventions). A word may be an option, an argument to an |
| 48 | # option, or a non-option. An option begins with a special |
| 49 | # character, usually `-', although `+' is also used sometimes. |
| 50 | # As special exceptions, the word containing only a `-' is |
| 51 | # considered to be a non-option, since it usually represents |
| 52 | # standard input or output as a filename, and the word |
| 53 | # containing a double-dash `--' is used to mark all following |
| 54 | # words as being non-options regardless of their initial |
| 55 | # character. |
| 56 | # |
| 57 | # Traditionally, all words after the first non-option have been |
| 58 | # considered to be non-options automatically, so that options |
| 59 | # must be specified before filenames. However, this |
| 60 | # implementation can extract all the options from the command |
| 61 | # line regardless of their position. This can usually be |
| 62 | # disabled by setting one of the environment variables |
| 63 | # `POSIXLY_CORRECT' or `_POSIX_OPTION_ORDER'. |
| 64 | # |
| 65 | # There are two different styles of options: `short' and |
| 66 | # `long'. |
| 67 | # |
| 68 | # Short options are the sort which Unix has known for ages: an |
| 69 | # option is a single letter, preceded by a `-'. Short options |
| 70 | # can be joined together to save space (and possibly to make |
| 71 | # silly words): e.g., instead of giving options `-x -y', a user |
| 72 | # could write `-xy'. Some short options can have arguments, |
| 73 | # which appear after the option letter, either immediately |
| 74 | # following, or in the next `word' (so an option with an |
| 75 | # argument could be written as `-o foo' or as `-ofoo'). Note |
| 76 | # that options with optional arguments must be written in the |
| 77 | # second style. |
| 78 | # |
| 79 | # When a short option controls a flag setting, it is sometimes |
| 80 | # possible to explicitly turn the flag off, as well as turning |
| 81 | # it on, (usually to override default options). This is |
| 82 | # usually done by using a `+' instead of a `-' to introduce the |
| 83 | # option. |
| 84 | # |
| 85 | # Long options, as popularised by the GNU utilities, are given |
| 86 | # long-ish memorable names, preceded by a double-dash `--'. |
| 87 | # Since their names are more than a single character, long |
| 88 | # options can't be combined in the same way as short options. |
| 89 | # Arguments to long options may be given either in the same |
| 90 | # `word', separated from the option name by an equals sign, |
| 91 | # or in the following `word'. |
| 92 | # |
| 93 | # Long option names can be abbreviated if necessary, as long |
| 94 | # as the abbreviation is unique. This means that options can |
| 95 | # have sensible and memorable names but still not require much |
| 96 | # typing from an experienced user. |
| 97 | # |
| 98 | # Like short options, long options can control flag settings. |
| 99 | # The options to manipulate these settings come in pairs: an |
| 100 | # option of the form `--set-flag' might set the flag, while an |
| 101 | # option of the form `--no-set-flag' might clear it. |
| 102 | # |
| 103 | # It is usual for applications to provide both short and long |
| 104 | # options with identical behaviour. Some applications with |
| 105 | # lots of options may only provide long options (although they |
| 106 | # will often be only two or three characters long). In this |
| 107 | # case, long options can be preceded with a single `-' |
| 108 | # character, and negated by a `+' character. |
| 109 | # |
| 110 | # Finally, some (older) programs accept arguments of the form |
| 111 | # `-<number>', to set some numerical parameter, typically a |
| 112 | # line count of some kind. |
| 113 | # |
| 114 | # |
| 115 | # How programs parse options |
| 116 | # |
| 117 | # The difficult bit is all in the setting up at the beginning. |
| 118 | # I've used some funny data structures to try and pack all the |
| 119 | # important information away. |
| 120 | # |
| 121 | # The first `shortopts' argument specifies the allowable short |
| 122 | # options, followed by various switch characters which control |
| 123 | # option-specific features. Allowable characters are as |
| 124 | # follows: |
| 125 | # |
| 126 | # : option takes a required argument |
| 127 | # :: option takes an optional argument |
| 128 | # + option may be negated |
| 129 | # |
| 130 | # Note that the `+' must appear /before/ the `:' characters. |
| 131 | # |
| 132 | # The `longopts' argument is a reference to a hash, containing |
| 133 | # various pieces of information. (Using a reference here means |
| 134 | # that we can pass other aggregate values around. It also |
| 135 | # might save a little memory.) The hash contains an item for |
| 136 | # each long option string you want to support: the option's |
| 137 | # name is the key; the value is another hash reference |
| 138 | # containing information about the option. This sub-hash |
| 139 | # should contain a number of the following items: |
| 140 | # |
| 141 | # Key Use |
| 142 | # ~~~ ~~~ |
| 143 | # |
| 144 | # return Value to return when this option is found. |
| 145 | # May be any sort of non-false scalar value. |
| 146 | # |
| 147 | # arg Information about the argument for this |
| 148 | # option. May be one of the strings `none', |
| 149 | # `opt' and `req'. (Actually `none' is the |
| 150 | # same as a false value, and `req' is the same |
| 151 | # as any other true value.) |
| 152 | # |
| 153 | # negate If true, allow the option to be negated. |
| 154 | # |
| 155 | # The `flags' argument is a reference to a array containing |
| 156 | # items from the following table. |
| 157 | # |
| 158 | # Flag Use |
| 159 | # ~~~~ ~~~ |
| 160 | # |
| 161 | # nolong Don't support any long options |
| 162 | # noshort Don't support any short options |
| 163 | # numeric Support numeric options |
| 164 | # negate Support negated options |
| 165 | # env Read options from environment variable |
| 166 | # permute Force permuting of the argument list |
| 167 | # inorder Read options in order |
| 168 | # posix Force use of POSIX option semantics |
| 169 | # quiet Don't report errors when they happen |
| 170 | |
| 171 | sub new |
| 172 | { |
| 173 | my $class=shift; |
| 174 | my $self=bless {}; # Make an empty reference for me |
| 175 | my ($short,$long,$argv,$flags)=@_; # Read the caller's arguments |
| 176 | my ($x); # Temporaries for copying |
| 177 | my ($prog); # Program name read from argv[0] |
| 178 | |
| 179 | # --- Set up the simple parts of the structure --- |
| 180 | |
| 181 | @{$self->{argv}}=@$argv; # Copy the arguments list |
| 182 | |
| 183 | $self->{flags}={}; # Clear the flags hash out |
| 184 | foreach $x (@$flags) { $self->{flags}{$x}=1; } |
| 185 | |
| 186 | $self->{short}=$short; # Copy the short options string |
| 187 | $self->{long}=$long; # Take a reference to the long opts |
| 188 | |
| 189 | # --- Get the arguments list sorted out --- |
| 190 | |
| 191 | $prog=$0; # Read the program name |
| 192 | $prog =~ s|^.*/||; # Strip leading gubbins from it |
| 193 | $self->{prog}=$prog; # This as the program name |
| 194 | |
| 195 | # --- Play with the ordering settings --- |
| 196 | |
| 197 | unless ($self->{flags}{permute} || |
| 198 | $self->{flags}{inorder} || |
| 199 | $self->{flags}{posix}) |
| 200 | { |
| 201 | if (defined($ENV{'POSIXLY_CORRECT'}) || |
| 202 | defined($ENV{'_POSIX_OPTION_ORDER'})) |
| 203 | { $self->{flags}{posix}=1; } |
| 204 | else |
| 205 | { $self->{flags}{permute}=1; } |
| 206 | } |
| 207 | |
| 208 | # --- Set up the environment variable, if we're reading that --- |
| 209 | # |
| 210 | # List concatenation is so easy ;-) This is actually better than the C |
| 211 | # version, although much less efficient, since it works `properly' with |
| 212 | # non-options in the options string. |
| 213 | |
| 214 | @{$self->{argv}}=(split(' ',$ENV{uc($self->{prog})}),@{$self->{argv}}) |
| 215 | if ($self->{flags}{env}); |
| 216 | |
| 217 | # --- Initialise persistent state bits --- |
| 218 | |
| 219 | $self->{rest}=[]; # No non-options found yet |
| 220 | $self->{this}=''; # We're not in a shortopt group |
| 221 | |
| 222 | # --- That's it, so we're done now --- |
| 223 | |
| 224 | return ($self); |
| 225 | } |
| 226 | |
| 227 | # --- mo->err --- |
| 228 | # |
| 229 | # Arguments: (scalar) error == a string to return |
| 230 | # |
| 231 | # Returns: A suitable error message from mo->read. |
| 232 | # |
| 233 | # Use: Contructs an error return and maybe displays the message to |
| 234 | # the user. |
| 235 | |
| 236 | sub err |
| 237 | { |
| 238 | my ($self,$msg)=@_; |
| 239 | |
| 240 | print STDERR "$self->{prog}: $msg\n" |
| 241 | unless $self->{flags}{quiet}; |
| 242 | return ($msg); |
| 243 | } |
| 244 | |
| 245 | # --- mo->read --- |
| 246 | # |
| 247 | # Arguments: -- |
| 248 | # |
| 249 | # Returns: A list containing interesting things about the option |
| 250 | # |
| 251 | # Use: Returns information about the next option read. The list |
| 252 | # contains, in order: |
| 253 | # |
| 254 | # * The `value' of the option, with a suffix `+' if negated |
| 255 | # * The argument passed to the option |
| 256 | # |
| 257 | # Non-options are reported by passing a `value' of an empty |
| 258 | # string. The end of the options is reported by returning |
| 259 | # `undef' as the value. An error is returned my setting |
| 260 | # `value' to `?' and putting the error message in the argument |
| 261 | # field. |
| 262 | |
| 263 | sub read |
| 264 | { |
| 265 | my ($self)=@_; # Read the arguments list |
| 266 | my ($opt,$arg,$prefix); |
| 267 | |
| 268 | if ($self->{this} eq '') # Have we any shortopts left? |
| 269 | { |
| 270 | $self->{flags}{_neg}=0; # This option isn't negated yet |
| 271 | |
| 272 | # --- Find the next option to handle --- |
| 273 | |
| 274 | arg: for (;;) |
| 275 | { |
| 276 | $opt=shift(@{$self->{argv}}); # Shift out the next option |
| 277 | return (undef,undef) |
| 278 | unless (defined($opt)); |
| 279 | |
| 280 | if ($opt =~ /^-/ || ($opt =~ /^\+/ && $self->{flags}{negate})) |
| 281 | { |
| 282 | if ($opt eq '--') # If no more options at all |
| 283 | { |
| 284 | push(@{$self->{rest}},@{$self->{argv}}); |
| 285 | return (undef,undef); # Return two undefined values |
| 286 | } |
| 287 | elsif (length($opt)!=1) |
| 288 | { last arg; } # Otherwise we've found an option |
| 289 | } |
| 290 | |
| 291 | switch: { |
| 292 | push(@{$self->{rest}},$opt,@{$self->{argv}}), |
| 293 | return (undef,undef) # And return two undefined values |
| 294 | if $self->{flags}{posix}; |
| 295 | |
| 296 | return ('',$opt) # Return this non-option |
| 297 | if $self->{flags}{inorder}; |
| 298 | |
| 299 | push(@{$self->{rest}},$opt) # Add to the `rest' list |
| 300 | ; |
| 301 | } |
| 302 | } |
| 303 | |
| 304 | # --- Check for a numeric option --- |
| 305 | |
| 306 | return ('#',substr($opt,1)) |
| 307 | if $self->{flags}{numeric} && $opt =~ /^-[+-]?[0-9]/; |
| 308 | |
| 309 | # --- Handle long options --- |
| 310 | # |
| 311 | # This is where things start getting hairy. |
| 312 | |
| 313 | if (($opt =~ /^--/ || $self->{flags}{noshort}) && |
| 314 | !$self->{flags}{nolong}) |
| 315 | { |
| 316 | my ($match,$key,$real); |
| 317 | |
| 318 | # --- Extract the prefix, option name and argument --- |
| 319 | # |
| 320 | # This is rather easier than the C version. |
| 321 | |
| 322 | ($self->{flags}{negate}) ? |
| 323 | (($prefix,$opt) = $opt =~ /^(\+|--no-|--|-)(.*)/) : |
| 324 | (($prefix,$opt) = $opt =~ /^(\+|--)(.*)/); |
| 325 | $self->{flags}{_neg}=1 if ($prefix eq '+' || $prefix eq '--no-'); |
| 326 | |
| 327 | ($opt,$arg)=($`,$') if $opt =~ /=/; |
| 328 | |
| 329 | # --- Now try and find an entry in the hash table --- |
| 330 | |
| 331 | longopt: foreach $key (keys(%{$self->{long}})) |
| 332 | { |
| 333 | next longopt |
| 334 | if $self->{flags}{_neg} && !$self->{long}{$key}{negate}; |
| 335 | |
| 336 | ($match,$real)=($self->{long}{$key},$key), |
| 337 | last longopt |
| 338 | if $key eq $opt; |
| 339 | |
| 340 | next longopt |
| 341 | if length($key)<length($opt) || |
| 342 | $opt ne substr($key,0,length($opt)); |
| 343 | |
| 344 | $match=undef, |
| 345 | last longopt |
| 346 | if defined($match); |
| 347 | |
| 348 | ($match,$real)=($self->{long}{$key},$key) |
| 349 | ; |
| 350 | } |
| 351 | |
| 352 | return ('?',$self->err("unrecognised option `$prefix$opt'")) |
| 353 | unless defined($match); |
| 354 | |
| 355 | if ($match->{arg} eq 'none' || !$match->{arg}) |
| 356 | { |
| 357 | return ('?', |
| 358 | $self->err("option `$prefix$real' does not accept " . |
| 359 | "arguments")) |
| 360 | if $arg; |
| 361 | } |
| 362 | elsif ($match->{arg} ne 'opt') |
| 363 | { |
| 364 | $arg=shift(@{$self->{argv}}) |
| 365 | unless $arg; |
| 366 | return ('?',$self->err("option `$prefix$real' requires an argument")) |
| 367 | unless defined($arg); |
| 368 | } |
| 369 | |
| 370 | $opt=($match->{"return"} || $real); |
| 371 | $opt .= '+' if ($self->{flags}{_neg}); |
| 372 | return ($opt,$arg); |
| 373 | } |
| 374 | |
| 375 | # --- Right, it must be a short option --- |
| 376 | |
| 377 | $self->{flags}{_neg}=1 if ($opt =~ /^\+/); |
| 378 | $self->{this}=substr($opt,1); |
| 379 | } |
| 380 | |
| 381 | # --- Handle the next short option --- |
| 382 | |
| 383 | ($opt,$self->{this})=(substr($self->{this},0,1),substr($self->{this},1)); |
| 384 | $prefix=($self->{flags}{_neg} ? '+' : '-'); |
| 385 | |
| 386 | if ($self->{short} =~ /\Q$opt/ && |
| 387 | (!$self->{flags}{_neg} || substr($',0,1) eq '+')) |
| 388 | { |
| 389 | my ($rest,$arg)=($',undef); |
| 390 | |
| 391 | # --- Found an option, so handle the argument --- |
| 392 | |
| 393 | $rest =~ /^\+?(:{0,2})/; |
| 394 | if ($1) |
| 395 | { |
| 396 | $arg=$self->{this}; |
| 397 | $self->{this}=''; |
| 398 | if ($1 eq ':' && !$arg) |
| 399 | { |
| 400 | $arg=shift(@{$self->{argv}}); |
| 401 | return ('?',$self->err("option `$prefix$opt' requires an argument")) |
| 402 | unless defined($arg); |
| 403 | } |
| 404 | } |
| 405 | |
| 406 | $opt.='+' if $self->{flags}{_neg}; |
| 407 | return ($opt,$arg); |
| 408 | } |
| 409 | return ('?',$self->err("unrecognised option `$prefix$opt'")); |
| 410 | } |
| 411 | |
| 412 | # --- mo->rest --- |
| 413 | # |
| 414 | # Arguments: -- |
| 415 | # |
| 416 | # Returns: A list containing the remaining command line items in order. |
| 417 | # |
| 418 | # Use: Returns all the unprocessed command line arguments. |
| 419 | |
| 420 | sub rest |
| 421 | { |
| 422 | my ($self)=@_; |
| 423 | return (@{$self->{rest}}); |
| 424 | } |
| 425 | |
| 426 | # --- prog --- |
| 427 | # |
| 428 | # Arguments: -- |
| 429 | # |
| 430 | # Returns: The program name, read from $0. |
| 431 | # |
| 432 | # Use: Returns the name of the program, with leading path elements |
| 433 | # snipped off. You can call this either as a class method or |
| 434 | # by passing a MdwOpt object. |
| 435 | |
| 436 | sub prog { $0 =~ m|^.*/| ? $' : $0 } |
| 437 | |
| 438 | 1; |