mtimeout.1: Use correct dash for number ranges.
[misc] / MdwOpt.pm
1 #
2 # MdwOpt.pm
3 #
4 # Options parsing
5 #
6 # (c) 1996 Mark Wooding
7 #
8
9 #----- Notices --------------------------------------------------------------
10 #
11 # This program comes with no warranty, not even of any kind, unless
12 # someone other than the author offers to provide one. It may be used
13 # and distributed under the terms of the GNU General Public Licence, in
14 # the interests of promoting freely available software for Linux.
15
16 package MdwOpt;
17 require 5.00;
18 require Exporter;
19
20 @ISA=qw(Exporter);
21 @EXPORT=qw( );
22
23 #----- The code -------------------------------------------------------------
24
25 # --- MdwOpt::new ---
26 #
27 # Arguments: (scalar) shortopts == short options description
28 # (see below) longopts == long options description
29 # (array ref) arguments == pointer to argument list
30 # (array ref) flags == a number of flags you can set
31 #
32 # Returns: A `MdwOpt' object, which can be used to extract options from
33 # an array of argument strings.
34 #
35 # Use: Creates a `MdwOpt' object which contains all the information
36 # needed to parse a command line. The arguments are a bit
37 # complicated, so I'll explain them below. This implementation
38 # provides a similar level of flexibility to the C `mdwopt'
39 # routine, although the interface is rather different, since it
40 # takes advantage of some of Perl's object-oriented features.
41 #
42 #
43 # How options parsing appears to users
44 #
45 # A command line consists of a number of `words' (which may
46 # contain spaces, according to various shell quoting
47 # conventions). A word may be an option, an argument to an
48 # option, or a non-option. An option begins with a special
49 # character, usually `-', although `+' is also used sometimes.
50 # As special exceptions, the word containing only a `-' is
51 # considered to be a non-option, since it usually represents
52 # standard input or output as a filename, and the word
53 # containing a double-dash `--' is used to mark all following
54 # words as being non-options regardless of their initial
55 # character.
56 #
57 # Traditionally, all words after the first non-option have been
58 # considered to be non-options automatically, so that options
59 # must be specified before filenames. However, this
60 # implementation can extract all the options from the command
61 # line regardless of their position. This can usually be
62 # disabled by setting one of the environment variables
63 # `POSIXLY_CORRECT' or `_POSIX_OPTION_ORDER'.
64 #
65 # There are two different styles of options: `short' and
66 # `long'.
67 #
68 # Short options are the sort which Unix has known for ages: an
69 # option is a single letter, preceded by a `-'. Short options
70 # can be joined together to save space (and possibly to make
71 # silly words): e.g., instead of giving options `-x -y', a user
72 # could write `-xy'. Some short options can have arguments,
73 # which appear after the option letter, either immediately
74 # following, or in the next `word' (so an option with an
75 # argument could be written as `-o foo' or as `-ofoo'). Note
76 # that options with optional arguments must be written in the
77 # second style.
78 #
79 # When a short option controls a flag setting, it is sometimes
80 # possible to explicitly turn the flag off, as well as turning
81 # it on, (usually to override default options). This is
82 # usually done by using a `+' instead of a `-' to introduce the
83 # option.
84 #
85 # Long options, as popularised by the GNU utilities, are given
86 # long-ish memorable names, preceded by a double-dash `--'.
87 # Since their names are more than a single character, long
88 # options can't be combined in the same way as short options.
89 # Arguments to long options may be given either in the same
90 # `word', separated from the option name by an equals sign,
91 # or in the following `word'.
92 #
93 # Long option names can be abbreviated if necessary, as long
94 # as the abbreviation is unique. This means that options can
95 # have sensible and memorable names but still not require much
96 # typing from an experienced user.
97 #
98 # Like short options, long options can control flag settings.
99 # The options to manipulate these settings come in pairs: an
100 # option of the form `--set-flag' might set the flag, while an
101 # option of the form `--no-set-flag' might clear it.
102 #
103 # It is usual for applications to provide both short and long
104 # options with identical behaviour. Some applications with
105 # lots of options may only provide long options (although they
106 # will often be only two or three characters long). In this
107 # case, long options can be preceded with a single `-'
108 # character, and negated by a `+' character.
109 #
110 # Finally, some (older) programs accept arguments of the form
111 # `-<number>', to set some numerical parameter, typically a
112 # line count of some kind.
113 #
114 #
115 # How programs parse options
116 #
117 # The difficult bit is all in the setting up at the beginning.
118 # I've used some funny data structures to try and pack all the
119 # important information away.
120 #
121 # The first `shortopts' argument specifies the allowable short
122 # options, followed by various switch characters which control
123 # option-specific features. Allowable characters are as
124 # follows:
125 #
126 # : option takes a required argument
127 # :: option takes an optional argument
128 # + option may be negated
129 #
130 # Note that the `+' must appear /before/ the `:' characters.
131 #
132 # The `longopts' argument is a reference to a hash, containing
133 # various pieces of information. (Using a reference here means
134 # that we can pass other aggregate values around. It also
135 # might save a little memory.) The hash contains an item for
136 # each long option string you want to support: the option's
137 # name is the key; the value is another hash reference
138 # containing information about the option. This sub-hash
139 # should contain a number of the following items:
140 #
141 # Key Use
142 # ~~~ ~~~
143 #
144 # return Value to return when this option is found.
145 # May be any sort of non-false scalar value.
146 #
147 # arg Information about the argument for this
148 # option. May be one of the strings `none',
149 # `opt' and `req'. (Actually `none' is the
150 # same as a false value, and `req' is the same
151 # as any other true value.)
152 #
153 # negate If true, allow the option to be negated.
154 #
155 # The `flags' argument is a reference to a array containing
156 # items from the following table.
157 #
158 # Flag Use
159 # ~~~~ ~~~
160 #
161 # nolong Don't support any long options
162 # noshort Don't support any short options
163 # numeric Support numeric options
164 # negate Support negated options
165 # env Read options from environment variable
166 # permute Force permuting of the argument list
167 # inorder Read options in order
168 # posix Force use of POSIX option semantics
169 # quiet Don't report errors when they happen
170
171 sub new
172 {
173 my $class=shift;
174 my $self=bless {}; # Make an empty reference for me
175 my ($short,$long,$argv,$flags)=@_; # Read the caller's arguments
176 my ($x); # Temporaries for copying
177 my ($prog); # Program name read from argv[0]
178
179 # --- Set up the simple parts of the structure ---
180
181 @{$self->{argv}}=@$argv; # Copy the arguments list
182
183 $self->{flags}={}; # Clear the flags hash out
184 foreach $x (@$flags) { $self->{flags}{$x}=1; }
185
186 $self->{short}=$short; # Copy the short options string
187 $self->{long}=$long; # Take a reference to the long opts
188
189 # --- Get the arguments list sorted out ---
190
191 $prog=$0; # Read the program name
192 $prog =~ s|^.*/||; # Strip leading gubbins from it
193 $self->{prog}=$prog; # This as the program name
194
195 # --- Play with the ordering settings ---
196
197 unless ($self->{flags}{permute} ||
198 $self->{flags}{inorder} ||
199 $self->{flags}{posix})
200 {
201 if (defined($ENV{'POSIXLY_CORRECT'}) ||
202 defined($ENV{'_POSIX_OPTION_ORDER'}))
203 { $self->{flags}{posix}=1; }
204 else
205 { $self->{flags}{permute}=1; }
206 }
207
208 # --- Set up the environment variable, if we're reading that ---
209 #
210 # List concatenation is so easy ;-) This is actually better than the C
211 # version, although much less efficient, since it works `properly' with
212 # non-options in the options string.
213
214 @{$self->{argv}}=(split(' ',$ENV{uc($self->{prog})}),@{$self->{argv}})
215 if ($self->{flags}{env});
216
217 # --- Initialise persistent state bits ---
218
219 $self->{rest}=[]; # No non-options found yet
220 $self->{this}=''; # We're not in a shortopt group
221
222 # --- That's it, so we're done now ---
223
224 return ($self);
225 }
226
227 # --- mo->err ---
228 #
229 # Arguments: (scalar) error == a string to return
230 #
231 # Returns: A suitable error message from mo->read.
232 #
233 # Use: Contructs an error return and maybe displays the message to
234 # the user.
235
236 sub err
237 {
238 my ($self,$msg)=@_;
239
240 print STDERR "$self->{prog}: $msg\n"
241 unless $self->{flags}{quiet};
242 return ($msg);
243 }
244
245 # --- mo->read ---
246 #
247 # Arguments: --
248 #
249 # Returns: A list containing interesting things about the option
250 #
251 # Use: Returns information about the next option read. The list
252 # contains, in order:
253 #
254 # * The `value' of the option, with a suffix `+' if negated
255 # * The argument passed to the option
256 #
257 # Non-options are reported by passing a `value' of an empty
258 # string. The end of the options is reported by returning
259 # `undef' as the value. An error is returned my setting
260 # `value' to `?' and putting the error message in the argument
261 # field.
262
263 sub read
264 {
265 my ($self)=@_; # Read the arguments list
266 my ($opt,$arg,$prefix);
267
268 if ($self->{this} eq '') # Have we any shortopts left?
269 {
270 $self->{flags}{_neg}=0; # This option isn't negated yet
271
272 # --- Find the next option to handle ---
273
274 arg: for (;;)
275 {
276 $opt=shift(@{$self->{argv}}); # Shift out the next option
277 return (undef,undef)
278 unless (defined($opt));
279
280 if ($opt =~ /^-/ || ($opt =~ /^\+/ && $self->{flags}{negate}))
281 {
282 if ($opt eq '--') # If no more options at all
283 {
284 push(@{$self->{rest}},@{$self->{argv}});
285 return (undef,undef); # Return two undefined values
286 }
287 elsif (length($opt)!=1)
288 { last arg; } # Otherwise we've found an option
289 }
290
291 switch: {
292 push(@{$self->{rest}},$opt,@{$self->{argv}}),
293 return (undef,undef) # And return two undefined values
294 if $self->{flags}{posix};
295
296 return ('',$opt) # Return this non-option
297 if $self->{flags}{inorder};
298
299 push(@{$self->{rest}},$opt) # Add to the `rest' list
300 ;
301 }
302 }
303
304 # --- Check for a numeric option ---
305
306 return ('#',substr($opt,1))
307 if $self->{flags}{numeric} && $opt =~ /^-[+-]?[0-9]/;
308
309 # --- Handle long options ---
310 #
311 # This is where things start getting hairy.
312
313 if (($opt =~ /^--/ || $self->{flags}{noshort}) &&
314 !$self->{flags}{nolong})
315 {
316 my ($match,$key,$real);
317
318 # --- Extract the prefix, option name and argument ---
319 #
320 # This is rather easier than the C version.
321
322 ($self->{flags}{negate}) ?
323 (($prefix,$opt) = $opt =~ /^(\+|--no-|--|-)(.*)/) :
324 (($prefix,$opt) = $opt =~ /^(\+|--)(.*)/);
325 $self->{flags}{_neg}=1 if ($prefix eq '+' || $prefix eq '--no-');
326
327 ($opt,$arg)=($`,$') if $opt =~ /=/;
328
329 # --- Now try and find an entry in the hash table ---
330
331 longopt: foreach $key (keys(%{$self->{long}}))
332 {
333 next longopt
334 if $self->{flags}{_neg} && !$self->{long}{$key}{negate};
335
336 ($match,$real)=($self->{long}{$key},$key),
337 last longopt
338 if $key eq $opt;
339
340 next longopt
341 if length($key)<length($opt) ||
342 $opt ne substr($key,0,length($opt));
343
344 $match=undef,
345 last longopt
346 if defined($match);
347
348 ($match,$real)=($self->{long}{$key},$key)
349 ;
350 }
351
352 return ('?',$self->err("unrecognised option `$prefix$opt'"))
353 unless defined($match);
354
355 if ($match->{arg} eq 'none' || !$match->{arg})
356 {
357 return ('?',
358 $self->err("option `$prefix$real' does not accept " .
359 "arguments"))
360 if $arg;
361 }
362 elsif ($match->{arg} ne 'opt')
363 {
364 $arg=shift(@{$self->{argv}})
365 unless $arg;
366 return ('?',$self->err("option `$prefix$real' requires an argument"))
367 unless defined($arg);
368 }
369
370 $opt=($match->{"return"} || $real);
371 $opt .= '+' if ($self->{flags}{_neg});
372 return ($opt,$arg);
373 }
374
375 # --- Right, it must be a short option ---
376
377 $self->{flags}{_neg}=1 if ($opt =~ /^\+/);
378 $self->{this}=substr($opt,1);
379 }
380
381 # --- Handle the next short option ---
382
383 ($opt,$self->{this})=(substr($self->{this},0,1),substr($self->{this},1));
384 $prefix=($self->{flags}{_neg} ? '+' : '-');
385
386 if ($self->{short} =~ /\Q$opt/ &&
387 (!$self->{flags}{_neg} || substr($',0,1) eq '+'))
388 {
389 my ($rest,$arg)=($',undef);
390
391 # --- Found an option, so handle the argument ---
392
393 $rest =~ /^\+?(:{0,2})/;
394 if ($1)
395 {
396 $arg=$self->{this};
397 $self->{this}='';
398 if ($1 eq ':' && !$arg)
399 {
400 $arg=shift(@{$self->{argv}});
401 return ('?',$self->err("option `$prefix$opt' requires an argument"))
402 unless defined($arg);
403 }
404 }
405
406 $opt.='+' if $self->{flags}{_neg};
407 return ($opt,$arg);
408 }
409 return ('?',$self->err("unrecognised option `$prefix$opt'"));
410 }
411
412 # --- mo->rest ---
413 #
414 # Arguments: --
415 #
416 # Returns: A list containing the remaining command line items in order.
417 #
418 # Use: Returns all the unprocessed command line arguments.
419
420 sub rest
421 {
422 my ($self)=@_;
423 return (@{$self->{rest}});
424 }
425
426 # --- prog ---
427 #
428 # Arguments: --
429 #
430 # Returns: The program name, read from $0.
431 #
432 # Use: Returns the name of the program, with leading path elements
433 # snipped off. You can call this either as a class method or
434 # by passing a MdwOpt object.
435
436 sub prog { $0 =~ m|^.*/| ? $' : $0 }
437
438 1;