@@@ much mess, mostly manpages
[mLib] / utils / str.3.in
CommitLineData
b6b9d458 1.\" -*-nroff-*-
c4ccbbf9
MW
2.\"
3.\" Manual for string utilities
4.\"
5.\" (c) 1999--2001, 2005--2007, 2009, 2019, 2024 Straylight/Edgeware
6.\"
7.
8.\"----- Licensing notice ---------------------------------------------------
9.\"
10.\" This file is part of the mLib utilities library.
11.\"
12.\" mLib is free software: you can redistribute it and/or modify it under
13.\" the terms of the GNU Library General Public License as published by
14.\" the Free Software Foundation; either version 2 of the License, or (at
15.\" your option) any later version.
16.\"
17.\" mLib is distributed in the hope that it will be useful, but WITHOUT
18.\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19.\" FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
20.\" License for more details.
21.\"
22.\" You should have received a copy of the GNU Library General Public
23.\" License along with mLib. If not, write to the Free Software
24.\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
25.\" USA.
26.
27.\"--------------------------------------------------------------------------
28.so ../defs.man \" @@@PRE@@@
29.
30.\"--------------------------------------------------------------------------
31.TH str 3mLib "20 June 1999" "Straylight/Edgeware" "mLib utilities library"
efae42a6 32.\" @str_qword
33.\" @str_qsplit
08da152e 34.\" @str_getword
35.\" @str_split
26f325c0 36.\" @str_matchx
efae42a6 37.\" @str_match
08da152e 38.\" @str_sanitize
c4ccbbf9
MW
39.
40.\"--------------------------------------------------------------------------
41.SH NAME
42str \- small string utilities
43.
44.\"--------------------------------------------------------------------------
b6b9d458 45.SH SYNOPSIS
c4ccbbf9 46.
b6b9d458 47.nf
48.B "#include <mLib/str.h>"
d056fbdf 49.PP
efae42a6 50.BI "char *str_qword(char **" pp ", unsigned " f );
51.BI "size_t str_qsplit(char *" p ", char *" v "[], size_t " c ,
52.BI " char **" rest ", unsigned " f );
b6b9d458 53.BI "char *str_getword(char **" pp );
54.BI "size_t str_split(char *" p ", char *" v "[], size_t " c ", char **" rest );
26f325c0 55.BI "int str_matchx(const char *" p ", const char *" s ", unsigned " f );
efae42a6 56.BI "int str_match(const char *" p ", const char *" s );
b6b9d458 57.BI "void str_sanitize(char *" d ", const char *" p ", size_t " sz );
58.fi
c4ccbbf9
MW
59.
60.\"--------------------------------------------------------------------------
b6b9d458 61.SH DESCRIPTION
c4ccbbf9 62.
b6b9d458 63The header file
64.B <mLib/str.h>
65contains a few small utility functions for manipulating null-terminated
d4efbcd9 66strings.
b6b9d458 67.PP
68The function
efae42a6 69.B str_qword
b6b9d458 70extracts the next whitespace-delimited word from a string. The
71function's argument,
72.IR pp ,
73is the address of a pointer into the string: this pointer is updated by
efae42a6 74.B str_qword
b6b9d458 75so that it can extract the following word on the next call and so on.
76The return value is the address of the next word, appropriately null
77terminated. A null pointer is returned if the entire remainder of the
78string is whitespace. Note that
efae42a6 79.B str_qword
b6b9d458 80modifies the string as it goes, to null-terminate the individual words.
efae42a6 81If the flag
82.B STRF_QUOTE
83is passed, the single- and double-quote characters may be used to quote
84whitespace within words, and the backslash can escape quote characters
85and whitespace.
b6b9d458 86.PP
87The function
efae42a6 88.B str_qsplit
b6b9d458 89divides a string into whitespace-separated words. The arguments are as
90follows:
91.TP
ff76c38f 92.BI "char *" p
b6b9d458 93The address of the string to split. The string is modified by having
94null terminators written after each word extracted.
95.TP
ff76c38f 96.BI "char *" v []
b6b9d458 97The address of an array of pointers to characters. This array will be
98filled in by
99.BR str_qsplit :
100the first entry will point to the first word extracted from the string,
101and so on. If there aren't enough words in the string, the remaining
102array elements are filled with null pointers.
103.TP
ff76c38f 104.BI "size_t " c
d2a91066 105The maximum number of words to extract; also, the number of elements in
b6b9d458 106the array
107.IR v .
108.TP
ff76c38f 109.BI "char **" rest
b6b9d458 110The address of a pointer in which to store the address of the remainder
111of the string. Leading whitespace is removed from the remainder before
112storing. If the remainder string is empty, a null pointer is stored
113instead. If
114.I rest
115is null, the remainder pointer is discarded.
efae42a6 116.TP
117.BI "unsigned " f
118Flags, as for
119.BR str_qword .
b6b9d458 120.PP
121The return value of
efae42a6 122.B str_qsplit
b6b9d458 123is the number of words extracted from the input string.
124.PP
efae42a6 125The functions
126.B str_getword
127and
128.B str_split
129are veneers over
130.B str_qword
131and
132.B str_qsplit
133respectively; they are equivalent to calls to the latter functions with
134flags words of zero.
135.PP
136The
26f325c0 137.B str_matchx
efae42a6 138function does simple wildcard matching. The first argument is a
139pattern, which may contain metacharacters:
140.RB ` * '
141matches zero or more arbitrary characters;
142.RB ` ? '
143matches exactly one arbitrary character; and
144.RB ` [ ... ] '
145matches one of the characters listed. The backslash
146.RB ` \e '
147escapes the following character. Within square brackets, the
148hyphen
149.RB ` \- '
150may be used to designate ranges of characters. If the initial character
151is
152.RB ` ! '
153or
154.RB ` ^ '
155then the sense of the match is reversed. To literally match a
156.RB ` ] '
157character, list it first; to literally match a
158.RB ` \- '
159character, list it immediately after a range, or at the beginning or end
160of the set. The return value is nonzero if the pattern
161.I p
162matches the given string
163.IR s ,
26f325c0
MW
164or zero if the pattern doesn't match. If the flag
165.B STRF_PREFIX
166is passed,
167.B str_matchx
168returns true if it reaches the end of the target string before finding a
169mismatch \(en i.e., if the target string is a prefix of a string which
170might match the pattern. The function
171.B str_match
172is a convenient wrapper for
173.B str_matchx
174with a zero flags word, which is the normal case.
efae42a6 175.PP
b6b9d458 176The function
177.B str_sanitize
178copies at most
5e80901e
MW
179.I sz
180\- 1
b6b9d458 181characters from the string
182.I p
183to
184.IR d .
185The result string is null terminated. Any nonprinting characters in
186.I p
187are replaced by an underscore
188.RB ` _ '
189when written to
190.IR d .
c4ccbbf9
MW
191.
192.\"--------------------------------------------------------------------------
b6b9d458 193.SH EXAMPLES
c4ccbbf9 194.
b6b9d458 195Given the code
196.VS
197char p[] = " alpha beta gamma delta ";
198char *v[3];
199size_t n;
200char *q;
d056fbdf 201.VP
b6b9d458 202n = str_split(p, v, 3, &q);
203.VE
204following the call to
205.BR str_split ,
206.B n
207will have the value 3,
208.B v[0]
209will point to
210.RB ` alpha ',
211.B v[1]
212will point to
213.RB ` beta ',
214.B v[2]
215will point to
216.RB ` gamma '
217and
218.B q
219will point to
220.RB ` delta\ '
221(note the trailing space).
222.PP
223Similarly, given the string
224.B """\ alpha\ \ beta\ """
225instead,
226.B n
227will be assigned the value 2,
228.B v[0]
229and
230.B v[1]
231will have the same values as last time, and
232.B v[2]
233and
234.B q
235will be null.
c4ccbbf9
MW
236.
237.\"--------------------------------------------------------------------------
08da152e 238.SH "SEE ALSO"
c4ccbbf9 239.
08da152e 240.BR mLib (3).
c4ccbbf9
MW
241.
242.\"--------------------------------------------------------------------------
b6b9d458 243.SH AUTHOR
c4ccbbf9 244.
9b5ac6ff 245Mark Wooding, <mdw@distorted.org.uk>
c4ccbbf9
MW
246.
247.\"----- That's all, folks --------------------------------------------------