@@@ much mess, mostly manpages
[mLib] / utils / str.3.in
1 .\" -*-nroff-*-
2 .\"
3 .\" Manual for string utilities
4 .\"
5 .\" (c) 1999--2001, 2005--2007, 2009, 2019, 2024 Straylight/Edgeware
6 .\"
7 .
8 .\"----- Licensing notice ---------------------------------------------------
9 .\"
10 .\" This file is part of the mLib utilities library.
11 .\"
12 .\" mLib is free software: you can redistribute it and/or modify it under
13 .\" the terms of the GNU Library General Public License as published by
14 .\" the Free Software Foundation; either version 2 of the License, or (at
15 .\" your option) any later version.
16 .\"
17 .\" mLib is distributed in the hope that it will be useful, but WITHOUT
18 .\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 .\" FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
20 .\" License for more details.
21 .\"
22 .\" You should have received a copy of the GNU Library General Public
23 .\" License along with mLib. If not, write to the Free Software
24 .\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
25 .\" USA.
26 .
27 .\"--------------------------------------------------------------------------
28 .so ../defs.man \" @@@PRE@@@
29 .
30 .\"--------------------------------------------------------------------------
31 .TH str 3mLib "20 June 1999" "Straylight/Edgeware" "mLib utilities library"
32 .\" @str_qword
33 .\" @str_qsplit
34 .\" @str_getword
35 .\" @str_split
36 .\" @str_matchx
37 .\" @str_match
38 .\" @str_sanitize
39 .
40 .\"--------------------------------------------------------------------------
41 .SH NAME
42 str \- small string utilities
43 .
44 .\"--------------------------------------------------------------------------
45 .SH SYNOPSIS
46 .
47 .nf
48 .B "#include <mLib/str.h>"
49 .PP
50 .BI "char *str_qword(char **" pp ", unsigned " f );
51 .BI "size_t str_qsplit(char *" p ", char *" v "[], size_t " c ,
52 .BI " char **" rest ", unsigned " f );
53 .BI "char *str_getword(char **" pp );
54 .BI "size_t str_split(char *" p ", char *" v "[], size_t " c ", char **" rest );
55 .BI "int str_matchx(const char *" p ", const char *" s ", unsigned " f );
56 .BI "int str_match(const char *" p ", const char *" s );
57 .BI "void str_sanitize(char *" d ", const char *" p ", size_t " sz );
58 .fi
59 .
60 .\"--------------------------------------------------------------------------
61 .SH DESCRIPTION
62 .
63 The header file
64 .B <mLib/str.h>
65 contains a few small utility functions for manipulating null-terminated
66 strings.
67 .PP
68 The function
69 .B str_qword
70 extracts the next whitespace-delimited word from a string. The
71 function's argument,
72 .IR pp ,
73 is the address of a pointer into the string: this pointer is updated by
74 .B str_qword
75 so that it can extract the following word on the next call and so on.
76 The return value is the address of the next word, appropriately null
77 terminated. A null pointer is returned if the entire remainder of the
78 string is whitespace. Note that
79 .B str_qword
80 modifies the string as it goes, to null-terminate the individual words.
81 If the flag
82 .B STRF_QUOTE
83 is passed, the single- and double-quote characters may be used to quote
84 whitespace within words, and the backslash can escape quote characters
85 and whitespace.
86 .PP
87 The function
88 .B str_qsplit
89 divides a string into whitespace-separated words. The arguments are as
90 follows:
91 .TP
92 .BI "char *" p
93 The address of the string to split. The string is modified by having
94 null terminators written after each word extracted.
95 .TP
96 .BI "char *" v []
97 The address of an array of pointers to characters. This array will be
98 filled in by
99 .BR str_qsplit :
100 the first entry will point to the first word extracted from the string,
101 and so on. If there aren't enough words in the string, the remaining
102 array elements are filled with null pointers.
103 .TP
104 .BI "size_t " c
105 The maximum number of words to extract; also, the number of elements in
106 the array
107 .IR v .
108 .TP
109 .BI "char **" rest
110 The address of a pointer in which to store the address of the remainder
111 of the string. Leading whitespace is removed from the remainder before
112 storing. If the remainder string is empty, a null pointer is stored
113 instead. If
114 .I rest
115 is null, the remainder pointer is discarded.
116 .TP
117 .BI "unsigned " f
118 Flags, as for
119 .BR str_qword .
120 .PP
121 The return value of
122 .B str_qsplit
123 is the number of words extracted from the input string.
124 .PP
125 The functions
126 .B str_getword
127 and
128 .B str_split
129 are veneers over
130 .B str_qword
131 and
132 .B str_qsplit
133 respectively; they are equivalent to calls to the latter functions with
134 flags words of zero.
135 .PP
136 The
137 .B str_matchx
138 function does simple wildcard matching. The first argument is a
139 pattern, which may contain metacharacters:
140 .RB ` * '
141 matches zero or more arbitrary characters;
142 .RB ` ? '
143 matches exactly one arbitrary character; and
144 .RB ` [ ... ] '
145 matches one of the characters listed. The backslash
146 .RB ` \e '
147 escapes the following character. Within square brackets, the
148 hyphen
149 .RB ` \- '
150 may be used to designate ranges of characters. If the initial character
151 is
152 .RB ` ! '
153 or
154 .RB ` ^ '
155 then the sense of the match is reversed. To literally match a
156 .RB ` ] '
157 character, list it first; to literally match a
158 .RB ` \- '
159 character, list it immediately after a range, or at the beginning or end
160 of the set. The return value is nonzero if the pattern
161 .I p
162 matches the given string
163 .IR s ,
164 or zero if the pattern doesn't match. If the flag
165 .B STRF_PREFIX
166 is passed,
167 .B str_matchx
168 returns true if it reaches the end of the target string before finding a
169 mismatch \(en i.e., if the target string is a prefix of a string which
170 might match the pattern. The function
171 .B str_match
172 is a convenient wrapper for
173 .B str_matchx
174 with a zero flags word, which is the normal case.
175 .PP
176 The function
177 .B str_sanitize
178 copies at most
179 .I sz
180 \- 1
181 characters from the string
182 .I p
183 to
184 .IR d .
185 The result string is null terminated. Any nonprinting characters in
186 .I p
187 are replaced by an underscore
188 .RB ` _ '
189 when written to
190 .IR d .
191 .
192 .\"--------------------------------------------------------------------------
193 .SH EXAMPLES
194 .
195 Given the code
196 .VS
197 char p[] = " alpha beta gamma delta ";
198 char *v[3];
199 size_t n;
200 char *q;
201 .VP
202 n = str_split(p, v, 3, &q);
203 .VE
204 following the call to
205 .BR str_split ,
206 .B n
207 will have the value 3,
208 .B v[0]
209 will point to
210 .RB ` alpha ',
211 .B v[1]
212 will point to
213 .RB ` beta ',
214 .B v[2]
215 will point to
216 .RB ` gamma '
217 and
218 .B q
219 will point to
220 .RB ` delta\ '
221 (note the trailing space).
222 .PP
223 Similarly, given the string
224 .B """\ alpha\ \ beta\ """
225 instead,
226 .B n
227 will be assigned the value 2,
228 .B v[0]
229 and
230 .B v[1]
231 will have the same values as last time, and
232 .B v[2]
233 and
234 .B q
235 will be null.
236 .
237 .\"--------------------------------------------------------------------------
238 .SH "SEE ALSO"
239 .
240 .BR mLib (3).
241 .
242 .\"--------------------------------------------------------------------------
243 .SH AUTHOR
244 .
245 Mark Wooding, <mdw@distorted.org.uk>
246 .
247 .\"----- That's all, folks --------------------------------------------------