New quoted string handling and simple pattern matching.
[mLib] / str.c
1 /* -*-c-*-
2 *
3 * $Id: str.c,v 1.4 2000/10/08 09:43:34 mdw Exp $
4 *
5 * Functions for hacking with strings
6 *
7 * (c) 1999 Straylight/Edgeware
8 */
9
10 /*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of the mLib utilities library.
13 *
14 * mLib is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * mLib is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with mLib; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
30 /*----- Revision history --------------------------------------------------*
31 *
32 * $Log: str.c,v $
33 * Revision 1.4 2000/10/08 09:43:34 mdw
34 * New quoted string handling and simple pattern matching.
35 *
36 * Revision 1.3 1999/12/22 15:41:14 mdw
37 * Skip past trailing whitespace in str_getword.
38 *
39 * Revision 1.2 1999/05/26 20:52:57 mdw
40 * Add new `rest' argument for `str_split'.
41 *
42 * Revision 1.1 1999/05/17 20:37:01 mdw
43 * Some trivial string hacks.
44 *
45 */
46
47 /*----- Header files ------------------------------------------------------*/
48
49 #include <ctype.h>
50 #include <stdio.h>
51 #include <stdlib.h>
52 #include <string.h>
53
54 #include "str.h"
55
56 /*----- Main code ---------------------------------------------------------*/
57
/* --- @str_qword@ --- *
 *
 * Arguments:	@char **pp@ = address of the caller's cursor into the string
 *		@unsigned f@ = flags; only @STRF_QUOTE@ is examined here
 *
 * Returns:	Pointer to the next whitespace-delimited word, or null if
 *		the cursor is null or only whitespace remains.
 *
 * Use:		Extracts one word, editing the string in place: the word is
 *		compacted towards its own start and null-terminated.  With
 *		@STRF_QUOTE@ set, a backslash makes the following character
 *		literal, and single or double quotes protect whitespace (a
 *		backslash still escapes inside quotes).  The quote and
 *		escape characters themselves are dropped from the word.
 *		Whitespace following the word is skipped; the cursor is then
 *		left at the next word, or set null if nothing is left.
 */

#define STRF_QUOTE 1u

char *str_qword(char **pp, unsigned f)
{
  char *word, *src, *dst;
  int state = 0;		/* 0: bare text; '\\': escape pending;
				 * otherwise the open quote character */
  int resume = 0;		/* State to go back to after an escape */
  int quoting = (f & STRF_QUOTE) != 0;

  /* --- Find the start of the word --- */

  word = *pp;
  if (word == 0)
    return (0);
  for (; isspace((unsigned char)*word); word++)
    ;
  if (*word == '\0') {
    *pp = 0;
    return (0);
  }

  /* --- Copy the word down over itself --- *
   *
   * The write pointer never overtakes the read pointer, so each character
   * is read before anything can overwrite it.
   */

  src = dst = word;
  while (*src != '\0') {
    char ch = *src;

    if (state == '\\') {

      /* Escaped character: take it literally, then return to whatever
       * context the backslash was found in.
       */

      *dst++ = ch;
      state = resume;
    } else if (state == '\'' || state == '\"') {

      /* Inside a quoted section. */

      if (ch == state)
	state = resume = 0;
      else if (ch == '\\')
	state = '\\';
      else
	*dst++ = ch;
    } else if (isspace((unsigned char)ch)) {

      /* Unquoted whitespace ends the word.  Swallow the whole run so that
       * the cursor lands on the next word; leaving the loop here skips the
       * usual advance below.
       */

      do
	src++;
      while (*src != '\0' && isspace((unsigned char)*src));
      break;
    } else if (quoting && ch == '\\') {
      state = '\\';
    } else if (quoting && (ch == '\'' || ch == '\"')) {
      state = resume = ch;
    } else {
      *dst++ = ch;
    }
    src++;
  }

  /* --- Report back --- *
   *
   * Look at the read position before writing the terminator, since the two
   * pointers may coincide at the end of the string.
   */

  *pp = (*src != '\0') ? src : 0;
  *dst = '\0';
  return (word);
}
135
/* --- @str_qsplit@ --- *
 *
 * Arguments:	@char *p@ = pointer to the string to split
 *		@char *v[]@ = array to receive the word pointers
 *		@size_t c@ = number of slots available in @v@
 *		@char **rest@ = where to store the unparsed remainder, or null
 *		@unsigned f@ = flags, passed straight through to @str_qword@
 *
 * Returns:	The number of words stored in @v@.
 *
 * Use:		Calls @str_qword@ repeatedly, storing each word it returns in
 *		the next slot of @v@, until @c@ words have been stored or
 *		@str_qword@ returns null.  The string is modified in place
 *		by @str_qword@.  Every slot of @v@ which doesn't receive a
 *		word is set to a null pointer.  If @rest@ is non-null, it
 *		receives the cursor left behind by the last @str_qword@
 *		call (or @p@ itself if no call was made).
 */

size_t str_qsplit(char *p, char *v[], size_t c, char **rest, unsigned f)
{
  size_t found, i;
  char *word;

  /* --- Collect words until the array is full or the string runs dry --- *
   *
   * The slot count is checked first, so @str_qword@ is never called once
   * the array is full and the remainder is left untouched.
   */

  for (found = 0; found < c; found++) {
    word = str_qword(&p, f);
    if (word == 0)
      break;
    v[found] = word;
  }

  /* --- Blank out the slots which weren't used --- */

  for (i = found; i < c; i++)
    v[i] = 0;

  if (rest != 0)
    *rest = p;
  return (found);
}
175
/* --- @str_getword@ --- *
 *
 * Arguments:	@char **pp@ = address of the caller's cursor into the string
 *
 * Returns:	Whatever @str_qword@ returns for the same cursor with no
 *		flags set: the next space-separated word, or null.
 *
 * Use:		Compatibility veneer: identical to calling @str_qword@ with
 *		a flags argument of zero, so quotes and backslashes are
 *		treated as ordinary characters.
 */

char *str_getword(char **pp)
{
  char *word = str_qword(pp, 0);

  return (word);
}
191
/* --- @str_split@ --- *
 *
 * Arguments:	@char *p@ = pointer to the string to split
 *		@char *v[]@ = array to receive the word pointers
 *		@size_t c@ = number of slots available in @v@
 *		@char **rest@ = where to store the unparsed remainder, or null
 *
 * Returns:	Whatever @str_qsplit@ returns: the number of words stored.
 *
 * Use:		Compatibility veneer: identical to calling @str_qsplit@ with
 *		a flags argument of zero.
 */

size_t str_split(char *p, char *v[], size_t c, char **rest)
{
  size_t nwords = str_qsplit(p, v, c, rest, 0);

  return (nwords);
}
209
/* --- @match_class@ --- *
 *
 * Arguments:	@const char *p@ = pattern, just after the opening `['
 *		@char sch@ = subject character to test
 *		@int *ok@ = where to store whether the class accepts @sch@
 *
 * Returns:	Pointer just past the closing `]', or null if the class is
 *		not terminated (in which case @*ok@ is left untouched).
 *
 * Use:		Parses one character class.  A leading `^' or `!' negates
 *		the class.  A `]' appearing as the first member is literal.
 *		`x-y' is a range unless the `-' is immediately followed by
 *		`]' or the end of the pattern, in which case the `-' is an
 *		ordinary member.  Comparisons are made on @unsigned char@
 *		values so that ranges behave the same whatever the
 *		signedness of plain @char@.
 */

static const char *match_class(const char *p, char sch, int *ok)
{
  unsigned char c = (unsigned char)sch, lo, hi;
  int negate = 0, hit = 0, first = 1;

  if (*p == '^' || *p == '!') {
    negate = 1;
    p++;
  }

  for (;; first = 0) {

    /* Running off the end of the pattern means the class was never closed;
     * stop here rather than stepping past the terminator.
     */

    if (!*p)
      return (0);
    if (*p == ']' && !first) {
      p++;
      break;
    }

    lo = (unsigned char)*p++;
    if (*p == '-' && p[1] && p[1] != ']') {
      hi = (unsigned char)p[1];
      p += 2;
    } else
      hi = lo;
    if (lo <= c && c <= hi)
      hit = 1;
  }

  *ok = (hit != negate);
  return (p);
}

/* --- @str_match@ --- *
 *
 * Arguments:	@const char *p@ = pointer to pattern string
 *		@const char *s@ = string to compare with
 *
 * Returns:	Nonzero if the pattern matches the whole string, zero if not.
 *
 * Use:		Does simple wildcard matching.  `?' matches any single
 *		character; `*' matches any run of characters, including an
 *		empty one; `[...]' matches one character from a class (see
 *		@match_class@ for the syntax); `\' makes the following
 *		pattern character literal.  A pattern containing an
 *		unterminated class never matches.  Matching after a `*' is
 *		done by recursion, trying each possible tail of the string,
 *		so patterns with many stars can be slow.
 */

int str_match(const char *p, const char *s)
{
  for (;;) {
    char pch = *p++;
    int ok;

    switch (pch) {

      case '?':
	if (!*s)
	  return (0);
	s++;
	break;

      case '*':

	/* Adjacent stars are equivalent to a single star. */

	while (*p == '*')
	  p++;
	if (!*p)
	  return (1);

	/* Try the rest of the pattern against every tail of the string,
	 * including the empty tail at the very end.
	 */

	for (;;) {
	  if (str_match(p, s))
	    return (1);
	  if (!*s)
	    break;
	  s++;
	}
	return (0);

      case '[':
	if (!*s)
	  return (0);
	p = match_class(p, *s, &ok);
	if (!p || !ok)
	  return (0);
	s++;
	break;

      case '\\':

	/* Take the next pattern character literally.  If the pattern ends
	 * with the backslash, stay on the terminator: the comparison below
	 * then succeeds only at the end of the string.
	 */

	pch = *p;
	if (pch)
	  p++;
	/* fallthrough */

      default:
	if (pch != *s)
	  return (0);
	if (!pch)
	  return (1);
	s++;
	break;
    }
  }
}
303
/* --- @str_sanitize@ --- *
 *
 * Arguments:	@char *d@ = destination buffer
 *		@const char *p@ = pointer to source string
 *		@size_t sz@ = size of destination buffer, in bytes
 *
 * Returns:	---
 *
 * Use:		Copies at most @sz - 1@ characters of the source into the
 *		destination and always null-terminates the result.  Any
 *		character for which @isgraph@ is false (which includes
 *		spaces) is replaced by an underscore.  If @sz@ is zero, the
 *		destination is not touched at all.
 */

void str_sanitize(char *d, const char *p, size_t sz)
{
  size_t room, i;

  if (sz == 0)
    return;

  /* --- Keep one byte back for the terminator --- */

  room = sz - 1;

  for (i = 0; p[i] != '\0' && i < room; i++) {
    if (isgraph((unsigned char)p[i]))
      d[i] = p[i];
    else
      d[i] = '_';
  }
  d[i] = '\0';
}
331
332 /*----- That's all, folks -------------------------------------------------*/