Commit | Line | Data |
---|---|---|
7b8ff279 MW |
1 | /* -*-c-*- |
2 | * | |
3 | * Common definitions for `runlisp' | |
4 | * | |
5 | * (c) 2020 Mark Wooding | |
6 | */ | |
7 | ||
8 | /*----- Licensing notice --------------------------------------------------* | |
9 | * | |
10 | * This file is part of Runlisp, a tool for invoking Common Lisp scripts. | |
11 | * | |
12 | * Runlisp is free software: you can redistribute it and/or modify it | |
13 | * under the terms of the GNU General Public License as published by the | |
14 | * Free Software Foundation; either version 3 of the License, or (at your | |
15 | * option) any later version. | |
16 | * | |
17 | * Runlisp is distributed in the hope that it will be useful, but WITHOUT | |
18 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
19 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
20 | * for more details. | |
21 | * | |
22 | * You should have received a copy of the GNU General Public License | |
23 | * along with Runlisp. If not, see <https://www.gnu.org/licenses/>. | |
24 | */ | |
25 | ||
26 | /*----- Header files ------------------------------------------------------*/ | |
27 | ||
28 | #include "config.h" | |
29 | ||
30 | #include <assert.h> | |
31 | ||
32 | #include <ctype.h> | |
33 | #include <errno.h> | |
34 | #include <stdarg.h> | |
35 | #include <stdio.h> | |
36 | #include <stdlib.h> | |
37 | #include <string.h> | |
38 | ||
39 | #include <unistd.h> | |
40 | ||
41 | #include "lib.h" | |
42 | ||
7b8ff279 MW |
43 | /*----- Diagnostic utilities ----------------------------------------------*/ |
44 | ||
45 | const char *progname = "???"; | |
8996f767 | 46 | /* Our program name, for use in error messages. */ |
7b8ff279 | 47 | |
8996f767 | 48 | /* Set `progname' from the pathname in PROG (typically from `argv[0]'). */ |
7b8ff279 MW |
49 | void set_progname(const char *prog) |
50 | { | |
51 | const char *p; | |
52 | ||
53 | p = strrchr(prog, '/'); | |
d6127242 | 54 | progname = p ? p + 1 : prog; |
7b8ff279 MW |
55 | } |
56 | ||
8996f767 MW |
57 | /* Report an error or warning in Unix style, given a captured argument |
58 | * cursor. | |
59 | */ | |
7b8ff279 MW |
60 | void vmoan(const char *msg, va_list ap) |
61 | { | |
62 | fprintf(stderr, "%s: ", progname); | |
63 | vfprintf(stderr, msg, ap); | |
64 | fputc('\n', stderr); | |
65 | } | |
66 | ||
8996f767 | 67 | /* Issue a warning message. */ |
7b8ff279 MW |
68 | void moan(const char *msg, ...) |
69 | { va_list ap; va_start(ap, msg); vmoan(msg, ap); va_end(ap); } | |
70 | ||
8996f767 | 71 | /* Issue a fatal error message and exit unsuccessfully. */ |
7b8ff279 MW |
72 | void lose(const char *msg, ...) |
73 | { va_list ap; va_start(ap, msg); vmoan(msg, ap); va_end(ap); exit(127); } | |
74 | ||
75 | /*----- Memory allocation -------------------------------------------------*/ | |
76 | ||
8996f767 MW |
77 | /* Allocate and return a pointer to N bytes, or report a fatal error. |
78 | * | |
79 | * Release the pointer using `free' as usual. If N is zero, returns null | |
80 | * (but you are not expected to check for this). | |
81 | */ | |
7b8ff279 MW |
82 | void *xmalloc(size_t n) |
83 | { | |
84 | void *p; | |
85 | ||
86 | if (!n) return (0); | |
87 | p = malloc(n); if (!p) lose("failed to allocate memory"); | |
88 | return (p); | |
89 | } | |
90 | ||
8996f767 MW |
91 | /* Resize the block at P (from `malloc' or `xmalloc') to be N bytes long. |
92 | * | |
93 | * The block might (and probably will) move, so it returns the new address. | |
94 | * If N is zero, then the block is freed (if necessary) and a null pointer | |
95 | * returned; otherwise, if P is null then a fresh block is allocated. If | |
96 | * allocation fails, then a fatal error is reported. | |
97 | */ | |
7b8ff279 MW |
98 | void *xrealloc(void *p, size_t n) |
99 | { | |
100 | if (!n) { free(p); return (0); } | |
101 | else if (!p) return (xmalloc(n)); | |
102 | p = realloc(p, n); if (!p) lose("failed to allocate memory"); | |
103 | return (p); | |
104 | } | |
105 | ||
8996f767 MW |
106 | /* Allocate and return a copy of the N-byte string starting at P. |
107 | * | |
108 | * The new string is null-terminated, though P need not be. If allocation | |
109 | * fails, then a fatal error is reported. | |
110 | */ | |
7b8ff279 MW |
111 | char *xstrndup(const char *p, size_t n) |
112 | { | |
113 | char *q = xmalloc(n + 1); | |
114 | ||
115 | memcpy(q, p, n); q[n] = 0; | |
116 | return (q); | |
117 | } | |
118 | ||
8996f767 MW |
119 | /* Allocate and return a copy of the null-terminated string starting at P. |
120 | * | |
121 | * If allocation fails, then a fatal error is reported. | |
122 | */ | |
7b8ff279 MW |
123 | char *xstrdup(const char *p) { return (xstrndup(p, strlen(p))); } |
124 | ||
125 | /*----- Dynamic strings ---------------------------------------------------*/ | |
126 | ||
8996f767 MW |
127 | /* Initialize the string D. |
128 | * | |
129 | * Usually you'd use the static initializer `DSTR_INIT'. | |
130 | */ | |
7b8ff279 MW |
131 | void dstr_init(struct dstr *d) { d->p = 0; d->len = d->sz = 0; } |
132 | ||
8996f767 | 133 | /* Reset string D so it's empty again. */ |
7b8ff279 MW |
134 | void dstr_reset(struct dstr *d) { d->len = 0; } |
135 | ||
8996f767 | 136 | /* Ensure that D has at least N unused bytes available. */ |
7b8ff279 MW |
137 | void dstr_ensure(struct dstr *d, size_t n) |
138 | { | |
139 | size_t need = d->len + n, newsz; | |
140 | ||
141 | if (need <= d->sz) return; | |
142 | newsz = d->sz ? 2*d->sz : 16; | |
143 | while (newsz < need) newsz *= 2; | |
144 | d->p = xrealloc(d->p, newsz); d->sz = newsz; | |
145 | } | |
146 | ||
8996f767 MW |
147 | /* Release the memory held by D. |
148 | * | |
149 | * It must be reinitialized (e.g., by `dstr_init') before it can be used | |
150 | * again. | |
151 | */ | |
7b8ff279 MW |
152 | void dstr_release(struct dstr *d) { free(d->p); } |
153 | ||
8996f767 MW |
154 | /* Append the N-byte string at P to D. |
155 | * | |
156 | * P need not be null-terminated. D will not be null-terminated | |
157 | * afterwards. | |
158 | */ | |
7b8ff279 MW |
159 | void dstr_putm(struct dstr *d, const void *p, size_t n) |
160 | { dstr_ensure(d, n); memcpy(d->p + d->len, p, n); d->len += n; } | |
161 | ||
8996f767 MW |
162 | /* Append the null-terminated string P to D. |
163 | * | |
164 | * D /is/ guaranteed to be null-terminated after this. | |
165 | */ | |
7b8ff279 MW |
166 | void dstr_puts(struct dstr *d, const char *p) |
167 | { | |
168 | size_t n = strlen(p); | |
169 | ||
170 | dstr_ensure(d, n + 1); | |
171 | memcpy(d->p + d->len, p, n + 1); | |
172 | d->len += n; | |
173 | } | |
174 | ||
8996f767 MW |
175 | /* Append the single character CH to D. |
176 | * | |
177 | * D will not be null-terminated afterwards. | |
178 | */ | |
7b8ff279 MW |
179 | void dstr_putc(struct dstr *d, int ch) |
180 | { dstr_ensure(d, 1); d->p[d->len++] = ch; } | |
181 | ||
8996f767 MW |
182 | /* Append N copies of the character CH to D. |
183 | * | |
184 | * D will not be null-terminated afterwards. | |
185 | */ | |
7b8ff279 MW |
186 | void dstr_putcn(struct dstr *d, int ch, size_t n) |
187 | { dstr_ensure(d, n); memset(d->p + d->len, ch, n); d->len += n; } | |
188 | ||
8996f767 MW |
189 | /* Null-terminate the string D. |
190 | * | |
191 | * This doesn't change the length of D. If further stuff is appended then | |
192 | * the null terminator will be overwritten. | |
193 | */ | |
7b8ff279 MW |
194 | void dstr_putz(struct dstr *d) |
195 | { dstr_ensure(d, 1); d->p[d->len] = 0; } | |
196 | ||
8996f767 MW |
197 | /* Append stuff to D, determined by printf(3) format string P and argument |
198 | * tail AP. | |
199 | * | |
200 | * D will not be null-terminated afterwards. | |
201 | */ | |
7b8ff279 MW |
202 | void dstr_vputf(struct dstr *d, const char *p, va_list ap) |
203 | { | |
204 | va_list ap2; | |
205 | size_t r; | |
206 | int n; | |
207 | ||
208 | r = d->sz - d->len; | |
209 | va_copy(ap2, ap); | |
210 | n = vsnprintf(d->p + d->len, r, p, ap2); assert(n >= 0); | |
211 | va_end(ap2); | |
212 | if (n >= r) { | |
213 | dstr_ensure(d, n + 1); r = d->sz - d->len; | |
214 | n = vsnprintf(d->p + d->len, r, p, ap); assert(n >= 0); assert(n < r); | |
215 | } | |
216 | d->len += n; | |
217 | } | |
218 | ||
8996f767 MW |
219 | /* Append stuff to D, determined by printf(3) format string P and arguments. |
220 | * | |
221 | * D will not be null-terminated afterwards. | |
222 | */ | |
7b8ff279 MW |
223 | PRINTF_LIKE(2, 3) void dstr_putf(struct dstr *d, const char *p, ...) |
224 | { va_list ap; va_start(ap, p); dstr_vputf(d, p, ap); va_end(ap); } | |
225 | ||
8996f767 MW |
226 | /* Append the next input line from FP to D. |
227 | * | |
228 | * Return 0 on success, or -1 if reading immediately fails or encounters | |
229 | * end-of-file (call ferror(3) to distinguish). Any trailing newline is | |
230 | * discarded: it is not possible to determine whether the last line was ended | |
231 | * with a newline. D is guaranteed to be null-terminated afterwards. | |
232 | */ | |
7b8ff279 MW |
233 | int dstr_readline(struct dstr *d, FILE *fp) |
234 | { | |
235 | size_t n; | |
236 | int any = 0; | |
237 | ||
238 | for (;;) { | |
239 | dstr_ensure(d, 2); | |
240 | if (!fgets(d->p + d->len, d->sz - d->len, fp)) break; | |
241 | n = strlen(d->p + d->len); assert(n > 0); any = 1; | |
242 | d->len += n; | |
243 | if (d->p[d->len - 1] == '\n') { d->p[--d->len] = 0; break; } | |
244 | } | |
245 | ||
246 | if (!any) return (-1); | |
247 | else return (0); | |
248 | } | |
249 | ||
250 | /*----- Dynamic vectors of strings ----------------------------------------*/ | |
251 | ||
8996f767 MW |
252 | /* Initialize the vector AV. |
253 | * | |
254 | * Usually you'd use the static initializer `ARGV_INIT'. | |
255 | */ | |
7b8ff279 MW |
256 | void argv_init(struct argv *av) |
257 | { av->v = 0; av->o = av->n = av->sz = 0; } | |
258 | ||
8996f767 | 259 | /* Reset the vector AV so that it's empty again. */ |
7b8ff279 MW |
260 | void argv_reset(struct argv *av) { av->n = 0; } |
261 | ||
8996f767 | 262 | /* Ensure that AV has at least N unused slots at the end. */ |
7b8ff279 MW |
263 | void argv_ensure(struct argv *av, size_t n) |
264 | { | |
265 | size_t need = av->n + av->o + n, newsz; | |
266 | ||
267 | if (need <= av->sz) return; | |
268 | newsz = av->sz ? 2*av->sz : 8; | |
269 | while (newsz < need) newsz *= 2; | |
acbcd7d7 | 270 | av->v = xrealloc(av->v - av->o, newsz*sizeof(char *)); av->v += av->o; |
7b8ff279 MW |
271 | av->sz = newsz; |
272 | } | |
273 | ||
8996f767 | 274 | /* Ensure that AV has at least N unused slots at the /start/. */ |
7b8ff279 MW |
275 | void argv_ensure_offset(struct argv *av, size_t n) |
276 | { | |
277 | size_t newoff; | |
278 | ||
279 | /* Stupid version. We won't, in practice, be prepending lots of stuff, so | |
280 | * avoid the extra bookkeeping involved in trying to make a double-ended | |
281 | * extendable array asymptotically efficient. | |
282 | */ | |
283 | if (av->o >= n) return; | |
284 | newoff = 16; | |
285 | while (newoff < n) newoff *= 2; | |
286 | argv_ensure(av, newoff - av->o); | |
8996f767 | 287 | memmove(av->v + newoff - av->o, av->v, av->n*sizeof(char *)); |
7b8ff279 MW |
288 | av->v += newoff - av->o; av->o = newoff; |
289 | } | |
290 | ||
8996f767 MW |
291 | /* Release the memory held by AV. |
292 | * | |
293 | * It must be reinitialized (e.g., by `argv_init') before it can be used | |
294 | * again. | |
295 | */ | |
7b8ff279 MW |
296 | void argv_release(struct argv *av) { free(av->v - av->o); } |
297 | ||
8996f767 MW |
298 | /* Append the pointer P to AV. */ |
299 | void argv_append(struct argv *av, char *p) | |
7b8ff279 MW |
300 | { argv_ensure(av, 1); av->v[av->n++] = p; } |
301 | ||
8996f767 MW |
302 | /* Append a null pointer to AV, without extending the vactor length. |
303 | * | |
304 | * The null pointer will be overwritten when the next string is appended. | |
305 | */ | |
7b8ff279 MW |
306 | void argv_appendz(struct argv *av) |
307 | { argv_ensure(av, 1); av->v[av->n] = 0; } | |
308 | ||
8996f767 MW |
309 | /* Append a N-element vector V of pointers to AV. */ |
310 | void argv_appendn(struct argv *av, char *const *v, size_t n) | |
7b8ff279 MW |
311 | { |
312 | argv_ensure(av, n); | |
313 | memcpy(av->v + av->n, v, n*sizeof(const char *)); | |
314 | av->n += n; | |
315 | } | |
316 | ||
8996f767 | 317 | /* Append the variable-length vector BV to AV. */ |
7b8ff279 MW |
318 | void argv_appendav(struct argv *av, const struct argv *bv) |
319 | { argv_appendn(av, bv->v, bv->n); } | |
320 | ||
8996f767 MW |
321 | /* Append the pointers from a variable-length argument list AP to AV. |
322 | * | |
323 | * The list is terminated by a null pointer. | |
324 | */ | |
7b8ff279 MW |
325 | void argv_appendv(struct argv *av, va_list ap) |
326 | { | |
8996f767 MW |
327 | char *p; |
328 | for (;;) { p = va_arg(ap, char *); if (!p) break; argv_append(av, p); } | |
7b8ff279 MW |
329 | } |
330 | ||
8996f767 | 331 | /* Append the argument pointers, terminated by a null pointer, to AV. */ |
7b8ff279 MW |
332 | void argv_appendl(struct argv *av, ...) |
333 | { va_list ap; va_start(ap, av); argv_appendv(av, ap); va_end(ap); } | |
334 | ||
8996f767 MW |
335 | /* Prepend the pointer P to AV. */ |
336 | void argv_prepend(struct argv *av, char *p) | |
7b8ff279 MW |
337 | { argv_ensure_offset(av, 1); *--av->v = p; av->o--; av->n++; } |
338 | ||
8996f767 MW |
339 | /* Prepend a N-element vector V of pointers to AV. */ |
340 | void argv_prependn(struct argv *av, char *const *v, size_t n) | |
7b8ff279 MW |
341 | { |
342 | argv_ensure_offset(av, n); | |
343 | av->o -= n; av->v -= n; av->n += n; | |
344 | memcpy(av->v, v, n*sizeof(const char *)); | |
345 | } | |
346 | ||
8996f767 | 347 | /* Prepend the variable-length vector BV to AV. */ |
7b8ff279 MW |
348 | void argv_prependav(struct argv *av, const struct argv *bv) |
349 | { argv_prependn(av, bv->v, bv->n); } | |
350 | ||
8996f767 MW |
351 | /* Prepend the pointers from a variable-length argument list AP to AV. |
352 | * | |
353 | * The list is terminated by a null pointer. | |
354 | */ | |
7b8ff279 MW |
355 | void argv_prependv(struct argv *av, va_list ap) |
356 | { | |
8996f767 | 357 | char *p, **v; |
7b8ff279 MW |
358 | size_t n = 0; |
359 | ||
360 | for (;;) { | |
8996f767 | 361 | p = va_arg(ap, char *); if (!p) break; |
7b8ff279 MW |
362 | argv_prepend(av, p); n++; |
363 | } | |
364 | v = av->v; | |
365 | while (n >= 2) { | |
366 | p = v[0]; v[0] = v[n - 1]; v[n - 1] = p; | |
367 | v++; n -= 2; | |
368 | } | |
369 | } | |
370 | ||
8996f767 | 371 | /* Prepend the argument pointers, terminated by a null pointer, to AV. */ |
7b8ff279 MW |
372 | void argv_prependl(struct argv *av, ...) |
373 | { va_list ap; va_start(ap, av); argv_prependv(av, ap); va_end(ap); } | |
374 | ||
375 | /*----- Treaps ------------------------------------------------------------*/ | |
376 | ||
8996f767 MW |
377 | /* Return nonzero if the AN-byte string A is strictly precedes the BN-byte |
378 | * string B in a lexicographic ordering. | |
379 | * | |
380 | * All comparisons of keys is handled by this function. | |
381 | */ | |
382 | static int str_lt(const char *a, size_t an, const char *b, size_t bn) | |
383 | { | |
384 | /* This is a little subtle. We need only compare the first N bytes of the | |
385 | * strings, where N is the length of the shorter string. If this | |
386 | * distinguishes the two strings, then we're clearly done. Otherwise, if | |
387 | * the prefixes are equal then the shorter string is the smaller one. If | |
388 | * the two strings are the same length, then they're equal. | |
389 | * | |
390 | * Hence, if A is the strictly shorter string, then A precedes B if A | |
391 | * precedes or matches the prefix of B; otherwise A only precedes B if A | |
392 | * strictly precedes the prefix of B. | |
393 | */ | |
394 | if (an < bn) return (MEMCMP(a, <=, b, an)); | |
395 | else return (MEMCMP(a, <, b, bn)); | |
396 | } | |
397 | ||
398 | /* Initialize the treap T. | |
399 | * | |
400 | * Usually you'd use the static initializer `TREAP_INIT'. | |
401 | */ | |
7b8ff279 MW |
402 | void treap_init(struct treap *t) { t->root = 0; } |
403 | ||
8996f767 MW |
404 | /* Look up the KN-byte key K in the treap T. |
405 | * | |
406 | * Return a pointer to the matching node if one was found, or null otherwise. | |
407 | */ | |
7b8ff279 MW |
408 | void *treap_lookup(const struct treap *t, const char *k, size_t kn) |
409 | { | |
410 | struct treap_node *n = t->root, *candidate = 0; | |
411 | ||
8996f767 MW |
412 | /* This is a simple prototype for some of the search loops we'll encounter |
413 | * later. Notice that we use a strict one-sided comparison, rather than | |
414 | * the more conventional two-sided comparison. | |
415 | * | |
416 | * The main loop will find the largest key not greater than K. | |
417 | */ | |
418 | while (n) | |
419 | /* Compare the node's key against our key. If the node is too large, | |
420 | * then we ignore it and move left. Otherwise remember this node for | |
421 | * later, and move right to see if we can find a better, larger node. | |
422 | */ | |
423 | ||
7b8ff279 MW |
424 | if (str_lt(k, kn, n->k, n->kn)) n = n->left; |
425 | else { candidate = n; n = n->right; } | |
8996f767 MW |
426 | |
427 | /* If the candidate node is less than our key then we failed. Otherwise, | |
428 | * by trichotomy, we have found the correct node. | |
429 | */ | |
7b8ff279 MW |
430 | if (!candidate || str_lt(candidate->k, candidate->kn, k, kn)) return (0); |
431 | return (candidate); | |
432 | } | |
433 | ||
8996f767 MW |
434 | /* Look up the KN-byte K in the treap T, recording a path in P. |
435 | * | |
436 | * This is similar to `treap_lookup', in that it returns the requested node | |
437 | * if it already exists, or null otherwise, but it also records in P | |
438 | * information to be used by `treap_insert' to insert a new node with the | |
10427eb2 | 439 | * given key if it's not there already. |
8996f767 | 440 | */ |
7b8ff279 MW |
441 | void *treap_probe(struct treap *t, const char *k, size_t kn, |
442 | struct treap_path *p) | |
443 | { | |
444 | struct treap_node **nn = &t->root, *candidate = 0; | |
445 | unsigned i = 0; | |
446 | ||
8996f767 MW |
447 | /* This walk is similar to `treap_lookup' above, except that we also record |
448 | * the address of each node pointer we visit along the way. | |
449 | */ | |
7b8ff279 MW |
450 | for (;;) { |
451 | assert(i < TREAP_PATHMAX); p->path[i++] = nn; | |
452 | if (!*nn) break; | |
453 | if (str_lt(k, kn, (*nn)->k, (*nn)->kn)) nn = &(*nn)->left; | |
454 | else { candidate = *nn; nn = &(*nn)->right; } | |
455 | } | |
456 | p->nsteps = i; | |
8996f767 MW |
457 | |
458 | /* Check to see whether we found the right node. */ | |
7b8ff279 MW |
459 | if (!candidate || str_lt(candidate->k, candidate->kn, k, kn)) return (0); |
460 | return (candidate); | |
461 | } | |
462 | ||
8996f767 MW |
463 | /* Insert a new node N into T, associating it with the KN-byte key K. |
464 | * | |
465 | * Use the path data P, from `treap_probe', to help with insertion. | |
466 | */ | |
7b8ff279 MW |
467 | void treap_insert(struct treap *t, const struct treap_path *p, |
468 | struct treap_node *n, const char *k, size_t kn) | |
469 | { | |
470 | size_t i = p->nsteps; | |
471 | struct treap_node **nn, **uu, *u; | |
472 | unsigned wt; | |
473 | ||
8996f767 | 474 | /* Fill in the node structure. */ |
7b8ff279 MW |
475 | n->k = xstrndup(k, kn); n->kn = kn; |
476 | n->wt = wt = rand(); n->left = n->right = 0; | |
8996f767 MW |
477 | |
478 | /* Prepare for the insertion. | |
479 | * | |
480 | * The path actually points to each of the links traversed when searching | |
481 | * for the node, starting with the `root' pointer, then the `left' or | |
482 | * `right' pointer of the root node, and so on; `nsteps' will always be | |
483 | * nonzero, since the path will always pass through the root, and the final | |
484 | * step, `path->path[path->nsteps - 1]' will always be the address of a | |
485 | * null pointer onto which the freshly inserted node could be hooked in | |
486 | * order to satisfy the binary-search-tree ordering. (Of course, this will | |
487 | * likely /not/ satisfy the heap condition, so more work needs to be done.) | |
488 | * | |
489 | * Throughout, NN is our current candidate for where to attach the node N. | |
490 | * As the loop progresses, NN will ascend to links further up the tree, and | |
491 | * N will be adjusted to accumulate pieces of the existing tree structure. | |
492 | * We'll stop when we find that the parent node's weight is larger than our | |
493 | * new node's weight, at which point we can just set *NN = N; or if we run | |
494 | * out of steps in the path, in which case *NN is the root pointer. | |
495 | */ | |
7b8ff279 MW |
496 | assert(i); nn = p->path[--i]; |
497 | while (i--) { | |
8996f767 MW |
498 | |
499 | /* Collect the next step in the path, and get the pointer to the node. */ | |
7b8ff279 | 500 | uu = p->path[i]; u = *uu; |
8996f767 MW |
501 | |
502 | /* If this node's weight is higher, then we've found the right level and | |
503 | * we can stop. | |
504 | */ | |
7b8ff279 | 505 | if (wt <= u->wt) break; |
8996f767 MW |
506 | |
507 | /* The node U is lighter than our new node N, so we must rotate in order | |
508 | * to fix things. If we were currently planning to hook N as the left | |
509 | * subtree of U, then we rotate like this: | |
510 | * | |
511 | * | | | |
10427eb2 | 512 | * U (N) |
8996f767 MW |
513 | * / \ / \ |
514 | * (N) Z ---> X U | |
515 | * / \ / \ | |
516 | * X Y Y Z | |
517 | * | |
518 | * On the other hand, if we ere planning to hook N as the right subtree | |
519 | * of U, then we do the opposite rotation: | |
520 | * | |
521 | * | | | |
10427eb2 | 522 | * U (N) |
8996f767 MW |
523 | * / \ / \ |
524 | * X (N) ---> U Z | |
525 | * / \ / \ | |
526 | * Y Z X Y | |
527 | * | |
528 | * These transformations clearly preserve the ordering of nodes in the | |
529 | * binary search tree, and satisfy the heap condition in the subtree | |
530 | * headed by N. | |
531 | */ | |
7b8ff279 MW |
532 | if (nn == &u->left) { u->left = n->right; n->right = u; } |
533 | else { u->right = n->left; n->left = u; } | |
8996f767 MW |
534 | |
535 | /* And this arrangement must be attached to UU, or some higher attachment | |
536 | * point. The subtree satisfies the heap condition, and can be attached | |
537 | * safely at the selected place. | |
538 | */ | |
7b8ff279 MW |
539 | nn = uu; |
540 | } | |
8996f767 MW |
541 | |
542 | /* We've found the right spot. Hook the accumulated subtree into place. */ | |
7b8ff279 MW |
543 | *nn = n; |
544 | } | |
545 | ||
8996f767 MW |
546 | /* Remove the node with the KN-byte K from T. |
547 | * | |
548 | * Return the address of the node we removed, or null if it couldn't be | |
549 | * found. | |
550 | */ | |
7b8ff279 MW |
551 | void *treap_remove(struct treap *t, const char *k, size_t kn) |
552 | { | |
553 | struct treap_node **nn = &t->root, **candidate = 0, *n, *l, *r; | |
554 | ||
8996f767 MW |
555 | /* Search for the matching node, but keep track of the address of the link |
556 | * which points to our target node. | |
557 | */ | |
558 | while (*nn) | |
7b8ff279 MW |
559 | if (str_lt(k, kn, (*nn)->k, (*nn)->kn)) nn = &(*nn)->left; |
560 | else { candidate = nn; nn = &(*nn)->right; } | |
8996f767 MW |
561 | |
562 | /* If this isn't the right node then give up. */ | |
7b8ff279 MW |
563 | if (!candidate || str_lt((*candidate)->k, (*candidate)->kn, k, kn)) |
564 | return (0); | |
565 | ||
8996f767 MW |
566 | /* Now we need to disentangle the node from the tree. This is essentially |
567 | * the reverse of insertion: we pretend that this node is suddenly very | |
568 | * light, and mutate the tree so as to restore the heap condition until | |
569 | * eventually our node is a leaf and can be cut off without trouble. | |
570 | * | |
571 | * Throughout, the link *NN notionally points to N, but we don't actually | |
572 | * update it until we're certain what value it should finally take. | |
573 | */ | |
574 | nn = candidate; n = *nn; l = n->left; r = n->right; | |
575 | for (;;) | |
576 | ||
577 | /* If its left subtree is empty then we can replace our node by its right | |
578 | * subtree and be done. Similarly, if the right subtree is empty then we | |
579 | * replace the node by its left subtree. | |
580 | * | |
581 | * | | | | | |
582 | * (N) ---> R ; (N) ---> L | |
583 | * / \ / \ | |
584 | * * R L * | |
585 | */ | |
586 | if (!l) { *nn = r; break; } | |
587 | else if (!r) { *nn = l; break; } | |
588 | ||
589 | /* Otherwise we need to rotate the pointers so that the heavier of the | |
590 | * two children takes the place of our node; thus we have either | |
591 | * | |
592 | * | | | |
593 | * (N) L | |
594 | * / \ / \ | |
595 | * L R ---> X (N) | |
596 | * / \ / \ | |
10427eb2 | 597 | * X Y Y R |
8996f767 MW |
598 | * |
599 | * or | |
600 | * | |
601 | * | | | |
602 | * (N) R | |
603 | * / \ / \ | |
604 | * L R ---> (N) Y | |
605 | * / \ / \ | |
606 | * X Y L X | |
607 | * | |
608 | * Again, these transformations clearly preserve the ordering of nodes in | |
609 | * the binary search tree, and the heap condition. | |
610 | */ | |
10427eb2 MW |
611 | else if (l->wt > r->wt) |
612 | { *nn = l; nn = &l->right; l = n->left = l->right; } | |
613 | else | |
614 | { *nn = r; nn = &r->left; r = n->right = r->left; } | |
8996f767 MW |
615 | |
616 | /* Release the key buffer, and return the node that we've now detached. */ | |
617 | free(n->k); return (n); | |
7b8ff279 MW |
618 | } |
619 | ||
8996f767 | 620 | /* Initialize an iterator I over T's nodes. */ |
7b8ff279 MW |
621 | void treap_start_iter(struct treap *t, struct treap_iter *i) |
622 | { | |
623 | struct treap_node *n = t->root; | |
624 | unsigned sp = 0; | |
625 | ||
8996f767 MW |
626 | /* The `stack' in the iterator structure is an empty ascending stack of |
627 | * nodes which have been encountered, and their left subtrees investigated, | |
628 | * but not yet visited by the iteration. | |
629 | * | |
630 | * Iteration begins by stacking the root node, its left child, and so on, | |
631 | * At the end of this, the topmost entry on the stack is the least node of | |
632 | * the tree, followed by its parent, grandparent, and so on up to the root. | |
633 | */ | |
7b8ff279 MW |
634 | while (n) { |
635 | assert(sp < TREAP_PATHMAX); | |
636 | i->stack[sp++] = n; n = n->left; | |
637 | } | |
638 | i->sp = sp; | |
639 | } | |
640 | ||
8996f767 MW |
641 | /* Return the next node from I, in ascending order by key. |
642 | * | |
643 | * If there are no more nodes, then return null. | |
644 | */ | |
7b8ff279 MW |
645 | void *treap_next(struct treap_iter *i) |
646 | { | |
647 | struct treap_node *n, *o; | |
648 | unsigned sp = i->sp; | |
649 | ||
8996f767 MW |
650 | /* We say that a node is /visited/ once it's been returned by this |
651 | * iterator. To traverse a tree in order, then, we traverse its left | |
652 | * subtree, visit the tree root, and traverse its right subtree -- which is | |
653 | * a fine recursive definition, but we need a nonrecursive implementation. | |
654 | * | |
655 | * As is usual in this kind of essential structural recursion, we maintain | |
656 | * a stack. The invariant that we'll maintain is as follows. | |
657 | * | |
658 | * 1. If the stack is empty, then all nodes have been visited. | |
659 | * | |
660 | * 2, If the stack is nonempty then the topmost entry on the stack is the | |
661 | * least node which has not yet been visited -- and therefore is the | |
662 | * next node to visit. | |
663 | * | |
664 | * 3. The earlier entries in the stack are, in (top to bottom) order, | |
665 | * those of the topmost node's parent, grandparent, etc., up to the | |
666 | * root, which have not yet been visited. More specifically, a node | |
667 | * appears in the stack if and only if some node in its left subtree | |
668 | * is nearer the top of the stack. | |
669 | * | |
670 | * When we initialized the iterator state (in `treap_start_iter' above), we | |
671 | * traced a path to the leftmost leaf, stacking the root, its left-hand | |
672 | * child, and so on. The leftmost leaf is clearly the first node to be | |
673 | * visited, and its entire ancestry is on the stack since none of these | |
674 | * nodes has yet been visited. (If the tree is empty, then we have done | |
675 | * nothing, the stack is empty, and there are no nodes to visit.) This | |
676 | * establishes the base case for the induction. | |
677 | */ | |
678 | ||
679 | /* So, if the stack is empty now, then (1) all of the nodes have been | |
680 | * visited and there's nothing left to do. Return null. | |
681 | */ | |
7b8ff279 | 682 | if (!sp) return (0); |
8996f767 MW |
683 | |
684 | /* It's clear that, if we pop the topmost element of the stack, visit it, | |
685 | * and arrange to reestablish the invariant, then we'll visit the nodes in | |
686 | * the correct order, pretty much by definition. | |
687 | * | |
688 | * So, pop a node off the stack. This is the node we shall return. But | |
689 | * before we can do that, we must reestablish the above invariant. | |
690 | * Firstly, the current node is removed from the stack, because we're about | |
691 | * to visit it, and visited nodes don't belong on the stack. Then there | |
692 | * are two cases to consider. | |
693 | * | |
694 | * * If the current node's right subtree is not empty, then the next node | |
695 | * to be visited is the leftmost node in that subtree. All of the | |
10427eb2 | 696 | * nodes on the stack are ancestors of the current node, and the right |
8996f767 | 697 | * subtree consists of its descendants, so none of them are already on |
10427eb2 | 698 | * the stack; and they're all greater than the current node, and |
8996f767 MW |
699 | * therefore haven't been visited. Therefore, we must push the current |
700 | * node's right child, its /left/ child, and so on, proceeding | |
701 | * leftwards until we fall off the bottom of the tree. | |
702 | * | |
703 | * * Otherwise, we've finished traversing some subtree. Either we are | |
704 | * now done, or (3) we have just finished traversing the left subtree | |
705 | * of the next topmost item on the stack. This must therefore be the | |
706 | * next node to visit. The rest of the stack is already correct. | |
707 | */ | |
7b8ff279 MW |
708 | n = i->stack[--sp]; |
709 | o = n->right; | |
710 | while (o) { | |
711 | assert(sp < TREAP_PATHMAX); | |
712 | i->stack[sp++] = o; o = o->left; | |
713 | } | |
714 | i->sp = sp; | |
715 | return (n); | |
716 | } | |
717 | ||
8996f767 MW |
718 | /* Recursively check the subtree headed by N. |
719 | * | |
720 | * No node should have weight greater than MAXWT, to satisfy the heap | |
721 | * condition; if LO is not null, then all node keys should be strictly | |
722 | * greater than LO, and, similarly, if HI is not null, then all keys should | |
723 | * be strictly smaller than HI. | |
724 | */ | |
725 | static void check_subtree(struct treap_node *n, unsigned maxwt, | |
726 | const char *klo, const char *khi) | |
7b8ff279 | 727 | { |
8996f767 | 728 | /* Check the heap condition. */ |
7b8ff279 | 729 | assert(n->wt <= maxwt); |
8996f767 MW |
730 | |
731 | /* Check that the key is in bounds. (Use `strcmp' here to ensure that our | |
732 | * own `str_lt' is working correctly.) | |
733 | */ | |
7b8ff279 MW |
734 | if (klo) assert(STRCMP(n->k, >, klo)); |
735 | if (khi) assert(STRCMP(n->k, <, khi)); | |
8996f767 MW |
736 | |
737 | /* Check the left subtree. Node weights must be bounded above by our own | |
738 | * weight. And everykey in the left subtree must be smaller than our | |
739 | * current key. We propagate the lower bound. | |
740 | */ | |
741 | if (n->left) check_subtree(n->left, n->wt, klo, n->k); | |
742 | ||
743 | /* Finally, check the right subtree. This time, every key must be larger | |
744 | * than our key, and we propagate the upper bound. | |
745 | */ | |
746 | if (n->right) check_subtree(n->right, n->wt, n->k, khi); | |
7b8ff279 MW |
747 | } |
748 | ||
8996f767 | 749 | /* Check the treap structure rules for T. */ |
7b8ff279 | 750 | void treap_check(struct treap *t) |
8996f767 | 751 | { if (t->root) check_subtree(t->root, t->root->wt, 0, 0); } |
7b8ff279 | 752 | |
8996f767 MW |
753 | /* Recursively dump the subtree headed by N, indenting the output lines by |
754 | * IND spaces. | |
755 | */ | |
7b8ff279 MW |
756 | static void dump_node(struct treap_node *n, int ind) |
757 | { | |
758 | if (n->left) dump_node(n->left, ind + 1); | |
759 | printf(";;%*s [%10u] `%s'\n", 2*ind, "", n->wt, n->k); | |
760 | if (n->right) dump_node(n->right, ind + 1); | |
761 | } | |
762 | ||
8996f767 | 763 | /* Dump the treap T to standard output, for debugging purposes. */ |
7b8ff279 MW |
764 | void treap_dump(struct treap *t) { if (t->root) dump_node(t->root, 0); } |
765 | ||
766 | /*----- Configuration file parsing ----------------------------------------*/ | |
767 | ||
768 | #ifndef DECL_ENVIRON | |
769 | extern char **environ; | |
770 | #endif | |
771 | ||
8996f767 MW |
772 | /* Advance P past a syntactically valid name, but no further than L. |
773 | * | |
774 | * Return the new pointer. If no name is found, report an error, blaming | |
775 | * FILE and LINE; WHAT is an adjective for the kind of name that was | |
776 | * expected. | |
777 | */ | |
778 | static const char *scan_name(const char *what, | |
779 | const char *p, const char *l, | |
780 | const char *file, unsigned line) | |
781 | { | |
782 | const char *q = p; | |
783 | ||
784 | while (q < l && | |
785 | (ISALNUM(*q) || *q == '-' || *q == '_' || *q == '.' || *q == '/' || | |
786 | *q == '*' || *q == '+' || *q == '%' || *q == '@')) | |
787 | q++; | |
788 | if (q == p) lose("%s:%u: expected %s name", file, line, what); | |
789 | return (q); | |
790 | } | |
791 | ||
792 | /* Initialize the configuration state CONF. | |
793 | * | |
794 | * Usually you'd use the static initializer `CONFIG_INIT'. | |
795 | */ | |
7b8ff279 MW |
796 | void config_init(struct config *conf) |
797 | { treap_init(&conf->sections); } | |
798 | ||
8996f767 MW |
799 | /* Find and return the section with null-terminated NAME in CONF. |
800 | * | |
801 | * If no section is found, the behaviour depends on whether `CF_CREAT' is set | |
802 | * in F: if so, an empty section is created and returned; otherwise, a null | |
803 | * pointer is returned. | |
804 | */ | |
7b8ff279 MW |
805 | struct config_section *config_find_section(struct config *conf, unsigned f, |
806 | const char *name) | |
807 | { return (config_find_section_n(conf, f, name, strlen(name))); } | |
808 | ||
8996f767 MW |
809 | /* Find and return the section with the SZ-byte NAME in CONF. |
810 | * | |
811 | * This works like `config_find_section', but with an explicit length for the | |
812 | * NAME rather than null-termination. | |
813 | */ | |
7b8ff279 MW |
814 | struct config_section *config_find_section_n(struct config *conf, unsigned f, |
815 | const char *name, size_t sz) | |
816 | { | |
817 | struct config_section *sect; | |
818 | struct treap_path path; | |
819 | ||
820 | if (!(f&CF_CREAT)) | |
821 | sect = treap_lookup(&conf->sections, name, sz); | |
822 | else { | |
823 | sect = treap_probe(&conf->sections, name, sz, &path); | |
824 | if (!sect) { | |
825 | sect = xmalloc(sizeof(*sect)); | |
826 | if (!conf->head) conf->tail = &conf->head; | |
827 | sect->next = 0; *conf->tail = sect; conf->tail = §->next; | |
828 | sect->parents = 0; sect->nparents = SIZE_MAX; | |
829 | treap_init(§->vars); treap_init(§->cache); | |
830 | treap_insert(&conf->sections, &path, §->_node, name, sz); | |
8996f767 | 831 | config_set_var_n(conf, sect, CF_LITERAL, "@name", 5, name, sz); |
7b8ff279 MW |
832 | } |
833 | } | |
834 | return (sect); | |
835 | } | |
836 | ||
8996f767 MW |
837 | /* Set the fallback section for CONF to be SECT. |
838 | * | |
839 | * That is, if a section has no explicit parents, then by default it will | |
840 | * have a single parent which is SECT. If SECT is null then there is no | |
841 | * fallback section, and sections which don't have explicitly specified | |
842 | * parents have no parents at all. (This is the default situation.) | |
843 | */ | |
7b8ff279 | 844 | void config_set_fallback(struct config *conf, struct config_section *sect) |
8996f767 | 845 | { conf->fallback = sect; } |
7b8ff279 | 846 | |
8996f767 MW |
847 | /* Arrange that SECT has PARENT as its single parent section. |
848 | * | |
849 | * If PARENT is null, then arrange that SECT has no parents at all. In | |
850 | * either case, any `@parents' setting will be ignored. | |
851 | */ | |
7b8ff279 MW |
852 | void config_set_parent(struct config_section *sect, |
853 | struct config_section *parent) | |
854 | { | |
855 | if (!parent) | |
856 | sect->nparents = 0; | |
857 | else { | |
858 | sect->parents = xmalloc(sizeof(*sect->parents)); | |
859 | sect->parents[0] = parent; sect->nparents = 1; | |
860 | } | |
861 | } | |
862 | ||
8996f767 | 863 | /* Initialize I to iterate over the sections defined in CONF. */ |
7b8ff279 MW |
864 | void config_start_section_iter(struct config *conf, |
865 | struct config_section_iter *i) | |
866 | { i->sect = conf->head; } | |
867 | ||
8996f767 MW |
868 | /* Return the next section from I, in order of creation. |
869 | * | |
870 | * If there are no more sections, then return null. | |
871 | */ | |
7b8ff279 MW |
872 | struct config_section *config_next_section(struct config_section_iter *i) |
873 | { | |
874 | struct config_section *sect; | |
875 | ||
876 | sect = i->sect; | |
877 | if (sect) i->sect = sect->next; | |
878 | return (sect); | |
879 | } | |
880 | ||
8996f767 MW |
881 | /* Initialize the `parents' links of SECT, if they aren't set up already. |
882 | * | |
883 | * If SECT contains a `@parents' setting then parse it to determine the | |
884 | * parents; otherwise use CONF's fallbeck section, as established by | |
885 | * `config_set_fallback'. | |
886 | */ | |
7b8ff279 MW |
887 | static void set_config_section_parents(struct config *conf, |
888 | struct config_section *sect) | |
889 | { | |
890 | struct config_section *parent; | |
891 | struct config_var *var; | |
8996f767 | 892 | const char *file; unsigned line; |
7b8ff279 | 893 | size_t i, n; |
8996f767 MW |
894 | char *p, *q, *l; |
895 | struct argv av = ARGV_INIT; | |
7b8ff279 | 896 | |
8996f767 MW |
897 | /* If the section already has parents established then there's nothing to |
898 | * do. | |
899 | */ | |
7b8ff279 MW |
900 | if (sect->nparents != SIZE_MAX) return; |
901 | ||
8996f767 MW |
902 | /* Look up `@parents', without recursion! */ |
903 | var = treap_lookup(§->vars, "@parents", 8); | |
7b8ff279 | 904 | if (!var) { |
8996f767 MW |
905 | /* No explicit setting: use the fallback setting. */ |
906 | ||
907 | if (!conf->fallback || conf->fallback == sect) | |
7b8ff279 MW |
908 | sect->nparents = 0; |
909 | else { | |
8996f767 | 910 | sect->parents = xmalloc(sizeof(*sect->parents)); sect->nparents = 1; |
7b8ff279 MW |
911 | sect->parents[0] = conf->fallback; |
912 | } | |
913 | } else { | |
8996f767 MW |
914 | /* Found a `@parents' list: parse it and set the parents list. */ |
915 | ||
916 | file = var->file; line = var->line; if (!file) file = "<internal>"; | |
917 | ||
918 | /* We do this in two phases. First, we parse out the section names, and | |
919 | * record start/limit pointer pairs in `av'. | |
920 | */ | |
921 | p = var->val; l = p + var->n; while (p < l && ISSPACE(*p)) p++; | |
922 | while (*p) { | |
923 | q = p; | |
924 | p = (/*unconst*/ char *)scan_name("parent section", p, l, file, line); | |
925 | argv_append(&av, q); argv_append(&av, p); | |
926 | while (p < l && ISSPACE(*p)) p++; | |
927 | if (p >= l) break; | |
928 | if (*p == ',') do p++; while (ISSPACE(*p)); | |
7b8ff279 | 929 | } |
8996f767 MW |
930 | |
931 | /* Now that we've finished parsing, we know how many parents we're going | |
932 | * to have, so we can allocate the `parents' vector and fill it in. | |
933 | */ | |
7b8ff279 | 934 | sect->nparents = av.n/2; |
f50ecbe1 | 935 | sect->parents = xmalloc(sect->nparents*sizeof(*sect->parents)); |
7b8ff279 MW |
936 | for (i = 0; i < av.n; i += 2) { |
937 | n = av.v[i + 1] - av.v[i]; | |
938 | parent = config_find_section_n(conf, 0, av.v[i], n); | |
939 | if (!parent) | |
940 | lose("%s:%u: unknown parent section `%.*s'", | |
8996f767 | 941 | file, line, (int)n, av.v[i]); |
7b8ff279 MW |
942 | sect->parents[i/2] = parent; |
943 | } | |
7b8ff279 | 944 | } |
8996f767 MW |
945 | |
946 | /* All done. */ | |
947 | argv_release(&av); | |
7b8ff279 MW |
948 | } |
949 | ||
8996f767 MW |
950 | /* Find a setting of the SZ-byte variable NAME in CONF, starting from SECT. |
951 | * | |
952 | * If successful, return a pointer to the variable; otherwise return null. | |
953 | * Inheritance cycles and ambiguous inheritance are diagnosed as fatal | |
954 | * errors. | |
955 | */ | |
7b8ff279 MW |
956 | struct config_var *search_recursive(struct config *conf, |
957 | struct config_section *sect, | |
958 | const char *name, size_t sz) | |
959 | { | |
960 | struct config_cache_entry *cache; | |
961 | struct treap_path path; | |
962 | struct config_var *var, *v; | |
963 | size_t i, j = j; | |
964 | ||
8996f767 MW |
965 | /* If the variable is defined locally then we can just return it. */ |
966 | var = treap_lookup(§->vars, name, sz); if (var) return (var); | |
967 | ||
968 | /* If we have no parents then there's no way we can find it. */ | |
969 | set_config_section_parents(conf, sect); | |
970 | if (!sect->parents) return (0); | |
971 | ||
972 | /* Otherwise we must visit the section's parents. We can avoid paying for | |
973 | * this on every lookup by using a cache. If there's already an entry for | |
974 | * this variable then we can return the result immediately (note that we | |
975 | * cache both positive and negative outcomes). Otherwise we create a new | |
976 | * cache entry, do the full recursive search, and fill in the result when | |
977 | * we're done. | |
978 | * | |
979 | * The cache also helps us detect cycles: we set the `CF_OPEN' flag on a | |
980 | * new cache entry when it's first created, and clear it when we fill in | |
981 | * the result: if we encounter an open cache entry again, we know that | |
982 | * we've found a cycle. | |
983 | */ | |
7b8ff279 MW |
984 | cache = treap_probe(§->cache, name, sz, &path); |
985 | if (!cache) { | |
986 | cache = xmalloc(sizeof(*cache)); cache->f = CF_OPEN; | |
987 | treap_insert(§->cache, &path, &cache->_node, name, sz); | |
988 | } else if (cache->f&CF_OPEN) | |
989 | lose("inheritance cycle through section `%s'", | |
990 | CONFIG_SECTION_NAME(sect)); | |
991 | else | |
992 | return (cache->var); | |
993 | ||
8996f767 MW |
994 | /* Recursively search in each parent. We insist that all parents that find |
995 | * a variable find the same binding; otherwise we declare ambiguous | |
996 | * inheritance. | |
997 | */ | |
998 | for (i = 0; i < sect->nparents; i++) { | |
999 | v = search_recursive(conf, sect->parents[i], name, sz); | |
1000 | if (!v); | |
1001 | else if (!var) { var = v; j = i; } | |
1002 | else if (var != v) | |
1003 | lose("section `%s' inherits variable `%s' ambiguously " | |
1004 | "via `%s' and `%s'", | |
1005 | CONFIG_SECTION_NAME(sect), CONFIG_VAR_NAME(var), | |
1006 | CONFIG_SECTION_NAME(sect->parents[j]), | |
1007 | CONFIG_SECTION_NAME(sect->parents[i])); | |
7b8ff279 MW |
1008 | } |
1009 | ||
8996f767 MW |
1010 | /* All done: fill the cache entry in, clear the open flag, and return the |
1011 | * result. | |
1012 | */ | |
7b8ff279 MW |
1013 | cache->var = var; cache->f &= ~CF_OPEN; |
1014 | return (var); | |
1015 | } | |
1016 | ||
8996f767 MW |
1017 | /* Find and return the variable with null-terminated NAME in SECT. |
1018 | * | |
1019 | * If `CF_INHERIT' is set in F, then the function searches the section's | |
1020 | * parents recursively; otherwise, it only checks to see whether the variable | |
1021 | * is set directly in SECT. | |
1022 | * | |
1023 | * If no variable is found, the behaviour depends on whether `CF_CREAT' is | |
1024 | * set in F: if so, an empty variable is created and returned; otherwise, a | |
1025 | * null pointer is returned. | |
1026 | * | |
1027 | * Setting both `CF_INHERIT' and `CF_CREAT' is not useful. | |
1028 | */ | |
7b8ff279 MW |
1029 | struct config_var *config_find_var(struct config *conf, |
1030 | struct config_section *sect, | |
1031 | unsigned f, const char *name) | |
1032 | { return (config_find_var_n(conf, sect, f, name, strlen(name))); } | |
1033 | ||
8996f767 MW |
1034 | /* Find and return the variable with the given SZ-byte NAME in SECT. |
1035 | * | |
1036 | * This works like `config_find_var', but with an explicit length for the | |
1037 | * NAME rather than null-termination. | |
1038 | */ | |
7b8ff279 MW |
1039 | struct config_var *config_find_var_n(struct config *conf, |
1040 | struct config_section *sect, | |
1041 | unsigned f, const char *name, size_t sz) | |
1042 | { | |
1043 | struct config_var *var; | |
1044 | struct treap_path path; | |
1045 | ||
1046 | if (f&CF_INHERIT) | |
1047 | var = search_recursive(conf, sect, name, sz); | |
1048 | else if (!(f&CF_CREAT)) | |
1049 | var = treap_lookup(§->vars, name, sz); | |
1050 | else { | |
1051 | var = treap_probe(§->vars, name, sz, &path); | |
1052 | if (!var) { | |
1053 | var = xmalloc(sizeof(*var)); | |
1054 | var->val = 0; var->file = 0; var->f = 0; var->line = 1; | |
1055 | treap_insert(§->vars, &path, &var->_node, name, sz); | |
1056 | } | |
1057 | } | |
1058 | return (var); | |
1059 | } | |
1060 | ||
8996f767 MW |
1061 | /* Set variable NAME to VALUE in SECT, with associated flags F. |
1062 | * | |
1063 | * The names are null-terminated. The flags are variable flags: see `struct | |
6c39ec6d | 1064 | * config_var' for details. Returns the variable. |
8996f767 MW |
1065 | * |
1066 | * If the variable is already set and has the `CF_OVERRIDE' flag, then this | |
1067 | * function does nothing unless `CF_OVERRIDE' is /also/ set in F. | |
1068 | */ | |
6c39ec6d MW |
1069 | struct config_var *config_set_var(struct config *conf, |
1070 | struct config_section *sect, | |
1071 | unsigned f, | |
1072 | const char *name, const char *value) | |
7b8ff279 | 1073 | { |
6c39ec6d MW |
1074 | return (config_set_var_n(conf, sect, f, |
1075 | name, strlen(name), | |
1076 | value, strlen(value))); | |
7b8ff279 MW |
1077 | } |
1078 | ||
8996f767 MW |
1079 | /* As `config_set_var', except that the variable NAME and VALUE have explicit |
1080 | * lengths (NAMELEN and VALUELEN, respectively) rather than being null- | |
1081 | * terminated. | |
1082 | */ | |
6c39ec6d MW |
1083 | struct config_var *config_set_var_n(struct config *conf, |
1084 | struct config_section *sect, | |
1085 | unsigned f, | |
1086 | const char *name, size_t namelen, | |
1087 | const char *value, size_t valuelen) | |
7b8ff279 MW |
1088 | { |
1089 | struct config_var *var = | |
1090 | config_find_var_n(conf, sect, CF_CREAT, name, namelen); | |
1091 | ||
64457605 | 1092 | if (var->f&~f&CF_OVERRIDE) return (var); |
7b8ff279 MW |
1093 | free(var->val); var->val = xstrndup(value, valuelen); var->n = valuelen; |
1094 | var->f = f; | |
6c39ec6d | 1095 | return (var); |
7b8ff279 MW |
1096 | } |
1097 | ||
8996f767 MW |
1098 | /* Initialize I to iterate over the variables directly defined in SECT. */ |
1099 | void config_start_var_iter(struct config *conf, struct config_section *sect, | |
1100 | struct config_var_iter *i) | |
1101 | { treap_start_iter(§->vars, &i->i); } | |
1102 | ||
1103 | /* Return next variable from I, in ascending lexicographical order. | |
1104 | * | |
1105 | * If there are no more variables, then return null. | |
1106 | */ | |
1107 | struct config_var *config_next_var(struct config_var_iter *i) | |
1108 | { return (treap_next(&i->i)); } | |
1109 | ||
1110 | /* Read and parse configuration FILE, applying its settings to CONF. | |
1111 | * | |
1112 | * If all goes well, the function returns 0. If the file is not found, then | |
1113 | * the behaviour depends on whether `CF_NOENTOK' is set in F: if so, then the | |
1114 | * function simply returns -1. Otherwise, a fatal error is reported. Note | |
1115 | * that this /only/ applies if the file does not exist (specifically, opening | |
1116 | * it fails with `ENOENT') -- any other problems are reported as fatal | |
1117 | * errors regardless of the flag setting. | |
1118 | */ | |
7b8ff279 MW |
1119 | int config_read_file(struct config *conf, const char *file, unsigned f) |
1120 | { | |
1121 | struct config_section *sect; | |
1122 | struct config_var *var; | |
1123 | struct dstr d = DSTR_INIT, dd = DSTR_INIT; | |
1124 | unsigned line = 0; | |
8996f767 | 1125 | const char *p, *q, *r; |
7b8ff279 MW |
1126 | FILE *fp; |
1127 | ||
8996f767 | 1128 | /* Try to open the file. */ |
7b8ff279 MW |
1129 | fp = fopen(file, "r"); |
1130 | if (!fp) { | |
1131 | if ((f&CF_NOENTOK) && errno == ENOENT) return (-1); | |
1132 | lose("failed to open configuration file `%s': %s", | |
1133 | file, strerror(errno)); | |
1134 | } | |
1135 | ||
8996f767 | 1136 | /* Find the initial section. */ |
7b8ff279 MW |
1137 | sect = config_find_section(conf, CF_CREAT, "@CONFIG"); var = 0; |
1138 | ||
8996f767 | 1139 | /* Work through the file, line by line. */ |
7b8ff279 MW |
1140 | for (;;) { |
1141 | dstr_reset(&d); if (dstr_readline(&d, fp)) break; | |
1142 | line++; | |
1143 | ||
8996f767 MW |
1144 | /* Trim trailing spaces from the line. The syntax is sensitive to |
1145 | * leading spaces, so we can't trim those yet. | |
1146 | */ | |
1147 | while (d.len && ISSPACE(d.p[d.len - 1])) d.len--; | |
1148 | d.p[d.len] = 0; | |
1149 | ||
1150 | if (!*d.p || *d.p == ';') | |
1151 | /* Ignore comments entirely. (In particular, a comment doesn't | |
1152 | * interrupt a multiline variable value.) | |
1153 | */ | |
1154 | ; | |
1155 | ||
1156 | else if (ISSPACE(d.p[0])) { | |
1157 | /* The line starts with whitespace, so it's a continuation line. */ | |
1158 | ||
1159 | /* Skip the initial whitespace. */ | |
1160 | p = d.p; while (ISSPACE(*p)) p++; | |
1161 | ||
1162 | /* If we aren't collecting a variable value then this is an error. | |
1163 | * Otherwise, accumulate it into the current value. | |
1164 | */ | |
1165 | if (!var) | |
1166 | lose("%s:%u: continuation line, but no variable", file, line); | |
1167 | if (dd.len) dstr_putc(&dd, ' '); | |
1168 | dstr_putm(&dd, p, d.len - (p - d.p)); | |
1169 | ||
1170 | } else { | |
1171 | /* The line starts in the first column. */ | |
1172 | ||
1173 | /* If there's a value value being collected then we must commit it to | |
1174 | * its variable (unless there's already a setting there that says we | |
1175 | * shouldn't). | |
1176 | */ | |
7b8ff279 MW |
1177 | if (var) { |
1178 | if (!(var->f&CF_OVERRIDE)) | |
1179 | { var->val = xstrndup(dd.p, dd.len); var->n = dd.len; } | |
1180 | var = 0; | |
1181 | } | |
8996f767 MW |
1182 | |
1183 | /* Now decide what kind of line this is. */ | |
1184 | if (d.p[0] == '[') { | |
1185 | /* It's a section header. */ | |
1186 | ||
1187 | /* Parse the header. */ | |
1188 | p = d.p + 1; while (ISSPACE(*p)) p++; | |
1189 | q = scan_name("section", p, d.p + d.len, file, line); | |
1190 | r = q; while (ISSPACE(*r)) r++; | |
1191 | if (*r != ']') | |
1192 | lose("%s:%u: expected `]' in section header", file, line); | |
1193 | if (r[1]) | |
1194 | lose("%s:%u: trailing junk after `]' in section header", | |
1195 | file, line); | |
1196 | ||
1197 | /* Create the new section. */ | |
7b8ff279 | 1198 | sect = config_find_section_n(conf, CF_CREAT, p, q - p); |
8996f767 | 1199 | |
7b8ff279 | 1200 | } else { |
8996f767 MW |
1201 | /* It's a variable assignment. Parse the name out. */ |
1202 | p = scan_name("variable", d.p, d.p + d.len, file, line); | |
7b8ff279 MW |
1203 | var = config_find_var_n(conf, sect, CF_CREAT, d.p, p - d.p); |
1204 | while (ISSPACE(*p)) p++; | |
1205 | if (*p != '=') lose("%s:%u: missing `=' in assignment", file, line); | |
1206 | p++; while (ISSPACE(*p)) p++; | |
8996f767 MW |
1207 | |
1208 | /* Clear out the variable's initial value, unless we shouldn't | |
1209 | * override it. | |
1210 | */ | |
7b8ff279 MW |
1211 | if (!(var->f&CF_OVERRIDE)) { |
1212 | free(var->val); var->val = 0; var->f = 0; | |
1213 | free(var->file); var->file = xstrdup(file); var->line = line; | |
1214 | } | |
1215 | dstr_reset(&dd); dstr_puts(&dd, p); | |
1216 | } | |
7b8ff279 MW |
1217 | } |
1218 | } | |
1219 | ||
8996f767 | 1220 | /* If there's a value under construction then commit the result. */ |
7b8ff279 MW |
1221 | if (var && !(var->f&CF_OVERRIDE)) |
1222 | { var->val = xstrndup(dd.p, dd.len); var->n = dd.len; } | |
1223 | ||
8996f767 | 1224 | /* Close the file. */ |
7b8ff279 MW |
1225 | if (fclose(fp)) |
1226 | lose("error reading configuration file `%s': %s", file, strerror(errno)); | |
8996f767 MW |
1227 | |
1228 | /* All done. */ | |
1229 | dstr_release(&d); dstr_release(&dd); | |
7b8ff279 MW |
1230 | return (0); |
1231 | } | |
1232 | ||
8996f767 MW |
1233 | /* Populate SECT with environment variables. |
1234 | * | |
1235 | * Environment variables are always set with `CF_LITERAL'. | |
1236 | */ | |
7b8ff279 MW |
1237 | void config_read_env(struct config *conf, struct config_section *sect) |
1238 | { | |
1239 | const char *p, *v; | |
1240 | size_t i; | |
1241 | ||
1242 | for (i = 0; (p = environ[i]) != 0; i++) { | |
1243 | v = strchr(p, '='); if (!v) continue; | |
1244 | config_set_var_n(conf, sect, CF_LITERAL, p, v - p, v + 1, strlen(v + 1)); | |
1245 | } | |
1246 | } | |
1247 | ||
1248 | /*----- Substitution and quoting ------------------------------------------*/ | |
1249 | ||
8996f767 MW |
1250 | /* The substitution and word-splitting state. |
1251 | * | |
1252 | * This only keeps track of the immutable parameters for the substitution | |
1253 | * task: stuff which changes (flags, filtering state, cursor position) is | |
1254 | * maintained separately. | |
1255 | */ | |
7b8ff279 | 1256 | struct subst { |
8996f767 MW |
1257 | struct config *config; /* configuration state */ |
1258 | struct config_section *home; /* home section for lookups */ | |
1259 | struct dstr *d; /* current word being constructed */ | |
1260 | struct argv *av; /* output word list */ | |
7b8ff279 MW |
1261 | }; |
1262 | ||
8996f767 MW |
1263 | /* Flags for `subst' and related functions. */ |
1264 | #define SF_SPLIT 0x0001u /* split at (unquoted) whitespace */ | |
1265 | #define SF_QUOT 0x0002u /* currently within double quotes */ | |
1266 | #define SF_SUBST 0x0004u /* apply `$-substitutions */ | |
1267 | #define SF_SUBEXPR 0x0008u /* stop at delimiter `|' or `}' */ | |
1268 | #define SF_SPANMASK 0x00ffu /* mask for the above */ | |
1269 | ||
1270 | #define SF_WORD 0x0100u /* output word under construction */ | |
1271 | #define SF_SKIP 0x0200u /* not producing output */ | |
1272 | #define SF_LITERAL 0x0400u /* do not expand or substitute */ | |
1273 | #define SF_UPCASE 0x0800u /* convert to uppercase */ | |
1274 | #define SF_DOWNCASE 0x1000u /* convert to lowercase */ | |
1275 | #define SF_CASEMASK 0x1800u /* mask for case conversions */ | |
1276 | ||
1277 | /* Apply filters encoded in QFILT and F to the text from P to L, and output. | |
1278 | * | |
1279 | * SB is the substitution state which, in particular, explains where the | |
1280 | * output should go. | |
1281 | * | |
1282 | * The filters are encoded as flags `SF_UPCASE' and `SF_DOWNCASE' for case | |
1283 | * conversions, and a nesting depth QFILT for toothpick escaping. (QFILT is | |
1284 | * encoded as the number of toothpicks to print: see `subst' for how this | |
1285 | * determined.) | |
1286 | */ | |
1287 | static void filter_string(const char *p, const char *l, | |
1288 | const struct subst *sb, unsigned qfilt, unsigned f) | |
7b8ff279 MW |
1289 | { |
1290 | size_t r, n; | |
8996f767 | 1291 | char *q; const char *pp, *ll; |
7b8ff279 | 1292 | |
8996f767 MW |
1293 | if (!qfilt && !(f&SF_CASEMASK)) |
1294 | /* Fast path: there's nothing to do: just write to the output. */ | |
7b8ff279 | 1295 | dstr_putm(sb->d, p, l - p); |
8996f767 | 1296 | |
7b8ff279 | 1297 | else for (;;) { |
8996f767 MW |
1298 | /* We must be a bit more circumspect. */ |
1299 | ||
1300 | /* Determine the length of the next span of characters which don't need | |
1301 | * escaping. (If QFILT is zero then this is everything.) | |
1302 | */ | |
1303 | r = l - p; n = qfilt ? strcspn(p, "\"\\") : r; | |
7b8ff279 | 1304 | if (n > r) n = r; |
8996f767 MW |
1305 | |
1306 | if (!(f&SF_CASEMASK)) | |
1307 | /* No case conversion: we can just emit this chunk. */ | |
1308 | ||
1309 | dstr_putm(sb->d, p, n); | |
1310 | ||
1311 | else { | |
1312 | /* Case conversion to do. Arrange enough space for the output, and | |
1313 | * convert it character by character. | |
1314 | */ | |
1315 | ||
1316 | dstr_ensure(sb->d, n); q = sb->d->p + sb->d->len; pp = p; ll = p + n; | |
1317 | if (f&SF_DOWNCASE) while (pp < ll) *q++ = TOLOWER(*pp++); | |
1318 | else if (f&SF_UPCASE) while (pp < ll) *q++ = TOUPPER(*pp++); | |
1319 | sb->d->len += n; | |
1320 | } | |
1321 | ||
1322 | /* If we've reached the end then stop. */ | |
7b8ff279 | 1323 | if (n >= r) break; |
8996f767 MW |
1324 | |
1325 | /* Otherwise we must have found a character which requires escaping. | |
1326 | * Emit enough toothpicks. | |
1327 | */ | |
1328 | dstr_putcn(sb->d, '\\', qfilt); | |
1329 | ||
1330 | /* This character is now done, so we can skip over and see if there's | |
1331 | * another chunk of stuff we can do at high speed. | |
1332 | */ | |
1333 | dstr_putc(sb->d, p[n]); p += n + 1; | |
7b8ff279 MW |
1334 | } |
1335 | } | |
1336 | ||
8996f767 MW |
1337 | /* Scan and resolve a `[SECT:]VAR' specifier at P. |
1338 | * | |
1339 | * Return the address of the next character following the specifier. L is a | |
1340 | * limit on the region of the buffer that we should process; SB is the | |
1341 | * substitution state which provides the home section if none is given | |
1342 | * explicitly; FILE and LINE are the source location to blame for problems. | |
1343 | */ | |
7b8ff279 | 1344 | static const char *retrieve_varspec(const char *p, const char *l, |
8996f767 MW |
1345 | const struct subst *sb, |
1346 | struct config_var **var_out, | |
1347 | const char *file, unsigned line) | |
7b8ff279 MW |
1348 | { |
1349 | struct config_section *sect = sb->home; | |
1350 | const char *t; | |
1351 | ||
8996f767 | 1352 | t = scan_name("section or variable", p, l, file, line); |
7b8ff279 MW |
1353 | if (t < l && *t == ':') { |
1354 | sect = config_find_section_n(sb->config, 0, p, t - p); | |
8996f767 | 1355 | p = t + 1; t = scan_name("variable", p, l, file, line); |
7b8ff279 MW |
1356 | } |
1357 | ||
1358 | if (!sect) *var_out = 0; | |
1359 | else *var_out = config_find_var_n(sb->config, sect, CF_INHERIT, p, t - p); | |
1360 | return (t); | |
1361 | } | |
1362 | ||
8996f767 MW |
1363 | /* Substitute and/or word-split text. |
1364 | * | |
1365 | * The input text starts at P, and continues to (just before) L. Context for | |
1366 | * the task is provided by SB; the source location to blame is FILE and LINE | |
1367 | * (FILE may be null so that this can be passed directly from a `config_var' | |
1368 | * without further checking); QFILT is the nesting depth in toothpick- | |
1369 | * escaping; and F holds a mask of `SF_...' flags. | |
1370 | */ | |
1371 | static const char *subst(const char *p, const char *l, | |
1372 | const struct subst *sb, | |
7b8ff279 MW |
1373 | const char *file, unsigned line, |
1374 | unsigned qfilt, unsigned f) | |
1375 | { | |
1376 | struct config_var *var; | |
1377 | const char *q0, *q1, *t; | |
1378 | unsigned subqfilt, ff; | |
1379 | size_t n; | |
1380 | ||
8996f767 | 1381 | /* It would be best if we could process literal text at high speed. To |
0b17bcec MW |
1382 | * this end, we have a table, indexed by the low-order bits of F, to tell |
1383 | * us which special characters we need to stop at. This way, we can use | |
1384 | * `strcspn' to skip over literal text and stop at the next character which | |
1385 | * needs special handling. Entries in this table with a null pointer | |
1386 | * correspond to impossible flag settings. | |
8996f767 MW |
1387 | */ |
1388 | static const char *const delimtab[] = { | |
1389 | ||
1390 | #define ESCAPE "\\" /* always watch for `\'-escapes */ | |
1391 | #define SUBST "$" /* check for `$' if `SF_SUBST' set */ | |
1392 | #define WORDSEP " \f\r\n\t\v'\"" /* space, quotes if `SF_SPLIT' but | |
1393 | * not `SF_QUOT' */ | |
1394 | #define QUOT "\"" /* only quotes if `SF_SPLIT' and | |
1395 | * `SF_QUOT' */ | |
1396 | #define DELIM "|}" /* end delimiters of `SF_SUBEXPR' */ | |
1397 | ||
1398 | ESCAPE, | |
1399 | ESCAPE WORDSEP, | |
1400 | 0, | |
1401 | ESCAPE QUOT, | |
1402 | ESCAPE SUBST, | |
1403 | ESCAPE SUBST WORDSEP, | |
1404 | 0, | |
1405 | ESCAPE SUBST QUOT, | |
1406 | ESCAPE DELIM, | |
1407 | ESCAPE DELIM WORDSEP, | |
1408 | 0, | |
1409 | ESCAPE DELIM QUOT, | |
1410 | ESCAPE DELIM SUBST, | |
1411 | ESCAPE DELIM SUBST WORDSEP, | |
1412 | 0, | |
1413 | ESCAPE DELIM SUBST QUOT | |
7b8ff279 MW |
1414 | |
1415 | #undef COMMON | |
1416 | #undef WORDSEP | |
1417 | #undef SQUOT | |
1418 | #undef DELIM | |
8996f767 | 1419 | }; |
7b8ff279 | 1420 | |
8996f767 | 1421 | /* Set FILE to be useful if it was null on entry. */ |
7b8ff279 MW |
1422 | if (!file) file = "<internal>"; |
1423 | ||
8996f767 MW |
1424 | /* If the text is literal then hand off to `filter_string'. This obviously |
1425 | * starts a word. | |
1426 | */ | |
7b8ff279 | 1427 | if (f&SF_LITERAL) { |
8996f767 | 1428 | filter_string(p, l, sb, qfilt, f); |
7b8ff279 MW |
1429 | f |= SF_WORD; |
1430 | goto done; | |
1431 | } | |
1432 | ||
8996f767 | 1433 | /* Chew through the input until it's all gone. */ |
7b8ff279 MW |
1434 | while (p < l) { |
1435 | ||
1436 | if ((f&(SF_SPLIT | SF_QUOT)) == SF_SPLIT && ISSPACE(*p)) { | |
8996f767 MW |
1437 | /* This is whitespace, we're supposed to split, and we're not within |
1438 | * quotes, so we should split here. | |
1439 | */ | |
1440 | ||
1441 | /* If there's a word in progress then we should commit it. */ | |
7b8ff279 MW |
1442 | if (f&SF_WORD) { |
1443 | if (!(f&SF_SKIP)) { | |
1444 | argv_append(sb->av, xstrndup(sb->d->p, sb->d->len)); | |
1445 | dstr_reset(sb->d); | |
1446 | } | |
1447 | f &= ~SF_WORD; | |
1448 | } | |
8996f767 MW |
1449 | |
1450 | /* Skip over further whitespace at high speed. */ | |
7b8ff279 MW |
1451 | do p++; while (p < l && ISSPACE(*p)); |
1452 | ||
1453 | } else if (*p == '\\') { | |
8996f767 MW |
1454 | /* This is a toothpick, so start a new word and add the next character |
1455 | * to it. | |
1456 | */ | |
1457 | ||
1458 | /* If there's no next charact3er then we should be upset. */ | |
1459 | p++; if (p >= l) lose("%s:%u: unfinished `\\' escape", file, line); | |
1460 | ||
7b8ff279 | 1461 | if (!(f&SF_SKIP)) { |
8996f767 MW |
1462 | |
1463 | /* If this is a double quote or backslash then check DFLT to see if | |
1464 | * it needs escaping. | |
1465 | */ | |
7b8ff279 MW |
1466 | if (qfilt && (*p == '"' || *p == '\\')) |
1467 | dstr_putcn(sb->d, '\\', qfilt); | |
8996f767 MW |
1468 | |
1469 | /* Output the character. */ | |
1470 | if (f&SF_DOWNCASE) dstr_putc(sb->d, TOLOWER(*p)); | |
1471 | else if (f&SF_UPCASE) dstr_putc(sb->d, TOUPPER(*p)); | |
1472 | else dstr_putc(sb->d, *p); | |
7b8ff279 | 1473 | } |
8996f767 MW |
1474 | |
1475 | /* Move past the escaped character. Remember we started a word. */ | |
1476 | p++; f |= SF_WORD; | |
7b8ff279 MW |
1477 | |
1478 | } else if ((f&SF_SPLIT) && *p == '"') { | |
8996f767 MW |
1479 | /* This is a double quote, and we're word splitting. We're definitely |
1480 | * in a word now. Toggle whether we're within quotes. | |
1481 | */ | |
1482 | ||
7b8ff279 MW |
1483 | f ^= SF_QUOT; f |= SF_WORD; p++; |
1484 | ||
1485 | } else if ((f&(SF_SPLIT | SF_QUOT)) == SF_SPLIT && *p == '\'') { | |
8996f767 MW |
1486 | /* This is a single quote, and we're word splitting but not within |
1487 | * double quotes. Find the matching end quote, and just output | |
1488 | * everything between literally. | |
1489 | */ | |
1490 | ||
1491 | p++; t = strchr(p, '\''); | |
1492 | if (!t || t >= l) lose("%s:%u: missing `''", file, line); | |
1493 | if (!(f&SF_SKIP)) filter_string(p, t, sb, qfilt, f); | |
7b8ff279 MW |
1494 | p = t + 1; f |= SF_WORD; |
1495 | ||
1496 | } else if ((f&SF_SUBEXPR) && (*p == '|' || *p == '}')) { | |
8996f767 | 1497 | /* This is an end delimiter, and we're supposed to stop here. */ |
7b8ff279 MW |
1498 | break; |
1499 | ||
1500 | } else if ((f&SF_SUBST) && *p == '$') { | |
8996f767 MW |
1501 | /* This is a `$' and we're supposed to do substitution. */ |
1502 | ||
1503 | /* The kind of substitution is determined by the next character. */ | |
7b8ff279 | 1504 | p++; if (p >= l) lose("%s:%u: incomplete substitution", file, line); |
8996f767 MW |
1505 | |
1506 | /* Prepare flags for a recursive substitution. | |
1507 | * | |
1508 | * Hide our quote state from the recursive call. If we're within a | |
1509 | * word, then disable word-splitting. | |
1510 | */ | |
7b8ff279 | 1511 | ff = f&~(SF_QUOT | (f&SF_WORD ? SF_SPLIT : 0)); |
8996f767 MW |
1512 | |
1513 | /* Now dispatch based on the following character. */ | |
7b8ff279 MW |
1514 | switch (*p) { |
1515 | ||
1516 | case '?': | |
8996f767 MW |
1517 | /* A conditional expression: $?VAR{CONSEQ[|ALT]} */ |
1518 | ||
1519 | /* Skip initial space. */ | |
1520 | p++; while (p < l && ISSPACE(*p)) p++; | |
1521 | ||
1522 | /* Find the variable. */ | |
1523 | p = retrieve_varspec(p, l, sb, &var, file, line); | |
1524 | ||
1525 | /* Skip whitespace again. */ | |
1526 | while (p < l && ISSPACE(*p)) p++; | |
1527 | ||
1528 | /* Expect the opening `{'. */ | |
7b8ff279 MW |
1529 | if (p > l || *p != '{') lose("%s:%u: expected `{'", file, line); |
1530 | p++; | |
8996f767 MW |
1531 | |
1532 | /* We'll process the parts recursively, but we need to come back | |
1533 | * when we hit the appropriate delimiters, so arrange for that. | |
1534 | */ | |
7b8ff279 | 1535 | ff |= SF_SUBEXPR; |
8996f767 MW |
1536 | |
1537 | /* Process the consequent (skip if the variable wasn't found). */ | |
7b8ff279 MW |
1538 | p = subst(p, l, sb, file, line, qfilt, |
1539 | ff | (var ? 0 : SF_SKIP)); | |
8996f767 MW |
1540 | |
1541 | /* If there's a `|' then process the alternative too (skip if the | |
1542 | * variable /was/ found). | |
1543 | */ | |
7b8ff279 MW |
1544 | if (p < l && *p == '|') |
1545 | p = subst(p + 1, l, sb, file, line, qfilt, | |
1546 | ff | (var ? SF_SKIP : 0)); | |
8996f767 MW |
1547 | |
1548 | /* We should now be past the closing `}'. */ | |
7b8ff279 MW |
1549 | if (p >= l || *p != '}') lose("%s:%u: missing `}'", file, line); |
1550 | p++; | |
1551 | break; | |
1552 | ||
1553 | case '{': | |
8996f767 MW |
1554 | /* A variable substitution: ${VAR[|FILT]...[?ALT]} */ |
1555 | ||
1556 | /* Skip initial whitespace. */ | |
1557 | p++; while (p < l && ISSPACE(*p)) p++; | |
1558 | ||
1559 | /* Find the variable. */ | |
1560 | q0 = p; p = retrieve_varspec(p, l, sb, &var, file, line); q1 = p; | |
1561 | ||
1562 | /* Determine the filters to apply when substituting the variable | |
1563 | * value. | |
1564 | */ | |
7b8ff279 | 1565 | subqfilt = qfilt; |
8996f767 MW |
1566 | for (;;) { |
1567 | ||
1568 | /* Skip spaces again. */ | |
1569 | while (p < l && ISSPACE(*p)) p++; | |
1570 | ||
1571 | /* If there's no `|' then there are no more filters, so stop. */ | |
1572 | if (p >= l || *p != '|') break; | |
1573 | ||
1574 | /* Skip the `|' and more spaces. */ | |
1575 | p++; while (p < l && ISSPACE(*p)) p++; | |
1576 | ||
1577 | /* Collect the filter name. */ | |
1578 | t = scan_name("filter", p, l, file, line); | |
1579 | ||
1580 | /* Dispatch on the filter name. */ | |
1581 | if (t - p == 1 && *p == 'q') | |
1582 | /* `q' -- quote for Lisp string. | |
1583 | * | |
1584 | * We're currently adding Q `\' characters before each naughty | |
1585 | * character. But a backslash itself is naughty too, so that | |
1586 | * makes Q + 1 naughty characters, each of which needs a | |
1587 | * toothpick, so now we need Q + (Q + 1) = 2 Q + 1 toothpicks. | |
1588 | * | |
1589 | * Calculate this here rather than at each point toothpicks | |
1590 | * needs to be deployed. | |
1591 | */ | |
1592 | ||
1593 | subqfilt = 2*subqfilt + 1; | |
1594 | ||
1595 | else if (t - p == 1 && *p == 'l') | |
1596 | /* `u' -- convert to uppercase. | |
1597 | * | |
1598 | * If a case conversion is already set, then that will override | |
1599 | * whatever we do here, so don't bother. | |
1600 | */ | |
1601 | ||
1602 | { if (!(ff&SF_CASEMASK)) ff |= SF_DOWNCASE; } | |
1603 | ||
1604 | else if (t - p == 1 && *p == 'u') | |
1605 | /* `u' -- convert to uppercase. | |
1606 | * | |
1607 | * If a case conversion is already set, then that will override | |
1608 | * whatever we do here, so don't bother. | |
1609 | */ | |
1610 | { if (!(ff&SF_CASEMASK)) ff |= SF_UPCASE; } | |
1611 | ||
7b8ff279 | 1612 | else |
8996f767 | 1613 | /* Something else we didn't understand. */ |
7b8ff279 MW |
1614 | lose("%s:%u: unknown filter `%.*s'", |
1615 | file, line, (int)(t - p), p); | |
8996f767 MW |
1616 | |
1617 | /* Continue from after the filter name. */ | |
7b8ff279 MW |
1618 | p = t; |
1619 | } | |
8996f767 MW |
1620 | |
1621 | /* If we're not skipping, and we found a variable, then substitute | |
1622 | * its value. This is the point where we need to be careful about | |
1623 | * recursive expansion. | |
1624 | */ | |
7b8ff279 MW |
1625 | if (!(f&SF_SKIP) && var) { |
1626 | if (var->f&CF_EXPAND) | |
1627 | lose("%s:%u: recursive expansion of variable `%.*s'", | |
1628 | file, line, (int)(q1 - q0), q0); | |
1629 | var->f |= CF_EXPAND; | |
1630 | subst(var->val, var->val + var->n, sb, | |
1631 | var->file, var->line, subqfilt, | |
1632 | ff | (var->f&CF_LITERAL ? SF_LITERAL : 0)); | |
1633 | var->f &= ~CF_EXPAND; | |
1634 | } | |
8996f767 MW |
1635 | |
1636 | /* If there's an alternative, then we need to process (or maybe | |
1637 | * skip) it. Otherwise, we should complain if there was no | |
1638 | * veriable, and we're not skipping. | |
1639 | */ | |
7b8ff279 MW |
1640 | if (p < l && *p == '?') |
1641 | p = subst(p + 1, l, sb, file, line, subqfilt, | |
1642 | ff | SF_SUBEXPR | (var ? SF_SKIP : 0)); | |
1643 | else if (!var && !(f&SF_SKIP)) | |
1644 | lose("%s:%u: unknown variable `%.*s'", | |
1645 | file, line, (int)(q1 - q0), q0); | |
8996f767 MW |
1646 | |
1647 | /* Expect a `}' here. (No need to skip spaces: we already did that | |
1648 | * after scanning for filters, and either there was no alternative, | |
1649 | * or we advanced to a delimiter character anyway.) | |
1650 | */ | |
7b8ff279 MW |
1651 | if (p >= l || *p != '}') lose("%s:%u: missing `}'", file, line); |
1652 | p++; | |
1653 | break; | |
1654 | ||
1655 | default: | |
8996f767 MW |
1656 | /* Something else. That's a shame. */ |
1657 | lose("%s:%u: unexpected `$'-substitution `%c'", file, line, *p); | |
7b8ff279 | 1658 | } |
8996f767 MW |
1659 | |
1660 | /* Complain if we started out in word-splitting state, and therefore | |
1661 | * have added a whole number of words to the output, but there's a | |
1662 | * word-fragment stuck onto the end of this substitution. | |
1663 | */ | |
7b8ff279 MW |
1664 | if (p < l && !(~f&~(SF_WORD | SF_SPLIT)) && !ISSPACE(*p) && |
1665 | !((f&SF_SUBEXPR) && (*p == '|' || *p == '}'))) | |
1666 | lose("%s:%u: surprising word boundary " | |
1667 | "after splicing substitution", | |
1668 | file, line); | |
1669 | } | |
1670 | ||
1671 | else { | |
8996f767 MW |
1672 | /* Something else. Try to skip over this at high speed. |
1673 | * | |
1674 | * This makes use of the table we set up earlier. | |
1675 | */ | |
1676 | ||
7b8ff279 MW |
1677 | n = strcspn(p, delimtab[f&SF_SPANMASK]); |
1678 | if (n > l - p) n = l - p; | |
8996f767 | 1679 | if (!(f&SF_SKIP)) filter_string(p, p + n, sb, qfilt, f); |
7b8ff279 MW |
1680 | p += n; f |= SF_WORD; |
1681 | } | |
1682 | } | |
1683 | ||
1684 | done: | |
8996f767 MW |
1685 | /* Sort out the wreckage. */ |
1686 | ||
1687 | /* If we're still within quotes then something has gone wrong. */ | |
7b8ff279 | 1688 | if (f&SF_QUOT) lose("%s:%u: missing `\"'", file, line); |
8996f767 MW |
1689 | |
1690 | /* If we're within a word, and should be splitting, then commit the word to | |
1691 | * the output list. | |
1692 | */ | |
7b8ff279 MW |
1693 | if ((f&(SF_WORD | SF_SPLIT | SF_SKIP)) == (SF_SPLIT | SF_WORD)) { |
1694 | argv_append(sb->av, xstrndup(sb->d->p, sb->d->len)); | |
1695 | dstr_reset(sb->d); | |
1696 | } | |
1697 | ||
8996f767 | 1698 | /* And, with that, we're done. */ |
7b8ff279 MW |
1699 | return (p); |
1700 | } | |
1701 | ||
8996f767 MW |
1702 | /* Expand substitutions in a string. |
1703 | * | |
1704 | * Expand the null-terminated string P relative to the HOME section, using | |
1705 | * configuration CONFIG, and appending the result to dynamic string D. Blame | |
1706 | * WHAT in any error messages. | |
1707 | */ | |
7b8ff279 MW |
1708 | void config_subst_string(struct config *config, struct config_section *home, |
1709 | const char *what, const char *p, struct dstr *d) | |
1710 | { | |
1711 | struct subst sb; | |
1712 | ||
1713 | sb.config = config; sb.home = home; sb.d = d; | |
1714 | subst(p, p + strlen(p), &sb, what, 0, 0, SF_SUBST); | |
1715 | dstr_putz(d); | |
1716 | } | |
1717 | ||
8996f767 MW |
1718 | /* Expand substitutions in a string. |
1719 | * | |
1720 | * Expand the null-terminated string P relative to the HOME section, using | |
1721 | * configuration CONFIG, returning the result as a freshly malloc(3)ed | |
1722 | * string. Blame WHAT in any error messages. | |
1723 | */ | |
7b8ff279 MW |
1724 | char *config_subst_string_alloc(struct config *config, |
1725 | struct config_section *home, | |
1726 | const char *what, const char *p) | |
1727 | { | |
1728 | struct dstr d = DSTR_INIT; | |
1729 | char *q; | |
1730 | ||
1731 | config_subst_string(config, home, what, p, &d); | |
1732 | q = xstrndup(d.p, d.len); dstr_release(&d); return (q); | |
1733 | } | |
1734 | ||
8996f767 MW |
1735 | /* Expand substitutions in a variable. |
1736 | * | |
1737 | * Expand the value of the variable VAR relative to the HOME section, using | |
1738 | * configuration CONFIG, appending the result to dynamic string D. | |
1739 | */ | |
7b8ff279 MW |
1740 | void config_subst_var(struct config *config, struct config_section *home, |
1741 | struct config_var *var, struct dstr *d) | |
1742 | { | |
1743 | struct subst sb; | |
1744 | ||
1745 | sb.config = config; sb.home = home; sb.d = d; | |
1746 | var->f |= CF_EXPAND; | |
1747 | subst(var->val, var->val + var->n, &sb, var->file, var->line, 0, | |
1748 | SF_SUBST | (var->f&CF_LITERAL ? SF_LITERAL : 0)); | |
1749 | var->f &= ~CF_EXPAND; | |
1750 | dstr_putz(d); | |
1751 | } | |
1752 | ||
8996f767 MW |
1753 | /* Expand substitutions in a variable. |
1754 | * | |
1755 | * Expand the value of the variable VAR relative to the HOME section, using | |
1756 | * configuration CONFIG, returning the result as a freshly malloc(3)ed | |
1757 | * string. | |
1758 | */ | |
7b8ff279 MW |
1759 | char *config_subst_var_alloc(struct config *config, |
1760 | struct config_section *home, | |
1761 | struct config_var *var) | |
1762 | { | |
1763 | struct dstr d = DSTR_INIT; | |
1764 | char *q; | |
1765 | ||
1766 | config_subst_var(config, home, var, &d); | |
1767 | q = xstrndup(d.p, d.len); dstr_release(&d); return (q); | |
1768 | } | |
1769 | ||
8996f767 MW |
1770 | /* Expand substitutions in a variable and split into words. |
1771 | * | |
1772 | * Expand and word-split the value of the variable VAR relative to the HOME | |
1773 | * section, using configuration CONFIG, appending the resulting words into | |
1774 | * the vector AV. | |
1775 | */ | |
7b8ff279 MW |
1776 | void config_subst_split_var(struct config *config, |
1777 | struct config_section *home, | |
1778 | struct config_var *var, struct argv *av) | |
1779 | { | |
1780 | struct dstr d = DSTR_INIT; | |
1781 | struct subst sb; | |
1782 | ||
1783 | sb.config = config; sb.home = home; sb.av = av; sb.d = &d; | |
1784 | var->f |= CF_EXPAND; | |
1785 | subst(var->val, var->val + var->n, &sb, var->file, var->line, 0, | |
1786 | SF_SUBST | SF_SPLIT | (var->f&CF_LITERAL ? SF_LITERAL : 0)); | |
1787 | var->f &= ~CF_EXPAND; | |
1788 | dstr_release(&d); | |
1789 | } | |
1790 | ||
1791 | /*----- That's all, folks -------------------------------------------------*/ |