Switch the memory allocation macros from the Halibut ones
[sgt/halibut] / misc.c
1 /*
2 * misc.c: miscellaneous useful items
3 */
4
5 #include <stdarg.h>
6 #include "halibut.h"
7
8 char *adv(char *s) {
9 return s + 1 + strlen(s);
10 }
11
12 struct stackTag {
13 void **data;
14 int sp;
15 int size;
16 };
17
18 stack stk_new(void) {
19 stack s;
20
21 s = snew(struct stackTag);
22 s->sp = 0;
23 s->size = 0;
24 s->data = NULL;
25
26 return s;
27 }
28
29 void stk_free(stack s) {
30 sfree(s->data);
31 sfree(s);
32 }
33
34 void stk_push(stack s, void *item) {
35 if (s->size <= s->sp) {
36 s->size = s->sp + 32;
37 s->data = sresize(s->data, s->size, void *);
38 }
39 s->data[s->sp++] = item;
40 }
41
42 void *stk_pop(stack s) {
43 if (s->sp > 0)
44 return s->data[--s->sp];
45 else
46 return NULL;
47 }
48
49 void *stk_top(stack s) {
50 if (s->sp > 0)
51 return s->data[s->sp-1];
52 else
53 return NULL;
54 }
55
56 /*
57 * Small routines to amalgamate a string from an input source.
58 */
59 const rdstring empty_rdstring = {0, 0, NULL};
60 const rdstringc empty_rdstringc = {0, 0, NULL};
61
62 void rdadd(rdstring *rs, wchar_t c) {
63 if (rs->pos >= rs->size-1) {
64 rs->size = rs->pos + 128;
65 rs->text = sresize(rs->text, rs->size, wchar_t);
66 }
67 rs->text[rs->pos++] = c;
68 rs->text[rs->pos] = 0;
69 }
70 void rdadds(rdstring *rs, wchar_t const *p) {
71 int len = ustrlen(p);
72 if (rs->pos >= rs->size - len) {
73 rs->size = rs->pos + len + 128;
74 rs->text = sresize(rs->text, rs->size, wchar_t);
75 }
76 ustrcpy(rs->text + rs->pos, p);
77 rs->pos += len;
78 }
79 wchar_t *rdtrim(rdstring *rs) {
80 rs->text = sresize(rs->text, rs->pos + 1, wchar_t);
81 return rs->text;
82 }
83
84 void rdaddc(rdstringc *rs, char c) {
85 if (rs->pos >= rs->size-1) {
86 rs->size = rs->pos + 128;
87 rs->text = sresize(rs->text, rs->size, char);
88 }
89 rs->text[rs->pos++] = c;
90 rs->text[rs->pos] = 0;
91 }
92 void rdaddsc(rdstringc *rs, char const *p) {
93 int len = strlen(p);
94 if (rs->pos >= rs->size - len) {
95 rs->size = rs->pos + len + 128;
96 rs->text = sresize(rs->text, rs->size, char);
97 }
98 strcpy(rs->text + rs->pos, p);
99 rs->pos += len;
100 }
101 char *rdtrimc(rdstringc *rs) {
102 rs->text = sresize(rs->text, rs->pos + 1, char);
103 return rs->text;
104 }
105
106 static int compare_wordlists_literally(word *a, word *b) {
107 int t;
108 while (a && b) {
109 if (a->type != b->type)
110 return (a->type < b->type ? -1 : +1); /* FIXME? */
111 t = a->type;
112 if ((t != word_Normal && t != word_Code &&
113 t != word_WeakCode && t != word_Emph) ||
114 a->alt || b->alt) {
115 int c;
116 if (a->text && b->text) {
117 c = ustricmp(a->text, b->text);
118 if (c)
119 return c;
120 }
121 c = compare_wordlists_literally(a->alt, b->alt);
122 if (c)
123 return c;
124 a = a->next;
125 b = b->next;
126 } else {
127 wchar_t *ap = a->text, *bp = b->text;
128 while (*ap && *bp) {
129 wchar_t ac = utolower(*ap), bc = utolower(*bp);
130 if (ac != bc)
131 return (ac < bc ? -1 : +1);
132 if (!*++ap && a->next && a->next->type == t && !a->next->alt)
133 a = a->next, ap = a->text;
134 if (!*++bp && b->next && b->next->type == t && !b->next->alt)
135 b = b->next, bp = b->text;
136 }
137 if (*ap || *bp)
138 return (*ap ? +1 : -1);
139 a = a->next;
140 b = b->next;
141 }
142 }
143
144 if (a || b)
145 return (a ? +1 : -1);
146 else
147 return 0;
148 }
149
150 int compare_wordlists(word *a, word *b) {
151 /*
152 * First we compare only the alphabetic content of the word
153 * lists, with case not a factor. If that comes out equal,
154 * _then_ we compare the word lists literally.
155 */
156 struct {
157 word *w;
158 int i;
159 wchar_t c;
160 } pos[2];
161
162 pos[0].w = a;
163 pos[1].w = b;
164 pos[0].i = pos[1].i = 0;
165
166 while (1) {
167 /*
168 * Find the next alphabetic character in each word list.
169 */
170 int k;
171
172 for (k = 0; k < 2; k++) {
173 /*
174 * Advance until we hit either an alphabetic character
175 * or the end of the word list.
176 */
177 while (1) {
178 if (!pos[k].w) {
179 /* End of word list. */
180 pos[k].c = 0;
181 break;
182 } else if (!pos[k].w->text || !pos[k].w->text[pos[k].i]) {
183 /* No characters remaining in this word; move on. */
184 pos[k].w = pos[k].w->next;
185 pos[k].i = 0;
186 } else if (!uisalpha(pos[k].w->text[pos[k].i])) {
187 /* This character isn't alphabetic; move on. */
188 pos[k].i++;
189 } else {
190 /* We have an alphabetic! Lowercase it and continue. */
191 pos[k].c = utolower(pos[k].w->text[pos[k].i]);
192 break;
193 }
194 }
195 }
196
197 #ifdef HAS_WCSCOLL
198 {
199 wchar_t a[2], b[2];
200 int ret;
201
202 a[0] = pos[0].c;
203 b[0] = pos[1].c;
204 a[1] = b[1] = L'\0';
205
206 ret = wcscoll(a, b);
207 if (ret)
208 return ret;
209 }
210 #else
211 if (pos[0].c < pos[1].c)
212 return -1;
213 else if (pos[0].c > pos[1].c)
214 return +1;
215 #endif
216
217 if (!pos[0].c)
218 break; /* they're equal */
219
220 pos[0].i++;
221 pos[1].i++;
222 }
223
224 /*
225 * If we reach here, the strings were alphabetically equal, so
226 * compare in more detail.
227 */
228 return compare_wordlists_literally(a, b);
229 }
230
231 void mark_attr_ends(word *words)
232 {
233 word *w, *wp;
234
235 wp = NULL;
236 for (w = words; w; w = w->next) {
237 if (isattr(w->type)) {
238 int before = (wp && isattr(wp->type) &&
239 sameattr(wp->type, w->type));
240 int after = (w->next && isattr(w->next->type) &&
241 sameattr(w->next->type, w->type));
242 w->aux |= (before ?
243 (after ? attr_Always : attr_Last) :
244 (after ? attr_First : attr_Only));
245 }
246 wp = w;
247 }
248 }
249
250 /*
251 * This function implements the optimal paragraph wrapping
252 * algorithm, pretty much as used in TeX. A cost function is
253 * defined for each line of the wrapped paragraph (typically some
254 * convex function of the difference between the line's length and
255 * its desired length), and a dynamic programming approach is used
256 * to optimise globally across all possible layouts of the
257 * paragraph to find the one with the minimum total cost.
258 *
259 * The function as implemented here gives a choice of two options
260 * for the cost function:
261 *
262 * - If `natural_space' is zero, then the algorithm attempts to
263 * make each line the maximum possible width (either `width' or
264 * `subsequentwidth' depending on whether it's the first line of
265 * the paragraph or not), and the cost function is simply the
266 * square of the unused space at the end of each line. This is a
267 * simple mechanism suitable for use in fixed-pitch environments
268 * such as plain text displayed on a terminal.
269 *
270 * - However, if `natural_space' is positive, the algorithm
271 * assumes the medium is fully graphical and that the width of
272 * space characters can be adjusted finely, and it attempts to
273 * make each _space character_ the width given in
274 * `natural_space'. (The provided width function should return
275 * the _minimum_ acceptable width of a space character in this
276 * case.) Therefore, the cost function for a line is dependent
277 * on the number of spaces on that line as well as the amount by
278 * which the line width differs from the optimum.
279 */
280 wrappedline *wrap_para(word *text, int width, int subsequentwidth,
281 int (*widthfn)(void *, word *), void *ctx,
282 int natural_space) {
283 wrappedline *head = NULL, **ptr = &head;
284 int nwords, wordsize;
285 struct wrapword {
286 word *begin, *end;
287 int width;
288 int spacewidth;
289 int cost;
290 int nwords;
291 } *wrapwords;
292 int i, j, n;
293
294 /*
295 * Break the line up into wrappable components.
296 */
297 nwords = wordsize = 0;
298 wrapwords = NULL;
299 while (text) {
300 if (nwords >= wordsize) {
301 wordsize = nwords + 64;
302 wrapwords = srealloc(wrapwords, wordsize * sizeof(*wrapwords));
303 }
304 wrapwords[nwords].width = 0;
305 wrapwords[nwords].begin = text;
306 while (text) {
307 wrapwords[nwords].width += widthfn(ctx, text);
308 wrapwords[nwords].end = text->next;
309 if (text->next && (text->next->type == word_WhiteSpace ||
310 text->next->type == word_EmphSpace ||
311 text->breaks))
312 break;
313 text = text->next;
314 }
315 if (text && text->next && (text->next->type == word_WhiteSpace ||
316 text->next->type == word_EmphSpace)) {
317 wrapwords[nwords].spacewidth = widthfn(ctx, text->next);
318 text = text->next;
319 } else {
320 wrapwords[nwords].spacewidth = 0;
321 }
322 nwords++;
323 if (text)
324 text = text->next;
325 }
326
327 /*
328 * Perform the dynamic wrapping algorithm: work backwards from
329 * nwords-1, determining the optimal wrapping for each terminal
330 * subsequence of the paragraph.
331 */
332 for (i = nwords; i-- ;) {
333 int best = -1;
334 int bestcost = 0;
335 int cost;
336 int linelen = 0, spacewidth = 0, minspacewidth = 0;
337 int nspaces;
338 int thiswidth = (i == 0 ? width : subsequentwidth);
339
340 j = 0;
341 nspaces = 0;
342 while (i+j < nwords) {
343 /*
344 * See what happens if we put j+1 words on this line.
345 */
346 if (spacewidth) {
347 nspaces++;
348 minspacewidth = spacewidth;
349 }
350 linelen += spacewidth + wrapwords[i+j].width;
351 spacewidth = wrapwords[i+j].spacewidth;
352 j++;
353 if (linelen > thiswidth) {
354 /*
355 * If we're over the width limit, abandon ship,
356 * _unless_ there is no best-effort yet (which will
357 * only happen if the first word is too long all by
358 * itself).
359 */
360 if (best > 0)
361 break;
362 }
363
364 /*
365 * Compute the cost of this line. The method of doing
366 * this differs hugely depending on whether
367 * natural_space is nonzero or not.
368 */
369 if (natural_space) {
370 if (!nspaces && linelen > thiswidth) {
371 /*
372 * Special case: if there are no spaces at all
373 * on the line because one single word is too
374 * long for its line, cost is zero because
375 * there's nothing we can do about it anyway.
376 */
377 cost = 0;
378 } else {
379 int shortfall = thiswidth - linelen;
380 int spaceextra = shortfall / (nspaces ? nspaces : 1);
381 int spaceshortfall = natural_space -
382 (minspacewidth + spaceextra);
383
384 if (i+j == nwords && spaceshortfall < 0) {
385 /*
386 * Special case: on the very last line of
387 * the paragraph, we don't score penalty
388 * points for having to _stretch_ the line,
389 * since we won't stretch it anyway.
390 * However, we score penalties as normal
391 * for having to squeeze it.
392 */
393 cost = 0;
394 } else {
395 /*
396 * Squaring this number is tricky since
397 * it's liable to be quite big. Let's
398 * divide it through by 256.
399 */
400 int x = spaceshortfall >> 8;
401 int xf = spaceshortfall & 0xFF;
402
403 /*
404 * Not counting strange variable-fixed-
405 * point oddities, we are computing
406 *
407 * (x+xf)^2 = x^2 + 2*x*xf + xf*xf
408 *
409 * except that _our_ xf is 256 times the
410 * one listed there.
411 */
412
413 cost = x * x;
414 cost += (2 * x * xf) >> 8;
415 }
416 }
417 } else {
418 if (i+j == nwords) {
419 /*
420 * Special case: if we're at the very end of the
421 * paragraph, we don't score penalty points for the
422 * white space left on the line.
423 */
424 cost = 0;
425 } else {
426 cost = (thiswidth-linelen) * (thiswidth-linelen);
427 }
428 }
429
430 /*
431 * Add in the cost of wrapping all lines after this
432 * point too.
433 */
434 if (i+j < nwords)
435 cost += wrapwords[i+j].cost;
436
437 /*
438 * We compare bestcost >= cost, not bestcost > cost,
439 * because in cases where the costs are identical we
440 * want to try to look like the greedy algorithm,
441 * because readers are likely to have spent a lot of
442 * time looking at greedy-wrapped paragraphs and
443 * there's no point violating the Principle of Least
444 * Surprise if it doesn't actually gain anything.
445 */
446 if (best < 0 || bestcost >= cost) {
447 bestcost = cost;
448 best = j;
449 }
450 }
451 /*
452 * Now we know the optimal answer for this terminal
453 * subsequence, so put it in wrapwords.
454 */
455 wrapwords[i].cost = bestcost;
456 wrapwords[i].nwords = best;
457 }
458
459 /*
460 * We've wrapped the paragraph. Now build the output
461 * `wrappedline' list.
462 */
463 i = 0;
464 while (i < nwords) {
465 wrappedline *w = snew(wrappedline);
466 *ptr = w;
467 ptr = &w->next;
468 w->next = NULL;
469
470 n = wrapwords[i].nwords;
471 w->begin = wrapwords[i].begin;
472 w->end = wrapwords[i+n-1].end;
473
474 /*
475 * Count along the words to find nspaces and shortfall.
476 */
477 w->nspaces = 0;
478 w->shortfall = width;
479 for (j = 0; j < n; j++) {
480 w->shortfall -= wrapwords[i+j].width;
481 if (j < n-1 && wrapwords[i+j].spacewidth) {
482 w->nspaces++;
483 w->shortfall -= wrapwords[i+j].spacewidth;
484 }
485 }
486 i += n;
487 }
488
489 sfree(wrapwords);
490
491 return head;
492 }
493
494 void wrap_free(wrappedline *w) {
495 while (w) {
496 wrappedline *t = w->next;
497 sfree(w);
498 w = t;
499 }
500 }
501
502 void cmdline_cfg_add(paragraph *cfg, char *string)
503 {
504 wchar_t *ustring;
505 int upos, ulen, pos, len;
506
507 ulen = 0;
508 while (cfg->keyword[ulen])
509 ulen += 1 + ustrlen(cfg->keyword+ulen);
510 len = 0;
511 while (cfg->origkeyword[len])
512 len += 1 + strlen(cfg->origkeyword+len);
513
514 ustring = ufroma_locale_dup(string);
515
516 upos = ulen;
517 ulen += 2 + ustrlen(ustring);
518 cfg->keyword = sresize(cfg->keyword, ulen, wchar_t);
519 ustrcpy(cfg->keyword+upos, ustring);
520 cfg->keyword[ulen-1] = L'\0';
521
522 pos = len;
523 len += 2 + strlen(string);
524 cfg->origkeyword = sresize(cfg->origkeyword, len, char);
525 strcpy(cfg->origkeyword+pos, string);
526 cfg->origkeyword[len-1] = '\0';
527
528 sfree(ustring);
529 }
530
531 paragraph *cmdline_cfg_new(void)
532 {
533 paragraph *p;
534
535 p = snew(paragraph);
536 memset(p, 0, sizeof(*p));
537 p->type = para_Config;
538 p->next = NULL;
539 p->fpos.filename = "<command line>";
540 p->fpos.line = p->fpos.col = -1;
541 p->keyword = ustrdup(L"\0");
542 p->origkeyword = dupstr("\0");
543
544 return p;
545 }
546
547 paragraph *cmdline_cfg_simple(char *string, ...)
548 {
549 va_list ap;
550 char *s;
551 paragraph *p;
552
553 p = cmdline_cfg_new();
554 cmdline_cfg_add(p, string);
555
556 va_start(ap, string);
557 while ((s = va_arg(ap, char *)) != NULL)
558 cmdline_cfg_add(p, s);
559 va_end(ap);
560
561 return p;
562 }