Initial work on PS and PDF output. Because these two backends share
[sgt/halibut] / misc.c
1 /*
2 * misc.c: miscellaneous useful items
3 */
4
5 #include "halibut.h"
6
7 struct stackTag {
8 void **data;
9 int sp;
10 int size;
11 };
12
13 stack stk_new(void) {
14 stack s;
15
16 s = mknew(struct stackTag);
17 s->sp = 0;
18 s->size = 0;
19 s->data = NULL;
20
21 return s;
22 }
23
24 void stk_free(stack s) {
25 sfree(s->data);
26 sfree(s);
27 }
28
29 void stk_push(stack s, void *item) {
30 if (s->size <= s->sp) {
31 s->size = s->sp + 32;
32 s->data = resize(s->data, s->size);
33 }
34 s->data[s->sp++] = item;
35 }
36
37 void *stk_pop(stack s) {
38 if (s->sp > 0)
39 return s->data[--s->sp];
40 else
41 return NULL;
42 }
43
44 void *stk_top(stack s) {
45 if (s->sp > 0)
46 return s->data[s->sp-1];
47 else
48 return NULL;
49 }
50
51 /*
52 * Small routines to amalgamate a string from an input source.
53 */
54 const rdstring empty_rdstring = {0, 0, NULL};
55 const rdstringc empty_rdstringc = {0, 0, NULL};
56
57 void rdadd(rdstring *rs, wchar_t c) {
58 if (rs->pos >= rs->size-1) {
59 rs->size = rs->pos + 128;
60 rs->text = resize(rs->text, rs->size);
61 }
62 rs->text[rs->pos++] = c;
63 rs->text[rs->pos] = 0;
64 }
65 void rdadds(rdstring *rs, wchar_t const *p) {
66 int len = ustrlen(p);
67 if (rs->pos >= rs->size - len) {
68 rs->size = rs->pos + len + 128;
69 rs->text = resize(rs->text, rs->size);
70 }
71 ustrcpy(rs->text + rs->pos, p);
72 rs->pos += len;
73 }
74 wchar_t *rdtrim(rdstring *rs) {
75 rs->text = resize(rs->text, rs->pos + 1);
76 return rs->text;
77 }
78
79 void rdaddc(rdstringc *rs, char c) {
80 if (rs->pos >= rs->size-1) {
81 rs->size = rs->pos + 128;
82 rs->text = resize(rs->text, rs->size);
83 }
84 rs->text[rs->pos++] = c;
85 rs->text[rs->pos] = 0;
86 }
87 void rdaddsc(rdstringc *rs, char const *p) {
88 int len = strlen(p);
89 if (rs->pos >= rs->size - len) {
90 rs->size = rs->pos + len + 128;
91 rs->text = resize(rs->text, rs->size);
92 }
93 strcpy(rs->text + rs->pos, p);
94 rs->pos += len;
95 }
96 char *rdtrimc(rdstringc *rs) {
97 rs->text = resize(rs->text, rs->pos + 1);
98 return rs->text;
99 }
100
101 static int compare_wordlists_literally(word *a, word *b) {
102 int t;
103 while (a && b) {
104 if (a->type != b->type)
105 return (a->type < b->type ? -1 : +1); /* FIXME? */
106 t = a->type;
107 if ((t != word_Normal && t != word_Code &&
108 t != word_WeakCode && t != word_Emph) ||
109 a->alt || b->alt) {
110 int c;
111 if (a->text && b->text) {
112 c = ustricmp(a->text, b->text);
113 if (c)
114 return c;
115 }
116 c = compare_wordlists_literally(a->alt, b->alt);
117 if (c)
118 return c;
119 a = a->next;
120 b = b->next;
121 } else {
122 wchar_t *ap = a->text, *bp = b->text;
123 while (*ap && *bp) {
124 wchar_t ac = utolower(*ap), bc = utolower(*bp);
125 if (ac != bc)
126 return (ac < bc ? -1 : +1);
127 if (!*++ap && a->next && a->next->type == t && !a->next->alt)
128 a = a->next, ap = a->text;
129 if (!*++bp && b->next && b->next->type == t && !b->next->alt)
130 b = b->next, bp = b->text;
131 }
132 if (*ap || *bp)
133 return (*ap ? +1 : -1);
134 a = a->next;
135 b = b->next;
136 }
137 }
138
139 if (a || b)
140 return (a ? +1 : -1);
141 else
142 return 0;
143 }
144
145 int compare_wordlists(word *a, word *b) {
146 /*
147 * First we compare only the alphabetic content of the word
148 * lists, with case not a factor. If that comes out equal,
149 * _then_ we compare the word lists literally.
150 */
151 struct {
152 word *w;
153 int i;
154 wchar_t c;
155 } pos[2];
156
157 pos[0].w = a;
158 pos[1].w = b;
159 pos[0].i = pos[1].i = 0;
160
161 while (1) {
162 /*
163 * Find the next alphabetic character in each word list.
164 */
165 int k;
166
167 for (k = 0; k < 2; k++) {
168 /*
169 * Advance until we hit either an alphabetic character
170 * or the end of the word list.
171 */
172 while (1) {
173 if (!pos[k].w) {
174 /* End of word list. */
175 pos[k].c = 0;
176 break;
177 } else if (!pos[k].w->text || !pos[k].w->text[pos[k].i]) {
178 /* No characters remaining in this word; move on. */
179 pos[k].w = pos[k].w->next;
180 pos[k].i = 0;
181 } else if (!uisalpha(pos[k].w->text[pos[k].i])) {
182 /* This character isn't alphabetic; move on. */
183 pos[k].i++;
184 } else {
185 /* We have an alphabetic! Lowercase it and continue. */
186 pos[k].c = utolower(pos[k].w->text[pos[k].i]);
187 break;
188 }
189 }
190 }
191
192 if (pos[0].c < pos[1].c)
193 return -1;
194 else if (pos[0].c > pos[1].c)
195 return +1;
196
197 if (!pos[0].c)
198 break; /* they're equal */
199
200 pos[0].i++;
201 pos[1].i++;
202 }
203
204 /*
205 * If we reach here, the strings were alphabetically equal, so
206 * compare in more detail.
207 */
208 return compare_wordlists_literally(a, b);
209 }
210
211 void mark_attr_ends(paragraph *sourceform) {
212 paragraph *p;
213 word *w, *wp;
214 for (p = sourceform; p; p = p->next) {
215 wp = NULL;
216 for (w = p->words; w; w = w->next) {
217 if (isattr(w->type)) {
218 int before = (wp && isattr(wp->type) &&
219 sameattr(wp->type, w->type));
220 int after = (w->next && isattr(w->next->type) &&
221 sameattr(w->next->type, w->type));
222 w->aux |= (before ?
223 (after ? attr_Always : attr_Last) :
224 (after ? attr_First : attr_Only));
225 }
226 wp = w;
227 }
228 }
229 }
230
231 /*
232 * This function implements the optimal paragraph wrapping
233 * algorithm, pretty much as used in TeX. A cost function is
234 * defined for each line of the wrapped paragraph (typically some
235 * convex function of the difference between the line's length and
236 * its desired length), and a dynamic programming approach is used
237 * to optimise globally across all possible layouts of the
238 * paragraph to find the one with the minimum total cost.
239 *
240 * The function as implemented here gives a choice of two options
241 * for the cost function:
242 *
243 * - If `natural_space' is zero, then the algorithm attempts to
244 * make each line the maximum possible width (either `width' or
245 * `subsequentwidth' depending on whether it's the first line of
246 * the paragraph or not), and the cost function is simply the
247 * square of the unused space at the end of each line. This is a
248 * simple mechanism suitable for use in fixed-pitch environments
249 * such as plain text displayed on a terminal.
250 *
251 * - However, if `natural_space' is positive, the algorithm
252 * assumes the medium is fully graphical and that the width of
253 * space characters can be adjusted finely, and it attempts to
254 * make each _space character_ the width given in
255 * `natural_space'. (The provided width function should return
256 * the _minimum_ acceptable width of a space character in this
257 * case.) Therefore, the cost function for a line is dependent
258 * on the number of spaces on that line as well as the amount by
259 * which the line width differs from the optimum.
260 */
261 wrappedline *wrap_para(word *text, int width, int subsequentwidth,
262 int (*widthfn)(void *, word *), void *ctx,
263 int natural_space) {
264 wrappedline *head = NULL, **ptr = &head;
265 int nwords, wordsize;
266 struct wrapword {
267 word *begin, *end;
268 int width;
269 int spacewidth;
270 int cost;
271 int nwords;
272 } *wrapwords;
273 int i, j, n;
274
275 /*
276 * Break the line up into wrappable components.
277 */
278 nwords = wordsize = 0;
279 wrapwords = NULL;
280 while (text) {
281 if (nwords >= wordsize) {
282 wordsize = nwords + 64;
283 wrapwords = srealloc(wrapwords, wordsize * sizeof(*wrapwords));
284 }
285 wrapwords[nwords].width = 0;
286 wrapwords[nwords].begin = text;
287 while (text) {
288 wrapwords[nwords].width += widthfn(ctx, text);
289 wrapwords[nwords].end = text->next;
290 if (text->next && (text->next->type == word_WhiteSpace ||
291 text->next->type == word_EmphSpace ||
292 text->breaks))
293 break;
294 text = text->next;
295 }
296 if (text && text->next && (text->next->type == word_WhiteSpace ||
297 text->next->type == word_EmphSpace)) {
298 wrapwords[nwords].spacewidth = widthfn(ctx, text->next);
299 text = text->next;
300 } else {
301 wrapwords[nwords].spacewidth = 0;
302 }
303 nwords++;
304 if (text)
305 text = text->next;
306 }
307
308 /*
309 * Perform the dynamic wrapping algorithm: work backwards from
310 * nwords-1, determining the optimal wrapping for each terminal
311 * subsequence of the paragraph.
312 */
313 for (i = nwords; i-- ;) {
314 int best = -1;
315 int bestcost = 0;
316 int cost;
317 int linelen = 0, spacewidth = 0, minspacewidth = 0;
318 int nspaces;
319 int thiswidth = (i == 0 ? width : subsequentwidth);
320
321 j = 0;
322 nspaces = 0;
323 while (i+j < nwords) {
324 /*
325 * See what happens if we put j+1 words on this line.
326 */
327 if (spacewidth) {
328 nspaces++;
329 minspacewidth = spacewidth;
330 }
331 linelen += spacewidth + wrapwords[i+j].width;
332 spacewidth = wrapwords[i+j].spacewidth;
333 j++;
334 if (linelen > thiswidth) {
335 /*
336 * If we're over the width limit, abandon ship,
337 * _unless_ there is no best-effort yet (which will
338 * only happen if the first word is too long all by
339 * itself).
340 */
341 if (best > 0)
342 break;
343 }
344
345 /*
346 * Compute the cost of this line. The method of doing
347 * this differs hugely depending on whether
348 * natural_space is nonzero or not.
349 */
350 if (natural_space) {
351 if (!nspaces && linelen > thiswidth) {
352 /*
353 * Special case: if there are no spaces at all
354 * on the line because one single word is too
355 * long for its line, cost is zero because
356 * there's nothing we can do about it anyway.
357 */
358 cost = 0;
359 } else {
360 int shortfall = thiswidth - linelen;
361 int spaceextra = shortfall / (nspaces ? nspaces : 1);
362 int spaceshortfall = natural_space -
363 (minspacewidth + spaceextra);
364
365 if (i+j == nwords && spaceshortfall < 0) {
366 /*
367 * Special case: on the very last line of
368 * the paragraph, we don't score penalty
369 * points for having to _stretch_ the line,
370 * since we won't stretch it anyway.
371 * However, we score penalties as normal
372 * for having to squeeze it.
373 */
374 cost = 0;
375 } else {
376 /*
377 * Squaring this number is tricky since
378 * it's liable to be quite big. Let's
379 * divide it through by 256.
380 */
381 int x = spaceshortfall >> 8;
382 int xf = spaceshortfall & 0xFF;
383
384 /*
385 * Not counting strange variable-fixed-
386 * point oddities, we are computing
387 *
388 * (x+xf)^2 = x^2 + 2*x*xf + xf*xf
389 *
390 * except that _our_ xf is 256 times the
391 * one listed there.
392 */
393
394 cost = x * x;
395 cost += (2 * x * xf) >> 8;
396 }
397 }
398 } else {
399 if (i+j == nwords) {
400 /*
401 * Special case: if we're at the very end of the
402 * paragraph, we don't score penalty points for the
403 * white space left on the line.
404 */
405 cost = 0;
406 } else {
407 cost = (thiswidth-linelen) * (thiswidth-linelen);
408 }
409 }
410
411 /*
412 * Add in the cost of wrapping all lines after this
413 * point too.
414 */
415 if (i+j < nwords)
416 cost += wrapwords[i+j].cost;
417
418 /*
419 * We compare bestcost >= cost, not bestcost > cost,
420 * because in cases where the costs are identical we
421 * want to try to look like the greedy algorithm,
422 * because readers are likely to have spent a lot of
423 * time looking at greedy-wrapped paragraphs and
424 * there's no point violating the Principle of Least
425 * Surprise if it doesn't actually gain anything.
426 */
427 if (best < 0 || bestcost >= cost) {
428 bestcost = cost;
429 best = j;
430 }
431 }
432 /*
433 * Now we know the optimal answer for this terminal
434 * subsequence, so put it in wrapwords.
435 */
436 wrapwords[i].cost = bestcost;
437 wrapwords[i].nwords = best;
438 }
439
440 /*
441 * We've wrapped the paragraph. Now build the output
442 * `wrappedline' list.
443 */
444 i = 0;
445 while (i < nwords) {
446 wrappedline *w = mknew(wrappedline);
447 *ptr = w;
448 ptr = &w->next;
449 w->next = NULL;
450
451 n = wrapwords[i].nwords;
452 w->begin = wrapwords[i].begin;
453 w->end = wrapwords[i+n-1].end;
454
455 /*
456 * Count along the words to find nspaces and shortfall.
457 */
458 w->nspaces = 0;
459 w->shortfall = width;
460 for (j = 0; j < n; j++) {
461 w->shortfall -= wrapwords[i+j].width;
462 if (j < n-1 && wrapwords[i+j].spacewidth) {
463 w->nspaces++;
464 w->shortfall -= wrapwords[i+j].spacewidth;
465 }
466 }
467 i += n;
468 }
469
470 sfree(wrapwords);
471
472 return head;
473 }
474
475 void wrap_free(wrappedline *w) {
476 while (w) {
477 wrappedline *t = w->next;
478 sfree(w);
479 w = t;
480 }
481 }