Yikes! Stack trash I'd never noticed before. Oops.
[sgt/halibut] / bk_text.c
1 /*
2 * text backend for Halibut
3 */
4
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <assert.h>
8 #include "halibut.h"
9
/* Horizontal placement of a heading line. */
typedef enum { LEFT, LEFTPLUS, CENTRE } alignment;
/*
 * Formatting options for one class of heading (the title, chapters,
 * or one entry of the section array in textconfig).
 */
typedef struct {
    alignment align;           /* how text_heading() positions the heading */
    int just_numbers;          /* nonzero: text_heading() uses the numeric
                                * prefix instead of the textual one */
    wchar_t underline;         /* underline character; L'\0' = no underline */
    wchar_t *number_suffix;    /* appended after the heading's prefix */
} alignstruct;
17
/*
 * Complete set of options for the text backend, as produced by
 * text_configure() and consumed by text_backend().
 */
typedef struct {
    int indent, indent_code;   /* base indent; extra indent for code paras */
    int listindentbefore, listindentafter; /* indent before / after a list
                                            * item's prefix */
    int width;                 /* text width passed to the wrapping code */
    alignstruct atitle, achapter, *asect; /* heading styles; asect is a
                                           * heap-allocated array */
    int nasect;                /* number of elements in asect */
    int include_version_id;    /* nonzero: emit version ID paragraphs */
    int indent_preambles;      /* zero: text before the first chapter is
                                * output with the base indent cancelled */
    word bullet;               /* word used as the prefix of bullet items */
    char *filename;            /* output file name (heap-allocated) */
} textconfig;
29
/* Wide string -> char string conversion / representability check. */
static int text_convert(wchar_t *, char **);

/* Output routines, one per kind of paragraph; all write to the FILE. */
static void text_heading(FILE *, word *, word *, word *, alignstruct, int,int);
static void text_rule(FILE *, int, int);
static void text_para(FILE *, word *, char *, word *, int, int, int);
static void text_codepara(FILE *, word *, int, int);
static void text_versionid(FILE *, word *);
37
38 static alignment utoalign(wchar_t *p) {
39 if (!ustricmp(p, L"centre") || !ustricmp(p, L"center"))
40 return CENTRE;
41 if (!ustricmp(p, L"leftplus"))
42 return LEFTPLUS;
43 return LEFT;
44 }
45
46 static textconfig text_configure(paragraph *source) {
47 textconfig ret;
48
49 /*
50 * Non-negotiables.
51 */
52 ret.bullet.next = NULL;
53 ret.bullet.alt = NULL;
54 ret.bullet.type = word_Normal;
55 ret.atitle.just_numbers = FALSE; /* ignored */
56
57 /*
58 * Defaults.
59 */
60 ret.indent = 7;
61 ret.indent_code = 2;
62 ret.listindentbefore = 1;
63 ret.listindentafter = 3;
64 ret.width = 68;
65 ret.atitle.align = CENTRE;
66 ret.atitle.underline = L'=';
67 ret.achapter.align = LEFT;
68 ret.achapter.just_numbers = FALSE;
69 ret.achapter.number_suffix = L": ";
70 ret.achapter.underline = L'-';
71 ret.nasect = 1;
72 ret.asect = mknewa(alignstruct, ret.nasect);
73 ret.asect[0].align = LEFTPLUS;
74 ret.asect[0].just_numbers = TRUE;
75 ret.asect[0].number_suffix = L" ";
76 ret.asect[0].underline = L'\0';
77 ret.include_version_id = TRUE;
78 ret.indent_preambles = FALSE;
79 ret.bullet.text = L"-";
80 ret.filename = dupstr("output.txt");
81
82 for (; source; source = source->next) {
83 if (source->type == para_Config) {
84 if (!ustricmp(source->keyword, L"text-indent")) {
85 ret.indent = utoi(uadv(source->keyword));
86 } else if (!ustricmp(source->keyword, L"text-filename")) {
87 sfree(ret.filename);
88 ret.filename = utoa_dup(uadv(source->keyword));
89 } else if (!ustricmp(source->keyword, L"text-indent-code")) {
90 ret.indent_code = utoi(uadv(source->keyword));
91 } else if (!ustricmp(source->keyword, L"text-width")) {
92 ret.width = utoi(uadv(source->keyword));
93 } else if (!ustricmp(source->keyword, L"text-list-indent")) {
94 ret.listindentbefore = utoi(uadv(source->keyword));
95 } else if (!ustricmp(source->keyword, L"text-listitem-indent")) {
96 ret.listindentafter = utoi(uadv(source->keyword));
97 } else if (!ustricmp(source->keyword, L"text-chapter-align")) {
98 ret.achapter.align = utoalign(uadv(source->keyword));
99 } else if (!ustricmp(source->keyword, L"text-chapter-underline")) {
100 ret.achapter.underline = *uadv(source->keyword);
101 } else if (!ustricmp(source->keyword, L"text-chapter-numeric")) {
102 ret.achapter.just_numbers = utob(uadv(source->keyword));
103 } else if (!ustricmp(source->keyword, L"text-chapter-suffix")) {
104 ret.achapter.number_suffix = uadv(source->keyword);
105 } else if (!ustricmp(source->keyword, L"text-section-align")) {
106 wchar_t *p = uadv(source->keyword);
107 int n = 0;
108 if (uisdigit(*p)) {
109 n = utoi(p);
110 p = uadv(p);
111 }
112 if (n >= ret.nasect) {
113 int i;
114 ret.asect = resize(ret.asect, n+1);
115 for (i = ret.nasect; i <= n; i++)
116 ret.asect[i] = ret.asect[ret.nasect-1];
117 ret.nasect = n+1;
118 }
119 ret.asect[n].align = utoalign(p);
120 } else if (!ustricmp(source->keyword, L"text-section-underline")) {
121 wchar_t *p = uadv(source->keyword);
122 int n = 0;
123 if (uisdigit(*p)) {
124 n = utoi(p);
125 p = uadv(p);
126 }
127 if (n >= ret.nasect) {
128 int i;
129 ret.asect = resize(ret.asect, n+1);
130 for (i = ret.nasect; i <= n; i++)
131 ret.asect[i] = ret.asect[ret.nasect-1];
132 ret.nasect = n+1;
133 }
134 ret.asect[n].underline = *p;
135 } else if (!ustricmp(source->keyword, L"text-section-numeric")) {
136 wchar_t *p = uadv(source->keyword);
137 int n = 0;
138 if (uisdigit(*p)) {
139 n = utoi(p);
140 p = uadv(p);
141 }
142 if (n >= ret.nasect) {
143 int i;
144 ret.asect = resize(ret.asect, n+1);
145 for (i = ret.nasect; i <= n; i++)
146 ret.asect[i] = ret.asect[ret.nasect-1];
147 ret.nasect = n+1;
148 }
149 ret.asect[n].just_numbers = utob(p);
150 } else if (!ustricmp(source->keyword, L"text-section-suffix")) {
151 wchar_t *p = uadv(source->keyword);
152 int n = 0;
153 if (uisdigit(*p)) {
154 n = utoi(p);
155 p = uadv(p);
156 }
157 if (n >= ret.nasect) {
158 int i;
159 ret.asect = resize(ret.asect, n+1);
160 for (i = ret.nasect; i <= n; i++) {
161 ret.asect[i] = ret.asect[ret.nasect-1];
162 }
163 ret.nasect = n+1;
164 }
165 ret.asect[n].number_suffix = p;
166 } else if (!ustricmp(source->keyword, L"text-title-align")) {
167 ret.atitle.align = utoalign(uadv(source->keyword));
168 } else if (!ustricmp(source->keyword, L"text-title-underline")) {
169 ret.atitle.underline = *uadv(source->keyword);
170 } else if (!ustricmp(source->keyword, L"text-versionid")) {
171 ret.include_version_id = utob(uadv(source->keyword));
172 } else if (!ustricmp(source->keyword, L"text-indent-preamble")) {
173 ret.indent_preambles = utob(uadv(source->keyword));
174 } else if (!ustricmp(source->keyword, L"text-bullet")) {
175 ret.bullet.text = uadv(source->keyword);
176 }
177 }
178 }
179
180 return ret;
181 }
182
183 paragraph *text_config_filename(char *filename)
184 {
185 paragraph *p;
186 wchar_t *ufilename, *up;
187 int len;
188
189 p = mknew(paragraph);
190 memset(p, 0, sizeof(*p));
191 p->type = para_Config;
192 p->next = NULL;
193 p->fpos.filename = "<command line>";
194 p->fpos.line = p->fpos.col = -1;
195
196 ufilename = ufroma_dup(filename);
197 len = ustrlen(ufilename) + 2 + lenof(L"text-filename");
198 p->keyword = mknewa(wchar_t, len);
199 up = p->keyword;
200 ustrcpy(up, L"text-filename");
201 up = uadv(up);
202 ustrcpy(up, ufilename);
203 up = uadv(up);
204 *up = L'\0';
205 assert(up - p->keyword < len);
206 sfree(ufilename);
207
208 return p;
209 }
210
211 void text_backend(paragraph *sourceform, keywordlist *keywords,
212 indexdata *idx, void *unused) {
213 paragraph *p;
214 textconfig conf;
215 word *prefix, *body, *wp;
216 word spaceword;
217 FILE *fp;
218 char *prefixextra;
219 int nesting, nestindent;
220 int indentb, indenta;
221
222 IGNORE(unused);
223 IGNORE(keywords); /* we don't happen to need this */
224 IGNORE(idx); /* or this */
225
226 conf = text_configure(sourceform);
227
228 /*
229 * Open the output file.
230 */
231 fp = fopen(conf.filename, "w");
232 if (!fp) {
233 error(err_cantopenw, conf.filename);
234 return;
235 }
236
237 /* Do the title */
238 for (p = sourceform; p; p = p->next)
239 if (p->type == para_Title)
240 text_heading(fp, NULL, NULL, p->words,
241 conf.atitle, conf.indent, conf.width);
242
243 nestindent = conf.listindentbefore + conf.listindentafter;
244 nesting = (conf.indent_preambles ? 0 : -conf.indent);
245
246 /* Do the main document */
247 for (p = sourceform; p; p = p->next) switch (p->type) {
248
249 case para_QuotePush:
250 nesting += 2;
251 break;
252 case para_QuotePop:
253 nesting -= 2;
254 assert(nesting >= 0);
255 break;
256
257 case para_LcontPush:
258 nesting += nestindent;
259 break;
260 case para_LcontPop:
261 nesting -= nestindent;
262 assert(nesting >= 0);
263 break;
264
265 /*
266 * Things we ignore because we've already processed them or
267 * aren't going to touch them in this pass.
268 */
269 case para_IM:
270 case para_BR:
271 case para_Biblio: /* only touch BiblioCited */
272 case para_VersionID:
273 case para_NoCite:
274 case para_Title:
275 break;
276
277 /*
278 * Chapter titles.
279 */
280 case para_Chapter:
281 case para_Appendix:
282 case para_UnnumberedChapter:
283 text_heading(fp, p->kwtext, p->kwtext2, p->words,
284 conf.achapter, conf.indent, conf.width);
285 nesting = 0;
286 break;
287
288 case para_Heading:
289 case para_Subsect:
290 text_heading(fp, p->kwtext, p->kwtext2, p->words,
291 conf.asect[p->aux>=conf.nasect ? conf.nasect-1 : p->aux],
292 conf.indent, conf.width);
293 break;
294
295 case para_Rule:
296 text_rule(fp, conf.indent + nesting, conf.width - nesting);
297 break;
298
299 case para_Normal:
300 case para_Copyright:
301 case para_DescribedThing:
302 case para_Description:
303 case para_BiblioCited:
304 case para_Bullet:
305 case para_NumberedList:
306 if (p->type == para_Bullet) {
307 prefix = &conf.bullet;
308 prefixextra = NULL;
309 indentb = conf.listindentbefore;
310 indenta = conf.listindentafter;
311 } else if (p->type == para_NumberedList) {
312 prefix = p->kwtext;
313 prefixextra = "."; /* FIXME: configurability */
314 indentb = conf.listindentbefore;
315 indenta = conf.listindentafter;
316 } else if (p->type == para_Description) {
317 prefix = NULL;
318 prefixextra = NULL;
319 indentb = conf.listindentbefore;
320 indenta = conf.listindentafter;
321 } else {
322 prefix = NULL;
323 prefixextra = NULL;
324 indentb = indenta = 0;
325 }
326 if (p->type == para_BiblioCited) {
327 body = dup_word_list(p->kwtext);
328 for (wp = body; wp->next; wp = wp->next);
329 wp->next = &spaceword;
330 spaceword.next = p->words;
331 spaceword.alt = NULL;
332 spaceword.type = word_WhiteSpace;
333 spaceword.text = NULL;
334 } else {
335 wp = NULL;
336 body = p->words;
337 }
338 text_para(fp, prefix, prefixextra, body,
339 conf.indent + nesting + indentb, indenta,
340 conf.width - nesting - indentb - indenta);
341 if (wp) {
342 wp->next = NULL;
343 free_word_list(body);
344 }
345 break;
346
347 case para_Code:
348 text_codepara(fp, p->words,
349 conf.indent + nesting + conf.indent_code,
350 conf.width - nesting - 2 * conf.indent_code);
351 break;
352 }
353
354 /* Do the version ID */
355 if (conf.include_version_id) {
356 for (p = sourceform; p; p = p->next)
357 if (p->type == para_VersionID)
358 text_versionid(fp, p->words);
359 }
360
361 /*
362 * Tidy up
363 */
364 fclose(fp);
365 sfree(conf.asect);
366 sfree(conf.filename);
367 }
368
369 /*
370 * Convert a wide string into a string of chars. If `result' is
371 * non-NULL, mallocs the resulting string and stores a pointer to
372 * it in `*result'. If `result' is NULL, merely checks whether all
373 * characters in the string are feasible for the output character
374 * set.
375 *
376 * Return is nonzero if all characters are OK. If not all
377 * characters are OK but `result' is non-NULL, a result _will_
378 * still be generated!
379 */
380 static int text_convert(wchar_t *s, char **result) {
381 /*
382 * FIXME. Currently this is ISO8859-1 only.
383 */
384 int doing = (result != 0);
385 int ok = TRUE;
386 char *p = NULL;
387 int plen = 0, psize = 0;
388
389 for (; *s; s++) {
390 wchar_t c = *s;
391 char outc;
392
393 if ((c >= 32 && c <= 126) ||
394 (c >= 160 && c <= 255)) {
395 /* Char is OK. */
396 outc = (char)c;
397 } else {
398 /* Char is not OK. */
399 ok = FALSE;
400 outc = 0xBF; /* approximate the good old DEC `uh?' */
401 }
402 if (doing) {
403 if (plen >= psize) {
404 psize = plen + 256;
405 p = resize(p, psize);
406 }
407 p[plen++] = outc;
408 }
409 }
410 if (doing) {
411 p = resize(p, plen+1);
412 p[plen] = '\0';
413 *result = p;
414 }
415 return ok;
416 }
417
418 static void text_rdaddwc(rdstringc *rs, word *text, word *end) {
419 char *c;
420
421 for (; text && text != end; text = text->next) switch (text->type) {
422 case word_HyperLink:
423 case word_HyperEnd:
424 case word_UpperXref:
425 case word_LowerXref:
426 case word_XrefEnd:
427 case word_IndexRef:
428 break;
429
430 case word_Normal:
431 case word_Emph:
432 case word_Code:
433 case word_WeakCode:
434 case word_WhiteSpace:
435 case word_EmphSpace:
436 case word_CodeSpace:
437 case word_WkCodeSpace:
438 case word_Quote:
439 case word_EmphQuote:
440 case word_CodeQuote:
441 case word_WkCodeQuote:
442 assert(text->type != word_CodeQuote &&
443 text->type != word_WkCodeQuote);
444 if (towordstyle(text->type) == word_Emph &&
445 (attraux(text->aux) == attr_First ||
446 attraux(text->aux) == attr_Only))
447 rdaddc(rs, '_'); /* FIXME: configurability */
448 else if (towordstyle(text->type) == word_Code &&
449 (attraux(text->aux) == attr_First ||
450 attraux(text->aux) == attr_Only))
451 rdaddc(rs, '`'); /* FIXME: configurability */
452 if (removeattr(text->type) == word_Normal) {
453 if (text_convert(text->text, &c))
454 rdaddsc(rs, c);
455 else
456 text_rdaddwc(rs, text->alt, NULL);
457 sfree(c);
458 } else if (removeattr(text->type) == word_WhiteSpace) {
459 rdaddc(rs, ' ');
460 } else if (removeattr(text->type) == word_Quote) {
461 rdaddc(rs, quoteaux(text->aux) == quote_Open ? '`' : '\'');
462 /* FIXME: configurability */
463 }
464 if (towordstyle(text->type) == word_Emph &&
465 (attraux(text->aux) == attr_Last ||
466 attraux(text->aux) == attr_Only))
467 rdaddc(rs, '_'); /* FIXME: configurability */
468 else if (towordstyle(text->type) == word_Code &&
469 (attraux(text->aux) == attr_Last ||
470 attraux(text->aux) == attr_Only))
471 rdaddc(rs, '\''); /* FIXME: configurability */
472 break;
473 }
474 }
475
476 static int text_width(word *);
477
478 static int text_width_list(word *text) {
479 int w = 0;
480 while (text) {
481 w += text_width(text);
482 text = text->next;
483 }
484 return w;
485 }
486
487 static int text_width(word *text) {
488 switch (text->type) {
489 case word_HyperLink:
490 case word_HyperEnd:
491 case word_UpperXref:
492 case word_LowerXref:
493 case word_XrefEnd:
494 case word_IndexRef:
495 return 0;
496
497 case word_Normal:
498 case word_Emph:
499 case word_Code:
500 case word_WeakCode:
501 return (((text->type == word_Emph ||
502 text->type == word_Code)
503 ? (attraux(text->aux) == attr_Only ? 2 :
504 attraux(text->aux) == attr_Always ? 0 : 1)
505 : 0) +
506 (text_convert(text->text, NULL) ?
507 ustrlen(text->text) :
508 text_width_list(text->alt)));
509
510 case word_WhiteSpace:
511 case word_EmphSpace:
512 case word_CodeSpace:
513 case word_WkCodeSpace:
514 case word_Quote:
515 case word_EmphQuote:
516 case word_CodeQuote:
517 case word_WkCodeQuote:
518 assert(text->type != word_CodeQuote &&
519 text->type != word_WkCodeQuote);
520 return (((towordstyle(text->type) == word_Emph ||
521 towordstyle(text->type) == word_Code)
522 ? (attraux(text->aux) == attr_Only ? 2 :
523 attraux(text->aux) == attr_Always ? 0 : 1)
524 : 0) + 1);
525 }
526 return 0; /* should never happen */
527 }
528
529 static void text_heading(FILE *fp, word *tprefix, word *nprefix, word *text,
530 alignstruct align, int indent, int width) {
531 rdstringc t = { 0, 0, NULL };
532 int margin, length;
533 int firstlinewidth, wrapwidth;
534 wrappedline *wrapping, *p;
535
536 if (align.just_numbers && nprefix) {
537 char *c;
538 text_rdaddwc(&t, nprefix, NULL);
539 if (text_convert(align.number_suffix, &c)) {
540 rdaddsc(&t, c);
541 sfree(c);
542 }
543 } else if (!align.just_numbers && tprefix) {
544 char *c;
545 text_rdaddwc(&t, tprefix, NULL);
546 if (text_convert(align.number_suffix, &c)) {
547 rdaddsc(&t, c);
548 sfree(c);
549 }
550 }
551 margin = length = (t.text ? strlen(t.text) : 0);
552
553 if (align.align == LEFTPLUS) {
554 margin = indent - margin;
555 if (margin < 0) margin = 0;
556 firstlinewidth = indent + width - margin - length;
557 wrapwidth = width;
558 } else if (align.align == LEFT || align.align == CENTRE) {
559 margin = 0;
560 firstlinewidth = indent + width - length;
561 wrapwidth = indent + width;
562 }
563
564 wrapping = wrap_para(text, firstlinewidth, wrapwidth, text_width);
565 for (p = wrapping; p; p = p->next) {
566 text_rdaddwc(&t, p->begin, p->end);
567 length = (t.text ? strlen(t.text) : 0);
568 if (align.align == CENTRE) {
569 margin = (indent + width - length)/2;
570 if (margin < 0) margin = 0;
571 }
572 fprintf(fp, "%*s%s\n", margin, "", t.text);
573 if (align.underline != L'\0') {
574 char *u, uc;
575 wchar_t uw[2];
576 uw[0] = align.underline; uw[1] = L'\0';
577 text_convert(uw, &u);
578 uc = u[0];
579 sfree(u);
580 fprintf(fp, "%*s", margin, "");
581 while (length--)
582 putc(uc, fp);
583 putc('\n', fp);
584 }
585 if (align.align == LEFTPLUS)
586 margin = indent;
587 else
588 margin = 0;
589 sfree(t.text);
590 t = empty_rdstringc;
591 }
592 wrap_free(wrapping);
593 putc('\n', fp);
594
595 sfree(t.text);
596 }
597
/*
 * Write a horizontal rule to `fp': `indent' spaces, `width' dashes,
 * then a newline and a blank line. Zero or negative counts produce no
 * characters (the arithmetic in the caller can yield negative values
 * for deeply nested or very narrow layouts).
 */
static void text_rule(FILE *fp, int indent, int width) {
    int i;

    for (i = 0; i < indent; i++)
        putc(' ', fp);
    for (i = 0; i < width; i++)
        putc('-', fp);                 /* FIXME: configurability! */
    putc('\n', fp);
    putc('\n', fp);
}
604
605 static void text_para(FILE *fp, word *prefix, char *prefixextra, word *text,
606 int indent, int extraindent, int width) {
607 wrappedline *wrapping, *p;
608 rdstringc pfx = { 0, 0, NULL };
609 int e;
610 int firstlinewidth = width;
611
612 if (prefix) {
613 text_rdaddwc(&pfx, prefix, NULL);
614 if (prefixextra)
615 rdaddsc(&pfx, prefixextra);
616 fprintf(fp, "%*s%s", indent, "", pfx.text);
617 /* If the prefix is too long, shorten the first line to fit. */
618 e = extraindent - strlen(pfx.text);
619 if (e < 0) {
620 firstlinewidth += e; /* this decreases it, since e < 0 */
621 if (firstlinewidth < 0) {
622 e = indent + extraindent;
623 firstlinewidth = width;
624 fprintf(fp, "\n");
625 } else
626 e = 0;
627 }
628 sfree(pfx.text);
629 } else
630 e = indent + extraindent;
631
632 wrapping = wrap_para(text, firstlinewidth, width, text_width);
633 for (p = wrapping; p; p = p->next) {
634 rdstringc t = { 0, 0, NULL };
635 text_rdaddwc(&t, p->begin, p->end);
636 fprintf(fp, "%*s%s\n", e, "", t.text);
637 e = indent + extraindent;
638 sfree(t.text);
639 }
640 wrap_free(wrapping);
641 putc('\n', fp);
642 }
643
644 static void text_codepara(FILE *fp, word *text, int indent, int width) {
645 for (; text; text = text->next) if (text->type == word_WeakCode) {
646 char *c;
647 text_convert(text->text, &c);
648 if (strlen(c) > (size_t)width) {
649 /* FIXME: warn */
650 }
651 fprintf(fp, "%*s%s\n", indent, "", c);
652 sfree(c);
653 }
654
655 putc('\n', fp);
656 }
657
658 static void text_versionid(FILE *fp, word *text) {
659 rdstringc t = { 0, 0, NULL };
660
661 rdaddc(&t, '['); /* FIXME: configurability */
662 text_rdaddwc(&t, text, NULL);
663 rdaddc(&t, ']'); /* FIXME: configurability */
664
665 fprintf(fp, "%s\n", t.text);
666 sfree(t.text);
667 }