First instalment of a manual for Halibut. This lot covers the
[sgt/halibut] / bk_text.c
CommitLineData
/*
 * text backend for Halibut
 */
4
5#include <stdio.h>
6#include <stdlib.h>
7#include <assert.h>
8#include "halibut.h"
9
/*
 * Horizontal placement of a heading. text_heading() treats LEFT and
 * CENTRE alike when working out wrap widths; LEFTPLUS places the
 * heading prefix in the left margin.
 */
typedef enum {
    LEFT,
    LEFTPLUS,
    CENTRE
} alignment;

/*
 * Formatting rules for one class of heading (title, chapter, or one
 * level of section).
 */
typedef struct {
    alignment align;            /* where the heading is placed */
    int just_numbers;           /* nonzero: use the numeric-only prefix */
    wchar_t underline;          /* underline character; L'\0' for none */
    wchar_t *number_suffix;     /* text appended after the heading prefix */
} alignstruct;
17
18typedef struct {
19 int indent, indent_code;
20 int listindentbefore, listindentafter;
21 int width;
22 alignstruct atitle, achapter, *asect;
23 int nasect;
24 int include_version_id;
25 int indent_preambles;
26 word bullet;
27} textconfig;
28
29static int text_convert(wchar_t *, char **);
30
31static void text_heading(FILE *, word *, word *, word *, alignstruct, int,int);
32static void text_rule(FILE *, int, int);
33static void text_para(FILE *, word *, char *, word *, int, int, int);
34static void text_codepara(FILE *, word *, int, int);
35static void text_versionid(FILE *, word *);
36
37static alignment utoalign(wchar_t *p) {
38 if (!ustricmp(p, L"centre") || !ustricmp(p, L"center"))
39 return CENTRE;
40 if (!ustricmp(p, L"leftplus"))
41 return LEFTPLUS;
42 return LEFT;
43}
44
45static textconfig text_configure(paragraph *source) {
46 textconfig ret;
47
48 /*
49 * Non-negotiables.
50 */
51 ret.bullet.next = NULL;
52 ret.bullet.alt = NULL;
53 ret.bullet.type = word_Normal;
54 ret.atitle.just_numbers = FALSE; /* ignored */
55
56 /*
57 * Defaults.
58 */
59 ret.indent = 7;
60 ret.indent_code = 2;
61 ret.listindentbefore = 1;
62 ret.listindentafter = 3;
63 ret.width = 68;
64 ret.atitle.align = CENTRE;
65 ret.atitle.underline = L'=';
66 ret.achapter.align = LEFT;
67 ret.achapter.just_numbers = FALSE;
63223c78 68 ret.achapter.number_suffix = ustrdup(L": ");
d7482997 69 ret.achapter.underline = L'-';
70 ret.nasect = 1;
71 ret.asect = mknewa(alignstruct, ret.nasect);
72 ret.asect[0].align = LEFTPLUS;
73 ret.asect[0].just_numbers = TRUE;
63223c78 74 ret.asect[0].number_suffix = ustrdup(L" ");
d7482997 75 ret.asect[0].underline = L'\0';
76 ret.include_version_id = TRUE;
77 ret.indent_preambles = FALSE;
78 ret.bullet.text = ustrdup(L"-");
79
80 for (; source; source = source->next) {
81 if (source->type == para_Config) {
82 if (!ustricmp(source->keyword, L"text-indent")) {
83 ret.indent = utoi(uadv(source->keyword));
84 } else if (!ustricmp(source->keyword, L"text-indent-code")) {
85 ret.indent_code = utoi(uadv(source->keyword));
86 } else if (!ustricmp(source->keyword, L"text-width")) {
87 ret.width = utoi(uadv(source->keyword));
88 } else if (!ustricmp(source->keyword, L"text-list-indent")) {
89 ret.listindentbefore = utoi(uadv(source->keyword));
90 } else if (!ustricmp(source->keyword, L"text-listitem-indent")) {
91 ret.listindentafter = utoi(uadv(source->keyword));
92 } else if (!ustricmp(source->keyword, L"text-chapter-align")) {
93 ret.achapter.align = utoalign(uadv(source->keyword));
94 } else if (!ustricmp(source->keyword, L"text-chapter-underline")) {
95 ret.achapter.underline = *uadv(source->keyword);
96 } else if (!ustricmp(source->keyword, L"text-chapter-numeric")) {
c83c6495 97 ret.achapter.just_numbers = utob(uadv(source->keyword));
63223c78 98 } else if (!ustricmp(source->keyword, L"text-chapter-suffix")) {
a0f2c111 99 ret.achapter.number_suffix = ustrdup(uadv(source->keyword));
d7482997 100 } else if (!ustricmp(source->keyword, L"text-section-align")) {
101 wchar_t *p = uadv(source->keyword);
102 int n = 0;
103 if (uisdigit(*p)) {
104 n = utoi(p);
105 p = uadv(p);
106 }
107 if (n >= ret.nasect) {
108 int i;
109 ret.asect = resize(ret.asect, n+1);
110 for (i = ret.nasect; i <= n; i++)
111 ret.asect[i] = ret.asect[ret.nasect-1];
112 ret.nasect = n+1;
113 }
114 ret.asect[n].align = utoalign(p);
115 } else if (!ustricmp(source->keyword, L"text-section-underline")) {
116 wchar_t *p = uadv(source->keyword);
117 int n = 0;
118 if (uisdigit(*p)) {
119 n = utoi(p);
120 p = uadv(p);
121 }
122 if (n >= ret.nasect) {
123 int i;
124 ret.asect = resize(ret.asect, n+1);
125 for (i = ret.nasect; i <= n; i++)
126 ret.asect[i] = ret.asect[ret.nasect-1];
127 ret.nasect = n+1;
128 }
129 ret.asect[n].underline = *p;
130 } else if (!ustricmp(source->keyword, L"text-section-numeric")) {
131 wchar_t *p = uadv(source->keyword);
132 int n = 0;
133 if (uisdigit(*p)) {
134 n = utoi(p);
135 p = uadv(p);
136 }
137 if (n >= ret.nasect) {
138 int i;
139 ret.asect = resize(ret.asect, n+1);
140 for (i = ret.nasect; i <= n; i++)
141 ret.asect[i] = ret.asect[ret.nasect-1];
142 ret.nasect = n+1;
143 }
144 ret.asect[n].just_numbers = utob(p);
63223c78 145 } else if (!ustricmp(source->keyword, L"text-section-suffix")) {
146 wchar_t *p = uadv(source->keyword);
147 int n = 0;
148 if (uisdigit(*p)) {
149 n = utoi(p);
150 p = uadv(p);
151 }
152 if (n >= ret.nasect) {
153 int i;
154 ret.asect = resize(ret.asect, n+1);
155 for (i = ret.nasect; i <= n; i++)
156 ret.asect[i] = ret.asect[ret.nasect-1];
157 ret.nasect = n+1;
158 }
a0f2c111 159 ret.asect[n].number_suffix = ustrdup(p);
d7482997 160 } else if (!ustricmp(source->keyword, L"text-title-align")) {
161 ret.atitle.align = utoalign(uadv(source->keyword));
162 } else if (!ustricmp(source->keyword, L"text-title-underline")) {
163 ret.atitle.underline = *uadv(source->keyword);
164 } else if (!ustricmp(source->keyword, L"text-versionid")) {
165 ret.include_version_id = utob(uadv(source->keyword));
166 } else if (!ustricmp(source->keyword, L"text-indent-preamble")) {
167 ret.indent_preambles = utob(uadv(source->keyword));
168 } else if (!ustricmp(source->keyword, L"text-bullet")) {
169 ret.bullet.text = uadv(source->keyword);
170 }
171 }
172 }
173
174 return ret;
175}
176
177void text_backend(paragraph *sourceform, keywordlist *keywords,
178 indexdata *idx) {
179 paragraph *p;
180 textconfig conf;
181 word *prefix, *body, *wp;
182 word spaceword;
183 FILE *fp;
184 char *prefixextra;
7136a6c7 185 int nesting, nestindent;
d7482997 186 int indentb, indenta;
187
188 IGNORE(keywords); /* we don't happen to need this */
189 IGNORE(idx); /* or this */
190
191 conf = text_configure(sourceform);
192
193 /*
194 * Determine the output file name, and open the output file
195 *
196 * FIXME: want configurable output file names here. For the
197 * moment, we'll just call it `output.txt'.
198 */
199 fp = fopen("output.txt", "w");
200 if (!fp) {
201 error(err_cantopenw, "output.txt");
202 return;
203 }
204
205 /* Do the title */
206 for (p = sourceform; p; p = p->next)
207 if (p->type == para_Title)
208 text_heading(fp, NULL, NULL, p->words,
209 conf.atitle, conf.indent, conf.width);
210
211 /* Do the preamble and copyright */
212 for (p = sourceform; p; p = p->next)
213 if (p->type == para_Preamble)
214 text_para(fp, NULL, NULL, p->words,
215 conf.indent_preambles ? conf.indent : 0, 0,
216 conf.width + (conf.indent_preambles ? 0 : conf.indent));
217 for (p = sourceform; p; p = p->next)
218 if (p->type == para_Copyright)
219 text_para(fp, NULL, NULL, p->words,
220 conf.indent_preambles ? conf.indent : 0, 0,
221 conf.width + (conf.indent_preambles ? 0 : conf.indent));
222
7136a6c7 223 nestindent = conf.listindentbefore + conf.listindentafter;
224 nesting = 0;
225
d7482997 226 /* Do the main document */
227 for (p = sourceform; p; p = p->next) switch (p->type) {
228
2614b01d 229 case para_QuotePush:
230 nesting += 2;
231 break;
232 case para_QuotePop:
233 nesting -= 2;
234 assert(nesting >= 0);
235 break;
236
7136a6c7 237 case para_LcontPush:
2614b01d 238 nesting += nestindent;
7136a6c7 239 break;
240 case para_LcontPop:
2614b01d 241 nesting -= nestindent;
242 assert(nesting >= 0);
7136a6c7 243 break;
244
d7482997 245 /*
246 * Things we ignore because we've already processed them or
247 * aren't going to touch them in this pass.
248 */
249 case para_IM:
250 case para_BR:
251 case para_Biblio: /* only touch BiblioCited */
252 case para_VersionID:
253 case para_Copyright:
254 case para_Preamble:
255 case para_NoCite:
256 case para_Title:
257 break;
258
259 /*
260 * Chapter titles.
261 */
262 case para_Chapter:
263 case para_Appendix:
264 case para_UnnumberedChapter:
265 text_heading(fp, p->kwtext, p->kwtext2, p->words,
266 conf.achapter, conf.indent, conf.width);
267 break;
268
269 case para_Heading:
270 case para_Subsect:
271 text_heading(fp, p->kwtext, p->kwtext2, p->words,
272 conf.asect[p->aux>=conf.nasect ? conf.nasect-1 : p->aux],
273 conf.indent, conf.width);
274 break;
275
276 case para_Rule:
2614b01d 277 text_rule(fp, conf.indent + nesting, conf.width - nesting);
d7482997 278 break;
279
280 case para_Normal:
7136a6c7 281 case para_DescribedThing:
282 case para_Description:
d7482997 283 case para_BiblioCited:
284 case para_Bullet:
285 case para_NumberedList:
286 if (p->type == para_Bullet) {
287 prefix = &conf.bullet;
288 prefixextra = NULL;
289 indentb = conf.listindentbefore;
290 indenta = conf.listindentafter;
291 } else if (p->type == para_NumberedList) {
292 prefix = p->kwtext;
293 prefixextra = "."; /* FIXME: configurability */
294 indentb = conf.listindentbefore;
295 indenta = conf.listindentafter;
7136a6c7 296 } else if (p->type == para_Description) {
297 prefix = NULL;
298 prefixextra = NULL;
299 indentb = conf.listindentbefore;
300 indenta = conf.listindentafter;
d7482997 301 } else {
302 prefix = NULL;
303 prefixextra = NULL;
304 indentb = indenta = 0;
305 }
306 if (p->type == para_BiblioCited) {
307 body = dup_word_list(p->kwtext);
308 for (wp = body; wp->next; wp = wp->next);
309 wp->next = &spaceword;
310 spaceword.next = p->words;
311 spaceword.alt = NULL;
312 spaceword.type = word_WhiteSpace;
313 spaceword.text = NULL;
314 } else {
315 wp = NULL;
316 body = p->words;
317 }
318 text_para(fp, prefix, prefixextra, body,
2614b01d 319 conf.indent + nesting + indentb, indenta,
320 conf.width - nesting - indentb - indenta);
d7482997 321 if (wp) {
322 wp->next = NULL;
323 free_word_list(body);
324 }
325 break;
326
327 case para_Code:
7136a6c7 328 text_codepara(fp, p->words,
2614b01d 329 conf.indent + nesting + conf.indent_code,
330 conf.width - nesting - 2 * conf.indent_code);
d7482997 331 break;
332 }
333
334 /* Do the version ID */
335 if (conf.include_version_id) {
336 for (p = sourceform; p; p = p->next)
337 if (p->type == para_VersionID)
338 text_versionid(fp, p->words);
339 }
340
341 /*
342 * Tidy up
343 */
344 fclose(fp);
677e18a2 345 {
346 int i;
347 sfree(conf.achapter.number_suffix);
348 for (i = 0; i < conf.nasect; i++)
349 sfree(conf.asect[i].number_suffix);
350 sfree(conf.asect);
351 sfree(conf.bullet.text);
352 }
d7482997 353}
354
355/*
356 * Convert a wide string into a string of chars. If `result' is
357 * non-NULL, mallocs the resulting string and stores a pointer to
358 * it in `*result'. If `result' is NULL, merely checks whether all
359 * characters in the string are feasible for the output character
360 * set.
361 *
362 * Return is nonzero if all characters are OK. If not all
363 * characters are OK but `result' is non-NULL, a result _will_
364 * still be generated!
365 */
366static int text_convert(wchar_t *s, char **result) {
367 /*
368 * FIXME. Currently this is ISO8859-1 only.
369 */
370 int doing = (result != 0);
371 int ok = TRUE;
372 char *p = NULL;
373 int plen = 0, psize = 0;
374
375 for (; *s; s++) {
376 wchar_t c = *s;
377 char outc;
378
379 if ((c >= 32 && c <= 126) ||
380 (c >= 160 && c <= 255)) {
381 /* Char is OK. */
382 outc = (char)c;
383 } else {
384 /* Char is not OK. */
385 ok = FALSE;
386 outc = 0xBF; /* approximate the good old DEC `uh?' */
387 }
388 if (doing) {
389 if (plen >= psize) {
390 psize = plen + 256;
391 p = resize(p, psize);
392 }
393 p[plen++] = outc;
394 }
395 }
396 if (doing) {
397 p = resize(p, plen+1);
398 p[plen] = '\0';
399 *result = p;
400 }
401 return ok;
402}
403
404static void text_rdaddwc(rdstringc *rs, word *text, word *end) {
405 char *c;
406
407 for (; text && text != end; text = text->next) switch (text->type) {
408 case word_HyperLink:
409 case word_HyperEnd:
410 case word_UpperXref:
411 case word_LowerXref:
412 case word_XrefEnd:
413 case word_IndexRef:
414 break;
415
416 case word_Normal:
417 case word_Emph:
418 case word_Code:
419 case word_WeakCode:
420 case word_WhiteSpace:
421 case word_EmphSpace:
422 case word_CodeSpace:
423 case word_WkCodeSpace:
424 case word_Quote:
425 case word_EmphQuote:
426 case word_CodeQuote:
427 case word_WkCodeQuote:
428 assert(text->type != word_CodeQuote &&
429 text->type != word_WkCodeQuote);
430 if (towordstyle(text->type) == word_Emph &&
431 (attraux(text->aux) == attr_First ||
432 attraux(text->aux) == attr_Only))
433 rdaddc(rs, '_'); /* FIXME: configurability */
434 else if (towordstyle(text->type) == word_Code &&
435 (attraux(text->aux) == attr_First ||
436 attraux(text->aux) == attr_Only))
437 rdaddc(rs, '`'); /* FIXME: configurability */
438 if (removeattr(text->type) == word_Normal) {
439 if (text_convert(text->text, &c))
440 rdaddsc(rs, c);
441 else
442 text_rdaddwc(rs, text->alt, NULL);
443 sfree(c);
444 } else if (removeattr(text->type) == word_WhiteSpace) {
445 rdaddc(rs, ' ');
446 } else if (removeattr(text->type) == word_Quote) {
447 rdaddc(rs, quoteaux(text->aux) == quote_Open ? '`' : '\'');
448 /* FIXME: configurability */
449 }
450 if (towordstyle(text->type) == word_Emph &&
451 (attraux(text->aux) == attr_Last ||
452 attraux(text->aux) == attr_Only))
453 rdaddc(rs, '_'); /* FIXME: configurability */
454 else if (towordstyle(text->type) == word_Code &&
455 (attraux(text->aux) == attr_Last ||
456 attraux(text->aux) == attr_Only))
457 rdaddc(rs, '\''); /* FIXME: configurability */
458 break;
459 }
460}
461
462static int text_width(word *);
463
464static int text_width_list(word *text) {
465 int w = 0;
466 while (text) {
467 w += text_width(text);
468 text = text->next;
469 }
470 return w;
471}
472
473static int text_width(word *text) {
474 switch (text->type) {
475 case word_HyperLink:
476 case word_HyperEnd:
477 case word_UpperXref:
478 case word_LowerXref:
479 case word_XrefEnd:
480 case word_IndexRef:
481 return 0;
482
483 case word_Normal:
484 case word_Emph:
485 case word_Code:
486 case word_WeakCode:
487 return (((text->type == word_Emph ||
488 text->type == word_Code)
489 ? (attraux(text->aux) == attr_Only ? 2 :
490 attraux(text->aux) == attr_Always ? 0 : 1)
491 : 0) +
492 (text_convert(text->text, NULL) ?
493 ustrlen(text->text) :
494 text_width_list(text->alt)));
495
496 case word_WhiteSpace:
497 case word_EmphSpace:
498 case word_CodeSpace:
499 case word_WkCodeSpace:
500 case word_Quote:
501 case word_EmphQuote:
502 case word_CodeQuote:
503 case word_WkCodeQuote:
504 assert(text->type != word_CodeQuote &&
505 text->type != word_WkCodeQuote);
506 return (((towordstyle(text->type) == word_Emph ||
507 towordstyle(text->type) == word_Code)
508 ? (attraux(text->aux) == attr_Only ? 2 :
509 attraux(text->aux) == attr_Always ? 0 : 1)
510 : 0) + 1);
511 }
512 return 0; /* should never happen */
513}
514
515static void text_heading(FILE *fp, word *tprefix, word *nprefix, word *text,
516 alignstruct align, int indent, int width) {
517 rdstringc t = { 0, 0, NULL };
518 int margin, length;
519 int firstlinewidth, wrapwidth;
520 wrappedline *wrapping, *p;
521
522 if (align.just_numbers && nprefix) {
63223c78 523 char *c;
d7482997 524 text_rdaddwc(&t, nprefix, NULL);
63223c78 525 if (text_convert(align.number_suffix, &c)) {
526 rdaddsc(&t, c);
527 sfree(c);
528 }
d7482997 529 } else if (!align.just_numbers && tprefix) {
63223c78 530 char *c;
d7482997 531 text_rdaddwc(&t, tprefix, NULL);
63223c78 532 if (text_convert(align.number_suffix, &c)) {
533 rdaddsc(&t, c);
534 sfree(c);
535 }
d7482997 536 }
537 margin = length = (t.text ? strlen(t.text) : 0);
538
539 if (align.align == LEFTPLUS) {
540 margin = indent - margin;
541 if (margin < 0) margin = 0;
542 firstlinewidth = indent + width - margin - length;
543 wrapwidth = width;
544 } else if (align.align == LEFT || align.align == CENTRE) {
545 margin = 0;
546 firstlinewidth = indent + width - length;
547 wrapwidth = indent + width;
548 }
549
550 wrapping = wrap_para(text, firstlinewidth, wrapwidth, text_width);
551 for (p = wrapping; p; p = p->next) {
552 text_rdaddwc(&t, p->begin, p->end);
553 length = (t.text ? strlen(t.text) : 0);
554 if (align.align == CENTRE) {
555 margin = (indent + width - length)/2;
556 if (margin < 0) margin = 0;
557 }
558 fprintf(fp, "%*s%s\n", margin, "", t.text);
559 if (align.underline != L'\0') {
560 char *u, uc;
561 wchar_t uw[2];
562 uw[0] = align.underline; uw[1] = L'\0';
563 text_convert(uw, &u);
564 uc = u[0];
565 sfree(u);
566 fprintf(fp, "%*s", margin, "");
567 while (length--)
568 putc(uc, fp);
569 putc('\n', fp);
570 }
571 if (align.align == LEFTPLUS)
572 margin = indent;
573 else
574 margin = 0;
575 sfree(t.text);
576 t = empty_rdstringc;
577 }
578 wrap_free(wrapping);
579 putc('\n', fp);
580
581 sfree(t.text);
582}
583
/*
 * Write a horizontal rule to `fp': `indent' spaces, then `width'
 * dashes, then a newline and a blank line.
 *
 * A zero or negative `indent' or `width' produces no characters for
 * that part. The caller computes width as the text width minus the
 * current nesting, which can go negative, so counting down to zero
 * would not terminate sensibly.
 */
static void text_rule(FILE *fp, int indent, int width) {
    int i;

    for (i = 0; i < indent; i++)
        putc(' ', fp);
    for (i = 0; i < width; i++)
        putc('-', fp);                 /* FIXME: configurability! */
    putc('\n', fp);
    putc('\n', fp);
}
590
591static void text_para(FILE *fp, word *prefix, char *prefixextra, word *text,
592 int indent, int extraindent, int width) {
593 wrappedline *wrapping, *p;
594 rdstringc pfx = { 0, 0, NULL };
595 int e;
596 int firstlinewidth = width;
597
598 if (prefix) {
599 text_rdaddwc(&pfx, prefix, NULL);
600 if (prefixextra)
601 rdaddsc(&pfx, prefixextra);
602 fprintf(fp, "%*s%s", indent, "", pfx.text);
c83c6495 603 /* If the prefix is too long, shorten the first line to fit. */
d7482997 604 e = extraindent - strlen(pfx.text);
605 if (e < 0) {
c83c6495 606 firstlinewidth += e; /* this decreases it, since e < 0 */
d7482997 607 if (firstlinewidth < 0) {
608 e = indent + extraindent;
609 firstlinewidth = width;
610 fprintf(fp, "\n");
c83c6495 611 } else
612 e = 0;
d7482997 613 }
614 sfree(pfx.text);
615 } else
616 e = indent + extraindent;
617
618 wrapping = wrap_para(text, firstlinewidth, width, text_width);
619 for (p = wrapping; p; p = p->next) {
620 rdstringc t = { 0, 0, NULL };
621 text_rdaddwc(&t, p->begin, p->end);
622 fprintf(fp, "%*s%s\n", e, "", t.text);
623 e = indent + extraindent;
624 sfree(t.text);
625 }
626 wrap_free(wrapping);
627 putc('\n', fp);
628}
629
630static void text_codepara(FILE *fp, word *text, int indent, int width) {
631 for (; text; text = text->next) if (text->type == word_WeakCode) {
632 char *c;
633 text_convert(text->text, &c);
634 if (strlen(c) > (size_t)width) {
635 /* FIXME: warn */
636 }
637 fprintf(fp, "%*s%s\n", indent, "", c);
638 sfree(c);
639 }
640
641 putc('\n', fp);
642}
643
644static void text_versionid(FILE *fp, word *text) {
645 rdstringc t = { 0, 0, NULL };
646
647 rdaddc(&t, '['); /* FIXME: configurability */
648 text_rdaddwc(&t, text, NULL);
649 rdaddc(&t, ']'); /* FIXME: configurability */
650
651 fprintf(fp, "%s\n", t.text);
652 sfree(t.text);
653}