Add \cfg / -C directives to allow the user to choose the output file
[sgt/halibut] / bk_text.c
CommitLineData
/*
 * text backend for Halibut
 */
4
5#include <stdio.h>
6#include <stdlib.h>
7#include <assert.h>
8#include "halibut.h"
9
/* Horizontal placement options for a heading; see text_heading(). */
typedef enum {
    LEFT,
    LEFTPLUS,
    CENTRE
} alignment;

/* Presentation settings for one class of heading. */
typedef struct {
    alignment align;           /* where the heading is placed */
    int just_numbers;          /* nonzero: prefix with the numeric form
                                * (kwtext2) rather than kwtext */
    wchar_t underline;         /* underline character, or L'\0' for none */
    wchar_t *number_suffix;    /* appended after the heading prefix */
} alignstruct;
17
18typedef struct {
19 int indent, indent_code;
20 int listindentbefore, listindentafter;
21 int width;
22 alignstruct atitle, achapter, *asect;
23 int nasect;
24 int include_version_id;
25 int indent_preambles;
26 word bullet;
50d6b4bd 27 char *filename;
d7482997 28} textconfig;
29
30static int text_convert(wchar_t *, char **);
31
32static void text_heading(FILE *, word *, word *, word *, alignstruct, int,int);
33static void text_rule(FILE *, int, int);
34static void text_para(FILE *, word *, char *, word *, int, int, int);
35static void text_codepara(FILE *, word *, int, int);
36static void text_versionid(FILE *, word *);
37
38static alignment utoalign(wchar_t *p) {
39 if (!ustricmp(p, L"centre") || !ustricmp(p, L"center"))
40 return CENTRE;
41 if (!ustricmp(p, L"leftplus"))
42 return LEFTPLUS;
43 return LEFT;
44}
45
46static textconfig text_configure(paragraph *source) {
47 textconfig ret;
48
49 /*
50 * Non-negotiables.
51 */
52 ret.bullet.next = NULL;
53 ret.bullet.alt = NULL;
54 ret.bullet.type = word_Normal;
55 ret.atitle.just_numbers = FALSE; /* ignored */
56
57 /*
58 * Defaults.
59 */
60 ret.indent = 7;
61 ret.indent_code = 2;
62 ret.listindentbefore = 1;
63 ret.listindentafter = 3;
64 ret.width = 68;
65 ret.atitle.align = CENTRE;
66 ret.atitle.underline = L'=';
67 ret.achapter.align = LEFT;
68 ret.achapter.just_numbers = FALSE;
e5e6bf9d 69 ret.achapter.number_suffix = L": ";
d7482997 70 ret.achapter.underline = L'-';
71 ret.nasect = 1;
72 ret.asect = mknewa(alignstruct, ret.nasect);
73 ret.asect[0].align = LEFTPLUS;
74 ret.asect[0].just_numbers = TRUE;
e5e6bf9d 75 ret.asect[0].number_suffix = L" ";
d7482997 76 ret.asect[0].underline = L'\0';
77 ret.include_version_id = TRUE;
78 ret.indent_preambles = FALSE;
e5e6bf9d 79 ret.bullet.text = L"-";
50d6b4bd 80 ret.filename = dupstr("output.txt");
d7482997 81
82 for (; source; source = source->next) {
83 if (source->type == para_Config) {
84 if (!ustricmp(source->keyword, L"text-indent")) {
85 ret.indent = utoi(uadv(source->keyword));
50d6b4bd 86 } else if (!ustricmp(source->keyword, L"text-filename")) {
87 sfree(ret.filename);
88 ret.filename = utoa_dup(uadv(source->keyword));
d7482997 89 } else if (!ustricmp(source->keyword, L"text-indent-code")) {
90 ret.indent_code = utoi(uadv(source->keyword));
91 } else if (!ustricmp(source->keyword, L"text-width")) {
92 ret.width = utoi(uadv(source->keyword));
93 } else if (!ustricmp(source->keyword, L"text-list-indent")) {
94 ret.listindentbefore = utoi(uadv(source->keyword));
95 } else if (!ustricmp(source->keyword, L"text-listitem-indent")) {
96 ret.listindentafter = utoi(uadv(source->keyword));
97 } else if (!ustricmp(source->keyword, L"text-chapter-align")) {
98 ret.achapter.align = utoalign(uadv(source->keyword));
99 } else if (!ustricmp(source->keyword, L"text-chapter-underline")) {
100 ret.achapter.underline = *uadv(source->keyword);
101 } else if (!ustricmp(source->keyword, L"text-chapter-numeric")) {
c83c6495 102 ret.achapter.just_numbers = utob(uadv(source->keyword));
63223c78 103 } else if (!ustricmp(source->keyword, L"text-chapter-suffix")) {
e5e6bf9d 104 ret.achapter.number_suffix = uadv(source->keyword);
d7482997 105 } else if (!ustricmp(source->keyword, L"text-section-align")) {
106 wchar_t *p = uadv(source->keyword);
107 int n = 0;
108 if (uisdigit(*p)) {
109 n = utoi(p);
110 p = uadv(p);
111 }
112 if (n >= ret.nasect) {
113 int i;
114 ret.asect = resize(ret.asect, n+1);
115 for (i = ret.nasect; i <= n; i++)
116 ret.asect[i] = ret.asect[ret.nasect-1];
117 ret.nasect = n+1;
118 }
119 ret.asect[n].align = utoalign(p);
120 } else if (!ustricmp(source->keyword, L"text-section-underline")) {
121 wchar_t *p = uadv(source->keyword);
122 int n = 0;
123 if (uisdigit(*p)) {
124 n = utoi(p);
125 p = uadv(p);
126 }
127 if (n >= ret.nasect) {
128 int i;
129 ret.asect = resize(ret.asect, n+1);
130 for (i = ret.nasect; i <= n; i++)
131 ret.asect[i] = ret.asect[ret.nasect-1];
132 ret.nasect = n+1;
133 }
134 ret.asect[n].underline = *p;
135 } else if (!ustricmp(source->keyword, L"text-section-numeric")) {
136 wchar_t *p = uadv(source->keyword);
137 int n = 0;
138 if (uisdigit(*p)) {
139 n = utoi(p);
140 p = uadv(p);
141 }
142 if (n >= ret.nasect) {
143 int i;
144 ret.asect = resize(ret.asect, n+1);
145 for (i = ret.nasect; i <= n; i++)
146 ret.asect[i] = ret.asect[ret.nasect-1];
147 ret.nasect = n+1;
148 }
149 ret.asect[n].just_numbers = utob(p);
63223c78 150 } else if (!ustricmp(source->keyword, L"text-section-suffix")) {
151 wchar_t *p = uadv(source->keyword);
152 int n = 0;
153 if (uisdigit(*p)) {
154 n = utoi(p);
155 p = uadv(p);
156 }
157 if (n >= ret.nasect) {
158 int i;
159 ret.asect = resize(ret.asect, n+1);
e5e6bf9d 160 for (i = ret.nasect; i <= n; i++) {
63223c78 161 ret.asect[i] = ret.asect[ret.nasect-1];
e5e6bf9d 162 }
63223c78 163 ret.nasect = n+1;
164 }
e5e6bf9d 165 ret.asect[n].number_suffix = p;
d7482997 166 } else if (!ustricmp(source->keyword, L"text-title-align")) {
167 ret.atitle.align = utoalign(uadv(source->keyword));
168 } else if (!ustricmp(source->keyword, L"text-title-underline")) {
169 ret.atitle.underline = *uadv(source->keyword);
170 } else if (!ustricmp(source->keyword, L"text-versionid")) {
171 ret.include_version_id = utob(uadv(source->keyword));
172 } else if (!ustricmp(source->keyword, L"text-indent-preamble")) {
173 ret.indent_preambles = utob(uadv(source->keyword));
174 } else if (!ustricmp(source->keyword, L"text-bullet")) {
175 ret.bullet.text = uadv(source->keyword);
176 }
177 }
178 }
179
180 return ret;
181}
182
183void text_backend(paragraph *sourceform, keywordlist *keywords,
184 indexdata *idx) {
185 paragraph *p;
186 textconfig conf;
187 word *prefix, *body, *wp;
188 word spaceword;
189 FILE *fp;
190 char *prefixextra;
7136a6c7 191 int nesting, nestindent;
d7482997 192 int indentb, indenta;
193
194 IGNORE(keywords); /* we don't happen to need this */
195 IGNORE(idx); /* or this */
196
197 conf = text_configure(sourceform);
198
199 /*
50d6b4bd 200 * Open the output file.
d7482997 201 */
50d6b4bd 202 fp = fopen(conf.filename, "w");
d7482997 203 if (!fp) {
50d6b4bd 204 error(err_cantopenw, conf.filename);
d7482997 205 return;
206 }
207
208 /* Do the title */
209 for (p = sourceform; p; p = p->next)
210 if (p->type == para_Title)
211 text_heading(fp, NULL, NULL, p->words,
212 conf.atitle, conf.indent, conf.width);
213
7136a6c7 214 nestindent = conf.listindentbefore + conf.listindentafter;
8902e0ed 215 nesting = (conf.indent_preambles ? 0 : -conf.indent);
7136a6c7 216
d7482997 217 /* Do the main document */
218 for (p = sourceform; p; p = p->next) switch (p->type) {
219
2614b01d 220 case para_QuotePush:
221 nesting += 2;
222 break;
223 case para_QuotePop:
224 nesting -= 2;
225 assert(nesting >= 0);
226 break;
227
7136a6c7 228 case para_LcontPush:
2614b01d 229 nesting += nestindent;
7136a6c7 230 break;
231 case para_LcontPop:
2614b01d 232 nesting -= nestindent;
233 assert(nesting >= 0);
7136a6c7 234 break;
235
d7482997 236 /*
237 * Things we ignore because we've already processed them or
238 * aren't going to touch them in this pass.
239 */
240 case para_IM:
241 case para_BR:
242 case para_Biblio: /* only touch BiblioCited */
243 case para_VersionID:
d7482997 244 case para_NoCite:
245 case para_Title:
246 break;
247
248 /*
249 * Chapter titles.
250 */
251 case para_Chapter:
252 case para_Appendix:
253 case para_UnnumberedChapter:
254 text_heading(fp, p->kwtext, p->kwtext2, p->words,
255 conf.achapter, conf.indent, conf.width);
8902e0ed 256 nesting = 0;
d7482997 257 break;
258
259 case para_Heading:
260 case para_Subsect:
261 text_heading(fp, p->kwtext, p->kwtext2, p->words,
262 conf.asect[p->aux>=conf.nasect ? conf.nasect-1 : p->aux],
263 conf.indent, conf.width);
264 break;
265
266 case para_Rule:
2614b01d 267 text_rule(fp, conf.indent + nesting, conf.width - nesting);
d7482997 268 break;
269
270 case para_Normal:
9057a0a8 271 case para_Copyright:
7136a6c7 272 case para_DescribedThing:
273 case para_Description:
d7482997 274 case para_BiblioCited:
275 case para_Bullet:
276 case para_NumberedList:
277 if (p->type == para_Bullet) {
278 prefix = &conf.bullet;
279 prefixextra = NULL;
280 indentb = conf.listindentbefore;
281 indenta = conf.listindentafter;
282 } else if (p->type == para_NumberedList) {
283 prefix = p->kwtext;
284 prefixextra = "."; /* FIXME: configurability */
285 indentb = conf.listindentbefore;
286 indenta = conf.listindentafter;
7136a6c7 287 } else if (p->type == para_Description) {
288 prefix = NULL;
289 prefixextra = NULL;
290 indentb = conf.listindentbefore;
291 indenta = conf.listindentafter;
d7482997 292 } else {
293 prefix = NULL;
294 prefixextra = NULL;
295 indentb = indenta = 0;
296 }
297 if (p->type == para_BiblioCited) {
298 body = dup_word_list(p->kwtext);
299 for (wp = body; wp->next; wp = wp->next);
300 wp->next = &spaceword;
301 spaceword.next = p->words;
302 spaceword.alt = NULL;
303 spaceword.type = word_WhiteSpace;
304 spaceword.text = NULL;
305 } else {
306 wp = NULL;
307 body = p->words;
308 }
309 text_para(fp, prefix, prefixextra, body,
2614b01d 310 conf.indent + nesting + indentb, indenta,
311 conf.width - nesting - indentb - indenta);
d7482997 312 if (wp) {
313 wp->next = NULL;
314 free_word_list(body);
315 }
316 break;
317
318 case para_Code:
7136a6c7 319 text_codepara(fp, p->words,
2614b01d 320 conf.indent + nesting + conf.indent_code,
321 conf.width - nesting - 2 * conf.indent_code);
d7482997 322 break;
323 }
324
325 /* Do the version ID */
326 if (conf.include_version_id) {
327 for (p = sourceform; p; p = p->next)
328 if (p->type == para_VersionID)
329 text_versionid(fp, p->words);
330 }
331
332 /*
333 * Tidy up
334 */
335 fclose(fp);
e5e6bf9d 336 sfree(conf.asect);
50d6b4bd 337 sfree(conf.filename);
d7482997 338}
339
340/*
341 * Convert a wide string into a string of chars. If `result' is
342 * non-NULL, mallocs the resulting string and stores a pointer to
343 * it in `*result'. If `result' is NULL, merely checks whether all
344 * characters in the string are feasible for the output character
345 * set.
346 *
347 * Return is nonzero if all characters are OK. If not all
348 * characters are OK but `result' is non-NULL, a result _will_
349 * still be generated!
350 */
351static int text_convert(wchar_t *s, char **result) {
352 /*
353 * FIXME. Currently this is ISO8859-1 only.
354 */
355 int doing = (result != 0);
356 int ok = TRUE;
357 char *p = NULL;
358 int plen = 0, psize = 0;
359
360 for (; *s; s++) {
361 wchar_t c = *s;
362 char outc;
363
364 if ((c >= 32 && c <= 126) ||
365 (c >= 160 && c <= 255)) {
366 /* Char is OK. */
367 outc = (char)c;
368 } else {
369 /* Char is not OK. */
370 ok = FALSE;
371 outc = 0xBF; /* approximate the good old DEC `uh?' */
372 }
373 if (doing) {
374 if (plen >= psize) {
375 psize = plen + 256;
376 p = resize(p, psize);
377 }
378 p[plen++] = outc;
379 }
380 }
381 if (doing) {
382 p = resize(p, plen+1);
383 p[plen] = '\0';
384 *result = p;
385 }
386 return ok;
387}
388
389static void text_rdaddwc(rdstringc *rs, word *text, word *end) {
390 char *c;
391
392 for (; text && text != end; text = text->next) switch (text->type) {
393 case word_HyperLink:
394 case word_HyperEnd:
395 case word_UpperXref:
396 case word_LowerXref:
397 case word_XrefEnd:
398 case word_IndexRef:
399 break;
400
401 case word_Normal:
402 case word_Emph:
403 case word_Code:
404 case word_WeakCode:
405 case word_WhiteSpace:
406 case word_EmphSpace:
407 case word_CodeSpace:
408 case word_WkCodeSpace:
409 case word_Quote:
410 case word_EmphQuote:
411 case word_CodeQuote:
412 case word_WkCodeQuote:
413 assert(text->type != word_CodeQuote &&
414 text->type != word_WkCodeQuote);
415 if (towordstyle(text->type) == word_Emph &&
416 (attraux(text->aux) == attr_First ||
417 attraux(text->aux) == attr_Only))
418 rdaddc(rs, '_'); /* FIXME: configurability */
419 else if (towordstyle(text->type) == word_Code &&
420 (attraux(text->aux) == attr_First ||
421 attraux(text->aux) == attr_Only))
422 rdaddc(rs, '`'); /* FIXME: configurability */
423 if (removeattr(text->type) == word_Normal) {
424 if (text_convert(text->text, &c))
425 rdaddsc(rs, c);
426 else
427 text_rdaddwc(rs, text->alt, NULL);
428 sfree(c);
429 } else if (removeattr(text->type) == word_WhiteSpace) {
430 rdaddc(rs, ' ');
431 } else if (removeattr(text->type) == word_Quote) {
432 rdaddc(rs, quoteaux(text->aux) == quote_Open ? '`' : '\'');
433 /* FIXME: configurability */
434 }
435 if (towordstyle(text->type) == word_Emph &&
436 (attraux(text->aux) == attr_Last ||
437 attraux(text->aux) == attr_Only))
438 rdaddc(rs, '_'); /* FIXME: configurability */
439 else if (towordstyle(text->type) == word_Code &&
440 (attraux(text->aux) == attr_Last ||
441 attraux(text->aux) == attr_Only))
442 rdaddc(rs, '\''); /* FIXME: configurability */
443 break;
444 }
445}
446
447static int text_width(word *);
448
449static int text_width_list(word *text) {
450 int w = 0;
451 while (text) {
452 w += text_width(text);
453 text = text->next;
454 }
455 return w;
456}
457
458static int text_width(word *text) {
459 switch (text->type) {
460 case word_HyperLink:
461 case word_HyperEnd:
462 case word_UpperXref:
463 case word_LowerXref:
464 case word_XrefEnd:
465 case word_IndexRef:
466 return 0;
467
468 case word_Normal:
469 case word_Emph:
470 case word_Code:
471 case word_WeakCode:
472 return (((text->type == word_Emph ||
473 text->type == word_Code)
474 ? (attraux(text->aux) == attr_Only ? 2 :
475 attraux(text->aux) == attr_Always ? 0 : 1)
476 : 0) +
477 (text_convert(text->text, NULL) ?
478 ustrlen(text->text) :
479 text_width_list(text->alt)));
480
481 case word_WhiteSpace:
482 case word_EmphSpace:
483 case word_CodeSpace:
484 case word_WkCodeSpace:
485 case word_Quote:
486 case word_EmphQuote:
487 case word_CodeQuote:
488 case word_WkCodeQuote:
489 assert(text->type != word_CodeQuote &&
490 text->type != word_WkCodeQuote);
491 return (((towordstyle(text->type) == word_Emph ||
492 towordstyle(text->type) == word_Code)
493 ? (attraux(text->aux) == attr_Only ? 2 :
494 attraux(text->aux) == attr_Always ? 0 : 1)
495 : 0) + 1);
496 }
497 return 0; /* should never happen */
498}
499
500static void text_heading(FILE *fp, word *tprefix, word *nprefix, word *text,
501 alignstruct align, int indent, int width) {
502 rdstringc t = { 0, 0, NULL };
503 int margin, length;
504 int firstlinewidth, wrapwidth;
505 wrappedline *wrapping, *p;
506
507 if (align.just_numbers && nprefix) {
63223c78 508 char *c;
d7482997 509 text_rdaddwc(&t, nprefix, NULL);
63223c78 510 if (text_convert(align.number_suffix, &c)) {
511 rdaddsc(&t, c);
512 sfree(c);
513 }
d7482997 514 } else if (!align.just_numbers && tprefix) {
63223c78 515 char *c;
d7482997 516 text_rdaddwc(&t, tprefix, NULL);
63223c78 517 if (text_convert(align.number_suffix, &c)) {
518 rdaddsc(&t, c);
519 sfree(c);
520 }
d7482997 521 }
522 margin = length = (t.text ? strlen(t.text) : 0);
523
524 if (align.align == LEFTPLUS) {
525 margin = indent - margin;
526 if (margin < 0) margin = 0;
527 firstlinewidth = indent + width - margin - length;
528 wrapwidth = width;
529 } else if (align.align == LEFT || align.align == CENTRE) {
530 margin = 0;
531 firstlinewidth = indent + width - length;
532 wrapwidth = indent + width;
533 }
534
535 wrapping = wrap_para(text, firstlinewidth, wrapwidth, text_width);
536 for (p = wrapping; p; p = p->next) {
537 text_rdaddwc(&t, p->begin, p->end);
538 length = (t.text ? strlen(t.text) : 0);
539 if (align.align == CENTRE) {
540 margin = (indent + width - length)/2;
541 if (margin < 0) margin = 0;
542 }
543 fprintf(fp, "%*s%s\n", margin, "", t.text);
544 if (align.underline != L'\0') {
545 char *u, uc;
546 wchar_t uw[2];
547 uw[0] = align.underline; uw[1] = L'\0';
548 text_convert(uw, &u);
549 uc = u[0];
550 sfree(u);
551 fprintf(fp, "%*s", margin, "");
552 while (length--)
553 putc(uc, fp);
554 putc('\n', fp);
555 }
556 if (align.align == LEFTPLUS)
557 margin = indent;
558 else
559 margin = 0;
560 sfree(t.text);
561 t = empty_rdstringc;
562 }
563 wrap_free(wrapping);
564 putc('\n', fp);
565
566 sfree(t.text);
567}
568
/*
 * Write a horizontal rule to `fp': `indent' spaces, then `width'
 * dashes, then a newline and a blank line. Non-positive `indent' or
 * `width' values simply produce no spaces or no dashes respectively.
 */
static void text_rule(FILE *fp, int indent, int width) {
    int i;

    for (i = 0; i < indent; i++)
        putc(' ', fp);
    for (i = 0; i < width; i++)
        putc('-', fp);                 /* FIXME: configurability! */
    putc('\n', fp);
    putc('\n', fp);
}
575
576static void text_para(FILE *fp, word *prefix, char *prefixextra, word *text,
577 int indent, int extraindent, int width) {
578 wrappedline *wrapping, *p;
579 rdstringc pfx = { 0, 0, NULL };
580 int e;
581 int firstlinewidth = width;
582
583 if (prefix) {
584 text_rdaddwc(&pfx, prefix, NULL);
585 if (prefixextra)
586 rdaddsc(&pfx, prefixextra);
587 fprintf(fp, "%*s%s", indent, "", pfx.text);
c83c6495 588 /* If the prefix is too long, shorten the first line to fit. */
d7482997 589 e = extraindent - strlen(pfx.text);
590 if (e < 0) {
c83c6495 591 firstlinewidth += e; /* this decreases it, since e < 0 */
d7482997 592 if (firstlinewidth < 0) {
593 e = indent + extraindent;
594 firstlinewidth = width;
595 fprintf(fp, "\n");
c83c6495 596 } else
597 e = 0;
d7482997 598 }
599 sfree(pfx.text);
600 } else
601 e = indent + extraindent;
602
603 wrapping = wrap_para(text, firstlinewidth, width, text_width);
604 for (p = wrapping; p; p = p->next) {
605 rdstringc t = { 0, 0, NULL };
606 text_rdaddwc(&t, p->begin, p->end);
607 fprintf(fp, "%*s%s\n", e, "", t.text);
608 e = indent + extraindent;
609 sfree(t.text);
610 }
611 wrap_free(wrapping);
612 putc('\n', fp);
613}
614
615static void text_codepara(FILE *fp, word *text, int indent, int width) {
616 for (; text; text = text->next) if (text->type == word_WeakCode) {
617 char *c;
618 text_convert(text->text, &c);
619 if (strlen(c) > (size_t)width) {
620 /* FIXME: warn */
621 }
622 fprintf(fp, "%*s%s\n", indent, "", c);
623 sfree(c);
624 }
625
626 putc('\n', fp);
627}
628
629static void text_versionid(FILE *fp, word *text) {
630 rdstringc t = { 0, 0, NULL };
631
632 rdaddc(&t, '['); /* FIXME: configurability */
633 text_rdaddwc(&t, text, NULL);
634 rdaddc(&t, ']'); /* FIXME: configurability */
635
636 fprintf(fp, "%s\n", t.text);
637 sfree(t.text);
638}