Fix a couple of memory leaks in backends.
[sgt/halibut] / bk_text.c
CommitLineData
d7482997 1/*
2 * text backend for Halibut
3 */
4
5#include <stdio.h>
6#include <stdlib.h>
7#include <assert.h>
8#include "halibut.h"
9
/* Horizontal placement choices for a heading. */
typedef enum {
    LEFT,
    LEFTPLUS,
    CENTRE
} alignment;

/*
 * Formatting parameters for one class of heading (the title, chapter
 * headings, or one level of section heading).
 */
typedef struct {
    alignment align;            /* LEFT, LEFTPLUS or CENTRE */
    int just_numbers;           /* true: use the numeric prefix rather
                                 * than the full textual prefix */
    wchar_t underline;          /* underline character; L'\0' means
                                 * no underline is drawn */
    wchar_t *number_suffix;     /* text appended after the prefix */
} alignstruct;
17
18typedef struct {
19 int indent, indent_code;
20 int listindentbefore, listindentafter;
21 int width;
22 alignstruct atitle, achapter, *asect;
23 int nasect;
24 int include_version_id;
25 int indent_preambles;
26 word bullet;
27} textconfig;
28
29static int text_convert(wchar_t *, char **);
30
31static void text_heading(FILE *, word *, word *, word *, alignstruct, int,int);
32static void text_rule(FILE *, int, int);
33static void text_para(FILE *, word *, char *, word *, int, int, int);
34static void text_codepara(FILE *, word *, int, int);
35static void text_versionid(FILE *, word *);
36
37static alignment utoalign(wchar_t *p) {
38 if (!ustricmp(p, L"centre") || !ustricmp(p, L"center"))
39 return CENTRE;
40 if (!ustricmp(p, L"leftplus"))
41 return LEFTPLUS;
42 return LEFT;
43}
44
45static textconfig text_configure(paragraph *source) {
46 textconfig ret;
47
48 /*
49 * Non-negotiables.
50 */
51 ret.bullet.next = NULL;
52 ret.bullet.alt = NULL;
53 ret.bullet.type = word_Normal;
54 ret.atitle.just_numbers = FALSE; /* ignored */
55
56 /*
57 * Defaults.
58 */
59 ret.indent = 7;
60 ret.indent_code = 2;
61 ret.listindentbefore = 1;
62 ret.listindentafter = 3;
63 ret.width = 68;
64 ret.atitle.align = CENTRE;
65 ret.atitle.underline = L'=';
66 ret.achapter.align = LEFT;
67 ret.achapter.just_numbers = FALSE;
e5e6bf9d 68 ret.achapter.number_suffix = L": ";
d7482997 69 ret.achapter.underline = L'-';
70 ret.nasect = 1;
71 ret.asect = mknewa(alignstruct, ret.nasect);
72 ret.asect[0].align = LEFTPLUS;
73 ret.asect[0].just_numbers = TRUE;
e5e6bf9d 74 ret.asect[0].number_suffix = L" ";
d7482997 75 ret.asect[0].underline = L'\0';
76 ret.include_version_id = TRUE;
77 ret.indent_preambles = FALSE;
e5e6bf9d 78 ret.bullet.text = L"-";
d7482997 79
80 for (; source; source = source->next) {
81 if (source->type == para_Config) {
82 if (!ustricmp(source->keyword, L"text-indent")) {
83 ret.indent = utoi(uadv(source->keyword));
84 } else if (!ustricmp(source->keyword, L"text-indent-code")) {
85 ret.indent_code = utoi(uadv(source->keyword));
86 } else if (!ustricmp(source->keyword, L"text-width")) {
87 ret.width = utoi(uadv(source->keyword));
88 } else if (!ustricmp(source->keyword, L"text-list-indent")) {
89 ret.listindentbefore = utoi(uadv(source->keyword));
90 } else if (!ustricmp(source->keyword, L"text-listitem-indent")) {
91 ret.listindentafter = utoi(uadv(source->keyword));
92 } else if (!ustricmp(source->keyword, L"text-chapter-align")) {
93 ret.achapter.align = utoalign(uadv(source->keyword));
94 } else if (!ustricmp(source->keyword, L"text-chapter-underline")) {
95 ret.achapter.underline = *uadv(source->keyword);
96 } else if (!ustricmp(source->keyword, L"text-chapter-numeric")) {
c83c6495 97 ret.achapter.just_numbers = utob(uadv(source->keyword));
63223c78 98 } else if (!ustricmp(source->keyword, L"text-chapter-suffix")) {
e5e6bf9d 99 ret.achapter.number_suffix = uadv(source->keyword);
d7482997 100 } else if (!ustricmp(source->keyword, L"text-section-align")) {
101 wchar_t *p = uadv(source->keyword);
102 int n = 0;
103 if (uisdigit(*p)) {
104 n = utoi(p);
105 p = uadv(p);
106 }
107 if (n >= ret.nasect) {
108 int i;
109 ret.asect = resize(ret.asect, n+1);
110 for (i = ret.nasect; i <= n; i++)
111 ret.asect[i] = ret.asect[ret.nasect-1];
112 ret.nasect = n+1;
113 }
114 ret.asect[n].align = utoalign(p);
115 } else if (!ustricmp(source->keyword, L"text-section-underline")) {
116 wchar_t *p = uadv(source->keyword);
117 int n = 0;
118 if (uisdigit(*p)) {
119 n = utoi(p);
120 p = uadv(p);
121 }
122 if (n >= ret.nasect) {
123 int i;
124 ret.asect = resize(ret.asect, n+1);
125 for (i = ret.nasect; i <= n; i++)
126 ret.asect[i] = ret.asect[ret.nasect-1];
127 ret.nasect = n+1;
128 }
129 ret.asect[n].underline = *p;
130 } else if (!ustricmp(source->keyword, L"text-section-numeric")) {
131 wchar_t *p = uadv(source->keyword);
132 int n = 0;
133 if (uisdigit(*p)) {
134 n = utoi(p);
135 p = uadv(p);
136 }
137 if (n >= ret.nasect) {
138 int i;
139 ret.asect = resize(ret.asect, n+1);
140 for (i = ret.nasect; i <= n; i++)
141 ret.asect[i] = ret.asect[ret.nasect-1];
142 ret.nasect = n+1;
143 }
144 ret.asect[n].just_numbers = utob(p);
63223c78 145 } else if (!ustricmp(source->keyword, L"text-section-suffix")) {
146 wchar_t *p = uadv(source->keyword);
147 int n = 0;
148 if (uisdigit(*p)) {
149 n = utoi(p);
150 p = uadv(p);
151 }
152 if (n >= ret.nasect) {
153 int i;
154 ret.asect = resize(ret.asect, n+1);
e5e6bf9d 155 for (i = ret.nasect; i <= n; i++) {
63223c78 156 ret.asect[i] = ret.asect[ret.nasect-1];
e5e6bf9d 157 }
63223c78 158 ret.nasect = n+1;
159 }
e5e6bf9d 160 ret.asect[n].number_suffix = p;
d7482997 161 } else if (!ustricmp(source->keyword, L"text-title-align")) {
162 ret.atitle.align = utoalign(uadv(source->keyword));
163 } else if (!ustricmp(source->keyword, L"text-title-underline")) {
164 ret.atitle.underline = *uadv(source->keyword);
165 } else if (!ustricmp(source->keyword, L"text-versionid")) {
166 ret.include_version_id = utob(uadv(source->keyword));
167 } else if (!ustricmp(source->keyword, L"text-indent-preamble")) {
168 ret.indent_preambles = utob(uadv(source->keyword));
169 } else if (!ustricmp(source->keyword, L"text-bullet")) {
170 ret.bullet.text = uadv(source->keyword);
171 }
172 }
173 }
174
175 return ret;
176}
177
178void text_backend(paragraph *sourceform, keywordlist *keywords,
179 indexdata *idx) {
180 paragraph *p;
181 textconfig conf;
182 word *prefix, *body, *wp;
183 word spaceword;
184 FILE *fp;
185 char *prefixextra;
7136a6c7 186 int nesting, nestindent;
d7482997 187 int indentb, indenta;
188
189 IGNORE(keywords); /* we don't happen to need this */
190 IGNORE(idx); /* or this */
191
192 conf = text_configure(sourceform);
193
194 /*
195 * Determine the output file name, and open the output file
196 *
197 * FIXME: want configurable output file names here. For the
198 * moment, we'll just call it `output.txt'.
199 */
200 fp = fopen("output.txt", "w");
201 if (!fp) {
202 error(err_cantopenw, "output.txt");
203 return;
204 }
205
206 /* Do the title */
207 for (p = sourceform; p; p = p->next)
208 if (p->type == para_Title)
209 text_heading(fp, NULL, NULL, p->words,
210 conf.atitle, conf.indent, conf.width);
211
7136a6c7 212 nestindent = conf.listindentbefore + conf.listindentafter;
8902e0ed 213 nesting = (conf.indent_preambles ? 0 : -conf.indent);
7136a6c7 214
d7482997 215 /* Do the main document */
216 for (p = sourceform; p; p = p->next) switch (p->type) {
217
2614b01d 218 case para_QuotePush:
219 nesting += 2;
220 break;
221 case para_QuotePop:
222 nesting -= 2;
223 assert(nesting >= 0);
224 break;
225
7136a6c7 226 case para_LcontPush:
2614b01d 227 nesting += nestindent;
7136a6c7 228 break;
229 case para_LcontPop:
2614b01d 230 nesting -= nestindent;
231 assert(nesting >= 0);
7136a6c7 232 break;
233
d7482997 234 /*
235 * Things we ignore because we've already processed them or
236 * aren't going to touch them in this pass.
237 */
238 case para_IM:
239 case para_BR:
240 case para_Biblio: /* only touch BiblioCited */
241 case para_VersionID:
d7482997 242 case para_NoCite:
243 case para_Title:
244 break;
245
246 /*
247 * Chapter titles.
248 */
249 case para_Chapter:
250 case para_Appendix:
251 case para_UnnumberedChapter:
252 text_heading(fp, p->kwtext, p->kwtext2, p->words,
253 conf.achapter, conf.indent, conf.width);
8902e0ed 254 nesting = 0;
d7482997 255 break;
256
257 case para_Heading:
258 case para_Subsect:
259 text_heading(fp, p->kwtext, p->kwtext2, p->words,
260 conf.asect[p->aux>=conf.nasect ? conf.nasect-1 : p->aux],
261 conf.indent, conf.width);
262 break;
263
264 case para_Rule:
2614b01d 265 text_rule(fp, conf.indent + nesting, conf.width - nesting);
d7482997 266 break;
267
268 case para_Normal:
9057a0a8 269 case para_Copyright:
7136a6c7 270 case para_DescribedThing:
271 case para_Description:
d7482997 272 case para_BiblioCited:
273 case para_Bullet:
274 case para_NumberedList:
275 if (p->type == para_Bullet) {
276 prefix = &conf.bullet;
277 prefixextra = NULL;
278 indentb = conf.listindentbefore;
279 indenta = conf.listindentafter;
280 } else if (p->type == para_NumberedList) {
281 prefix = p->kwtext;
282 prefixextra = "."; /* FIXME: configurability */
283 indentb = conf.listindentbefore;
284 indenta = conf.listindentafter;
7136a6c7 285 } else if (p->type == para_Description) {
286 prefix = NULL;
287 prefixextra = NULL;
288 indentb = conf.listindentbefore;
289 indenta = conf.listindentafter;
d7482997 290 } else {
291 prefix = NULL;
292 prefixextra = NULL;
293 indentb = indenta = 0;
294 }
295 if (p->type == para_BiblioCited) {
296 body = dup_word_list(p->kwtext);
297 for (wp = body; wp->next; wp = wp->next);
298 wp->next = &spaceword;
299 spaceword.next = p->words;
300 spaceword.alt = NULL;
301 spaceword.type = word_WhiteSpace;
302 spaceword.text = NULL;
303 } else {
304 wp = NULL;
305 body = p->words;
306 }
307 text_para(fp, prefix, prefixextra, body,
2614b01d 308 conf.indent + nesting + indentb, indenta,
309 conf.width - nesting - indentb - indenta);
d7482997 310 if (wp) {
311 wp->next = NULL;
312 free_word_list(body);
313 }
314 break;
315
316 case para_Code:
7136a6c7 317 text_codepara(fp, p->words,
2614b01d 318 conf.indent + nesting + conf.indent_code,
319 conf.width - nesting - 2 * conf.indent_code);
d7482997 320 break;
321 }
322
323 /* Do the version ID */
324 if (conf.include_version_id) {
325 for (p = sourceform; p; p = p->next)
326 if (p->type == para_VersionID)
327 text_versionid(fp, p->words);
328 }
329
330 /*
331 * Tidy up
332 */
333 fclose(fp);
e5e6bf9d 334 sfree(conf.asect);
d7482997 335}
336
337/*
338 * Convert a wide string into a string of chars. If `result' is
339 * non-NULL, mallocs the resulting string and stores a pointer to
340 * it in `*result'. If `result' is NULL, merely checks whether all
341 * characters in the string are feasible for the output character
342 * set.
343 *
344 * Return is nonzero if all characters are OK. If not all
345 * characters are OK but `result' is non-NULL, a result _will_
346 * still be generated!
347 */
348static int text_convert(wchar_t *s, char **result) {
349 /*
350 * FIXME. Currently this is ISO8859-1 only.
351 */
352 int doing = (result != 0);
353 int ok = TRUE;
354 char *p = NULL;
355 int plen = 0, psize = 0;
356
357 for (; *s; s++) {
358 wchar_t c = *s;
359 char outc;
360
361 if ((c >= 32 && c <= 126) ||
362 (c >= 160 && c <= 255)) {
363 /* Char is OK. */
364 outc = (char)c;
365 } else {
366 /* Char is not OK. */
367 ok = FALSE;
368 outc = 0xBF; /* approximate the good old DEC `uh?' */
369 }
370 if (doing) {
371 if (plen >= psize) {
372 psize = plen + 256;
373 p = resize(p, psize);
374 }
375 p[plen++] = outc;
376 }
377 }
378 if (doing) {
379 p = resize(p, plen+1);
380 p[plen] = '\0';
381 *result = p;
382 }
383 return ok;
384}
385
386static void text_rdaddwc(rdstringc *rs, word *text, word *end) {
387 char *c;
388
389 for (; text && text != end; text = text->next) switch (text->type) {
390 case word_HyperLink:
391 case word_HyperEnd:
392 case word_UpperXref:
393 case word_LowerXref:
394 case word_XrefEnd:
395 case word_IndexRef:
396 break;
397
398 case word_Normal:
399 case word_Emph:
400 case word_Code:
401 case word_WeakCode:
402 case word_WhiteSpace:
403 case word_EmphSpace:
404 case word_CodeSpace:
405 case word_WkCodeSpace:
406 case word_Quote:
407 case word_EmphQuote:
408 case word_CodeQuote:
409 case word_WkCodeQuote:
410 assert(text->type != word_CodeQuote &&
411 text->type != word_WkCodeQuote);
412 if (towordstyle(text->type) == word_Emph &&
413 (attraux(text->aux) == attr_First ||
414 attraux(text->aux) == attr_Only))
415 rdaddc(rs, '_'); /* FIXME: configurability */
416 else if (towordstyle(text->type) == word_Code &&
417 (attraux(text->aux) == attr_First ||
418 attraux(text->aux) == attr_Only))
419 rdaddc(rs, '`'); /* FIXME: configurability */
420 if (removeattr(text->type) == word_Normal) {
421 if (text_convert(text->text, &c))
422 rdaddsc(rs, c);
423 else
424 text_rdaddwc(rs, text->alt, NULL);
425 sfree(c);
426 } else if (removeattr(text->type) == word_WhiteSpace) {
427 rdaddc(rs, ' ');
428 } else if (removeattr(text->type) == word_Quote) {
429 rdaddc(rs, quoteaux(text->aux) == quote_Open ? '`' : '\'');
430 /* FIXME: configurability */
431 }
432 if (towordstyle(text->type) == word_Emph &&
433 (attraux(text->aux) == attr_Last ||
434 attraux(text->aux) == attr_Only))
435 rdaddc(rs, '_'); /* FIXME: configurability */
436 else if (towordstyle(text->type) == word_Code &&
437 (attraux(text->aux) == attr_Last ||
438 attraux(text->aux) == attr_Only))
439 rdaddc(rs, '\''); /* FIXME: configurability */
440 break;
441 }
442}
443
444static int text_width(word *);
445
446static int text_width_list(word *text) {
447 int w = 0;
448 while (text) {
449 w += text_width(text);
450 text = text->next;
451 }
452 return w;
453}
454
455static int text_width(word *text) {
456 switch (text->type) {
457 case word_HyperLink:
458 case word_HyperEnd:
459 case word_UpperXref:
460 case word_LowerXref:
461 case word_XrefEnd:
462 case word_IndexRef:
463 return 0;
464
465 case word_Normal:
466 case word_Emph:
467 case word_Code:
468 case word_WeakCode:
469 return (((text->type == word_Emph ||
470 text->type == word_Code)
471 ? (attraux(text->aux) == attr_Only ? 2 :
472 attraux(text->aux) == attr_Always ? 0 : 1)
473 : 0) +
474 (text_convert(text->text, NULL) ?
475 ustrlen(text->text) :
476 text_width_list(text->alt)));
477
478 case word_WhiteSpace:
479 case word_EmphSpace:
480 case word_CodeSpace:
481 case word_WkCodeSpace:
482 case word_Quote:
483 case word_EmphQuote:
484 case word_CodeQuote:
485 case word_WkCodeQuote:
486 assert(text->type != word_CodeQuote &&
487 text->type != word_WkCodeQuote);
488 return (((towordstyle(text->type) == word_Emph ||
489 towordstyle(text->type) == word_Code)
490 ? (attraux(text->aux) == attr_Only ? 2 :
491 attraux(text->aux) == attr_Always ? 0 : 1)
492 : 0) + 1);
493 }
494 return 0; /* should never happen */
495}
496
497static void text_heading(FILE *fp, word *tprefix, word *nprefix, word *text,
498 alignstruct align, int indent, int width) {
499 rdstringc t = { 0, 0, NULL };
500 int margin, length;
501 int firstlinewidth, wrapwidth;
502 wrappedline *wrapping, *p;
503
504 if (align.just_numbers && nprefix) {
63223c78 505 char *c;
d7482997 506 text_rdaddwc(&t, nprefix, NULL);
63223c78 507 if (text_convert(align.number_suffix, &c)) {
508 rdaddsc(&t, c);
509 sfree(c);
510 }
d7482997 511 } else if (!align.just_numbers && tprefix) {
63223c78 512 char *c;
d7482997 513 text_rdaddwc(&t, tprefix, NULL);
63223c78 514 if (text_convert(align.number_suffix, &c)) {
515 rdaddsc(&t, c);
516 sfree(c);
517 }
d7482997 518 }
519 margin = length = (t.text ? strlen(t.text) : 0);
520
521 if (align.align == LEFTPLUS) {
522 margin = indent - margin;
523 if (margin < 0) margin = 0;
524 firstlinewidth = indent + width - margin - length;
525 wrapwidth = width;
526 } else if (align.align == LEFT || align.align == CENTRE) {
527 margin = 0;
528 firstlinewidth = indent + width - length;
529 wrapwidth = indent + width;
530 }
531
532 wrapping = wrap_para(text, firstlinewidth, wrapwidth, text_width);
533 for (p = wrapping; p; p = p->next) {
534 text_rdaddwc(&t, p->begin, p->end);
535 length = (t.text ? strlen(t.text) : 0);
536 if (align.align == CENTRE) {
537 margin = (indent + width - length)/2;
538 if (margin < 0) margin = 0;
539 }
540 fprintf(fp, "%*s%s\n", margin, "", t.text);
541 if (align.underline != L'\0') {
542 char *u, uc;
543 wchar_t uw[2];
544 uw[0] = align.underline; uw[1] = L'\0';
545 text_convert(uw, &u);
546 uc = u[0];
547 sfree(u);
548 fprintf(fp, "%*s", margin, "");
549 while (length--)
550 putc(uc, fp);
551 putc('\n', fp);
552 }
553 if (align.align == LEFTPLUS)
554 margin = indent;
555 else
556 margin = 0;
557 sfree(t.text);
558 t = empty_rdstringc;
559 }
560 wrap_free(wrapping);
561 putc('\n', fp);
562
563 sfree(t.text);
564}
565
/*
 * Write a horizontal rule to `fp': `indent' spaces, then `width'
 * dashes, then a newline and a blank line. Negative values of
 * `indent' or `width' are treated as zero.
 */
static void text_rule(FILE *fp, int indent, int width) {
    int i;

    for (i = 0; i < indent; i++)
        putc(' ', fp);
    for (i = 0; i < width; i++)
        putc('-', fp);                 /* FIXME: configurability! */
    putc('\n', fp);
    putc('\n', fp);
}
572
573static void text_para(FILE *fp, word *prefix, char *prefixextra, word *text,
574 int indent, int extraindent, int width) {
575 wrappedline *wrapping, *p;
576 rdstringc pfx = { 0, 0, NULL };
577 int e;
578 int firstlinewidth = width;
579
580 if (prefix) {
581 text_rdaddwc(&pfx, prefix, NULL);
582 if (prefixextra)
583 rdaddsc(&pfx, prefixextra);
584 fprintf(fp, "%*s%s", indent, "", pfx.text);
c83c6495 585 /* If the prefix is too long, shorten the first line to fit. */
d7482997 586 e = extraindent - strlen(pfx.text);
587 if (e < 0) {
c83c6495 588 firstlinewidth += e; /* this decreases it, since e < 0 */
d7482997 589 if (firstlinewidth < 0) {
590 e = indent + extraindent;
591 firstlinewidth = width;
592 fprintf(fp, "\n");
c83c6495 593 } else
594 e = 0;
d7482997 595 }
596 sfree(pfx.text);
597 } else
598 e = indent + extraindent;
599
600 wrapping = wrap_para(text, firstlinewidth, width, text_width);
601 for (p = wrapping; p; p = p->next) {
602 rdstringc t = { 0, 0, NULL };
603 text_rdaddwc(&t, p->begin, p->end);
604 fprintf(fp, "%*s%s\n", e, "", t.text);
605 e = indent + extraindent;
606 sfree(t.text);
607 }
608 wrap_free(wrapping);
609 putc('\n', fp);
610}
611
612static void text_codepara(FILE *fp, word *text, int indent, int width) {
613 for (; text; text = text->next) if (text->type == word_WeakCode) {
614 char *c;
615 text_convert(text->text, &c);
616 if (strlen(c) > (size_t)width) {
617 /* FIXME: warn */
618 }
619 fprintf(fp, "%*s%s\n", indent, "", c);
620 sfree(c);
621 }
622
623 putc('\n', fp);
624}
625
626static void text_versionid(FILE *fp, word *text) {
627 rdstringc t = { 0, 0, NULL };
628
629 rdaddc(&t, '['); /* FIXME: configurability */
630 text_rdaddwc(&t, text, NULL);
631 rdaddc(&t, ']'); /* FIXME: configurability */
632
633 fprintf(fp, "%s\n", t.text);
634 sfree(t.text);
635}