Arrange for multiple \cfg, \IM and \BR paragraphs to not require
[sgt/halibut] / bk_text.c
CommitLineData
d7482997 1/*
2 * text backend for Halibut
3 */
4
5#include <stdio.h>
6#include <stdlib.h>
7#include <assert.h>
8#include "halibut.h"
9
/*
 * Horizontal placement of a heading, as interpreted by
 * text_heading():
 *
 *  - LEFT      the heading starts at the left-hand edge of the page.
 *  - LEFTPLUS  the heading text starts at the body indent, with its
 *              prefix placed in the margin to the left of it.
 *  - CENTRE    each line is centred within indent + width columns.
 */
typedef enum {
    LEFT,
    LEFTPLUS,
    CENTRE
} alignment;

/*
 * Formatting of one class of heading (title, chapter, or one level
 * of section).
 */
typedef struct {
    alignment align;      /* where the heading is placed */
    int just_numbers;     /* nonzero: use the numeric prefix alone,
                           * zero: use the full textual prefix */
    wchar_t underline;    /* underline character; L'\0' means none */
} alignstruct;
16
17typedef struct {
18 int indent, indent_code;
19 int listindentbefore, listindentafter;
20 int width;
21 alignstruct atitle, achapter, *asect;
22 int nasect;
23 int include_version_id;
24 int indent_preambles;
25 word bullet;
26} textconfig;
27
28static int text_convert(wchar_t *, char **);
29
30static void text_heading(FILE *, word *, word *, word *, alignstruct, int,int);
31static void text_rule(FILE *, int, int);
32static void text_para(FILE *, word *, char *, word *, int, int, int);
33static void text_codepara(FILE *, word *, int, int);
34static void text_versionid(FILE *, word *);
35
36static alignment utoalign(wchar_t *p) {
37 if (!ustricmp(p, L"centre") || !ustricmp(p, L"center"))
38 return CENTRE;
39 if (!ustricmp(p, L"leftplus"))
40 return LEFTPLUS;
41 return LEFT;
42}
43
44static textconfig text_configure(paragraph *source) {
45 textconfig ret;
46
47 /*
48 * Non-negotiables.
49 */
50 ret.bullet.next = NULL;
51 ret.bullet.alt = NULL;
52 ret.bullet.type = word_Normal;
53 ret.atitle.just_numbers = FALSE; /* ignored */
54
55 /*
56 * Defaults.
57 */
58 ret.indent = 7;
59 ret.indent_code = 2;
60 ret.listindentbefore = 1;
61 ret.listindentafter = 3;
62 ret.width = 68;
63 ret.atitle.align = CENTRE;
64 ret.atitle.underline = L'=';
65 ret.achapter.align = LEFT;
66 ret.achapter.just_numbers = FALSE;
67 ret.achapter.underline = L'-';
68 ret.nasect = 1;
69 ret.asect = mknewa(alignstruct, ret.nasect);
70 ret.asect[0].align = LEFTPLUS;
71 ret.asect[0].just_numbers = TRUE;
72 ret.asect[0].underline = L'\0';
73 ret.include_version_id = TRUE;
74 ret.indent_preambles = FALSE;
75 ret.bullet.text = ustrdup(L"-");
76
77 for (; source; source = source->next) {
78 if (source->type == para_Config) {
79 if (!ustricmp(source->keyword, L"text-indent")) {
80 ret.indent = utoi(uadv(source->keyword));
81 } else if (!ustricmp(source->keyword, L"text-indent-code")) {
82 ret.indent_code = utoi(uadv(source->keyword));
83 } else if (!ustricmp(source->keyword, L"text-width")) {
84 ret.width = utoi(uadv(source->keyword));
85 } else if (!ustricmp(source->keyword, L"text-list-indent")) {
86 ret.listindentbefore = utoi(uadv(source->keyword));
87 } else if (!ustricmp(source->keyword, L"text-listitem-indent")) {
88 ret.listindentafter = utoi(uadv(source->keyword));
89 } else if (!ustricmp(source->keyword, L"text-chapter-align")) {
90 ret.achapter.align = utoalign(uadv(source->keyword));
91 } else if (!ustricmp(source->keyword, L"text-chapter-underline")) {
92 ret.achapter.underline = *uadv(source->keyword);
93 } else if (!ustricmp(source->keyword, L"text-chapter-numeric")) {
c83c6495 94 ret.achapter.just_numbers = utob(uadv(source->keyword));
d7482997 95 } else if (!ustricmp(source->keyword, L"text-section-align")) {
96 wchar_t *p = uadv(source->keyword);
97 int n = 0;
98 if (uisdigit(*p)) {
99 n = utoi(p);
100 p = uadv(p);
101 }
102 if (n >= ret.nasect) {
103 int i;
104 ret.asect = resize(ret.asect, n+1);
105 for (i = ret.nasect; i <= n; i++)
106 ret.asect[i] = ret.asect[ret.nasect-1];
107 ret.nasect = n+1;
108 }
109 ret.asect[n].align = utoalign(p);
110 } else if (!ustricmp(source->keyword, L"text-section-underline")) {
111 wchar_t *p = uadv(source->keyword);
112 int n = 0;
113 if (uisdigit(*p)) {
114 n = utoi(p);
115 p = uadv(p);
116 }
117 if (n >= ret.nasect) {
118 int i;
119 ret.asect = resize(ret.asect, n+1);
120 for (i = ret.nasect; i <= n; i++)
121 ret.asect[i] = ret.asect[ret.nasect-1];
122 ret.nasect = n+1;
123 }
124 ret.asect[n].underline = *p;
125 } else if (!ustricmp(source->keyword, L"text-section-numeric")) {
126 wchar_t *p = uadv(source->keyword);
127 int n = 0;
128 if (uisdigit(*p)) {
129 n = utoi(p);
130 p = uadv(p);
131 }
132 if (n >= ret.nasect) {
133 int i;
134 ret.asect = resize(ret.asect, n+1);
135 for (i = ret.nasect; i <= n; i++)
136 ret.asect[i] = ret.asect[ret.nasect-1];
137 ret.nasect = n+1;
138 }
139 ret.asect[n].just_numbers = utob(p);
140 } else if (!ustricmp(source->keyword, L"text-title-align")) {
141 ret.atitle.align = utoalign(uadv(source->keyword));
142 } else if (!ustricmp(source->keyword, L"text-title-underline")) {
143 ret.atitle.underline = *uadv(source->keyword);
144 } else if (!ustricmp(source->keyword, L"text-versionid")) {
145 ret.include_version_id = utob(uadv(source->keyword));
146 } else if (!ustricmp(source->keyword, L"text-indent-preamble")) {
147 ret.indent_preambles = utob(uadv(source->keyword));
148 } else if (!ustricmp(source->keyword, L"text-bullet")) {
149 ret.bullet.text = uadv(source->keyword);
150 }
151 }
152 }
153
154 return ret;
155}
156
157void text_backend(paragraph *sourceform, keywordlist *keywords,
158 indexdata *idx) {
159 paragraph *p;
160 textconfig conf;
161 word *prefix, *body, *wp;
162 word spaceword;
163 FILE *fp;
164 char *prefixextra;
165 int indentb, indenta;
166
167 IGNORE(keywords); /* we don't happen to need this */
168 IGNORE(idx); /* or this */
169
170 conf = text_configure(sourceform);
171
172 /*
173 * Determine the output file name, and open the output file
174 *
175 * FIXME: want configurable output file names here. For the
176 * moment, we'll just call it `output.txt'.
177 */
178 fp = fopen("output.txt", "w");
179 if (!fp) {
180 error(err_cantopenw, "output.txt");
181 return;
182 }
183
184 /* Do the title */
185 for (p = sourceform; p; p = p->next)
186 if (p->type == para_Title)
187 text_heading(fp, NULL, NULL, p->words,
188 conf.atitle, conf.indent, conf.width);
189
190 /* Do the preamble and copyright */
191 for (p = sourceform; p; p = p->next)
192 if (p->type == para_Preamble)
193 text_para(fp, NULL, NULL, p->words,
194 conf.indent_preambles ? conf.indent : 0, 0,
195 conf.width + (conf.indent_preambles ? 0 : conf.indent));
196 for (p = sourceform; p; p = p->next)
197 if (p->type == para_Copyright)
198 text_para(fp, NULL, NULL, p->words,
199 conf.indent_preambles ? conf.indent : 0, 0,
200 conf.width + (conf.indent_preambles ? 0 : conf.indent));
201
202 /* Do the main document */
203 for (p = sourceform; p; p = p->next) switch (p->type) {
204
205 /*
206 * Things we ignore because we've already processed them or
207 * aren't going to touch them in this pass.
208 */
209 case para_IM:
210 case para_BR:
211 case para_Biblio: /* only touch BiblioCited */
212 case para_VersionID:
213 case para_Copyright:
214 case para_Preamble:
215 case para_NoCite:
216 case para_Title:
217 break;
218
219 /*
220 * Chapter titles.
221 */
222 case para_Chapter:
223 case para_Appendix:
224 case para_UnnumberedChapter:
225 text_heading(fp, p->kwtext, p->kwtext2, p->words,
226 conf.achapter, conf.indent, conf.width);
227 break;
228
229 case para_Heading:
230 case para_Subsect:
231 text_heading(fp, p->kwtext, p->kwtext2, p->words,
232 conf.asect[p->aux>=conf.nasect ? conf.nasect-1 : p->aux],
233 conf.indent, conf.width);
234 break;
235
236 case para_Rule:
237 text_rule(fp, conf.indent, conf.width);
238 break;
239
240 case para_Normal:
241 case para_BiblioCited:
242 case para_Bullet:
243 case para_NumberedList:
244 if (p->type == para_Bullet) {
245 prefix = &conf.bullet;
246 prefixextra = NULL;
247 indentb = conf.listindentbefore;
248 indenta = conf.listindentafter;
249 } else if (p->type == para_NumberedList) {
250 prefix = p->kwtext;
251 prefixextra = "."; /* FIXME: configurability */
252 indentb = conf.listindentbefore;
253 indenta = conf.listindentafter;
254 } else {
255 prefix = NULL;
256 prefixextra = NULL;
257 indentb = indenta = 0;
258 }
259 if (p->type == para_BiblioCited) {
260 body = dup_word_list(p->kwtext);
261 for (wp = body; wp->next; wp = wp->next);
262 wp->next = &spaceword;
263 spaceword.next = p->words;
264 spaceword.alt = NULL;
265 spaceword.type = word_WhiteSpace;
266 spaceword.text = NULL;
267 } else {
268 wp = NULL;
269 body = p->words;
270 }
271 text_para(fp, prefix, prefixextra, body,
c83c6495 272 conf.indent + indentb, indenta,
273 conf.width - indentb - indenta);
d7482997 274 if (wp) {
275 wp->next = NULL;
276 free_word_list(body);
277 }
278 break;
279
280 case para_Code:
281 text_codepara(fp, p->words, conf.indent + conf.indent_code, conf.width - 2 * conf.indent_code);
282 break;
283 }
284
285 /* Do the version ID */
286 if (conf.include_version_id) {
287 for (p = sourceform; p; p = p->next)
288 if (p->type == para_VersionID)
289 text_versionid(fp, p->words);
290 }
291
292 /*
293 * Tidy up
294 */
295 fclose(fp);
296 sfree(conf.bullet.text);
297}
298
299/*
300 * Convert a wide string into a string of chars. If `result' is
301 * non-NULL, mallocs the resulting string and stores a pointer to
302 * it in `*result'. If `result' is NULL, merely checks whether all
303 * characters in the string are feasible for the output character
304 * set.
305 *
306 * Return is nonzero if all characters are OK. If not all
307 * characters are OK but `result' is non-NULL, a result _will_
308 * still be generated!
309 */
310static int text_convert(wchar_t *s, char **result) {
311 /*
312 * FIXME. Currently this is ISO8859-1 only.
313 */
314 int doing = (result != 0);
315 int ok = TRUE;
316 char *p = NULL;
317 int plen = 0, psize = 0;
318
319 for (; *s; s++) {
320 wchar_t c = *s;
321 char outc;
322
323 if ((c >= 32 && c <= 126) ||
324 (c >= 160 && c <= 255)) {
325 /* Char is OK. */
326 outc = (char)c;
327 } else {
328 /* Char is not OK. */
329 ok = FALSE;
330 outc = 0xBF; /* approximate the good old DEC `uh?' */
331 }
332 if (doing) {
333 if (plen >= psize) {
334 psize = plen + 256;
335 p = resize(p, psize);
336 }
337 p[plen++] = outc;
338 }
339 }
340 if (doing) {
341 p = resize(p, plen+1);
342 p[plen] = '\0';
343 *result = p;
344 }
345 return ok;
346}
347
348static void text_rdaddwc(rdstringc *rs, word *text, word *end) {
349 char *c;
350
351 for (; text && text != end; text = text->next) switch (text->type) {
352 case word_HyperLink:
353 case word_HyperEnd:
354 case word_UpperXref:
355 case word_LowerXref:
356 case word_XrefEnd:
357 case word_IndexRef:
358 break;
359
360 case word_Normal:
361 case word_Emph:
362 case word_Code:
363 case word_WeakCode:
364 case word_WhiteSpace:
365 case word_EmphSpace:
366 case word_CodeSpace:
367 case word_WkCodeSpace:
368 case word_Quote:
369 case word_EmphQuote:
370 case word_CodeQuote:
371 case word_WkCodeQuote:
372 assert(text->type != word_CodeQuote &&
373 text->type != word_WkCodeQuote);
374 if (towordstyle(text->type) == word_Emph &&
375 (attraux(text->aux) == attr_First ||
376 attraux(text->aux) == attr_Only))
377 rdaddc(rs, '_'); /* FIXME: configurability */
378 else if (towordstyle(text->type) == word_Code &&
379 (attraux(text->aux) == attr_First ||
380 attraux(text->aux) == attr_Only))
381 rdaddc(rs, '`'); /* FIXME: configurability */
382 if (removeattr(text->type) == word_Normal) {
383 if (text_convert(text->text, &c))
384 rdaddsc(rs, c);
385 else
386 text_rdaddwc(rs, text->alt, NULL);
387 sfree(c);
388 } else if (removeattr(text->type) == word_WhiteSpace) {
389 rdaddc(rs, ' ');
390 } else if (removeattr(text->type) == word_Quote) {
391 rdaddc(rs, quoteaux(text->aux) == quote_Open ? '`' : '\'');
392 /* FIXME: configurability */
393 }
394 if (towordstyle(text->type) == word_Emph &&
395 (attraux(text->aux) == attr_Last ||
396 attraux(text->aux) == attr_Only))
397 rdaddc(rs, '_'); /* FIXME: configurability */
398 else if (towordstyle(text->type) == word_Code &&
399 (attraux(text->aux) == attr_Last ||
400 attraux(text->aux) == attr_Only))
401 rdaddc(rs, '\''); /* FIXME: configurability */
402 break;
403 }
404}
405
406static int text_width(word *);
407
408static int text_width_list(word *text) {
409 int w = 0;
410 while (text) {
411 w += text_width(text);
412 text = text->next;
413 }
414 return w;
415}
416
417static int text_width(word *text) {
418 switch (text->type) {
419 case word_HyperLink:
420 case word_HyperEnd:
421 case word_UpperXref:
422 case word_LowerXref:
423 case word_XrefEnd:
424 case word_IndexRef:
425 return 0;
426
427 case word_Normal:
428 case word_Emph:
429 case word_Code:
430 case word_WeakCode:
431 return (((text->type == word_Emph ||
432 text->type == word_Code)
433 ? (attraux(text->aux) == attr_Only ? 2 :
434 attraux(text->aux) == attr_Always ? 0 : 1)
435 : 0) +
436 (text_convert(text->text, NULL) ?
437 ustrlen(text->text) :
438 text_width_list(text->alt)));
439
440 case word_WhiteSpace:
441 case word_EmphSpace:
442 case word_CodeSpace:
443 case word_WkCodeSpace:
444 case word_Quote:
445 case word_EmphQuote:
446 case word_CodeQuote:
447 case word_WkCodeQuote:
448 assert(text->type != word_CodeQuote &&
449 text->type != word_WkCodeQuote);
450 return (((towordstyle(text->type) == word_Emph ||
451 towordstyle(text->type) == word_Code)
452 ? (attraux(text->aux) == attr_Only ? 2 :
453 attraux(text->aux) == attr_Always ? 0 : 1)
454 : 0) + 1);
455 }
456 return 0; /* should never happen */
457}
458
459static void text_heading(FILE *fp, word *tprefix, word *nprefix, word *text,
460 alignstruct align, int indent, int width) {
461 rdstringc t = { 0, 0, NULL };
462 int margin, length;
463 int firstlinewidth, wrapwidth;
464 wrappedline *wrapping, *p;
465
466 if (align.just_numbers && nprefix) {
467 text_rdaddwc(&t, nprefix, NULL);
468 rdaddc(&t, ' '); /* FIXME: as below */
469 } else if (!align.just_numbers && tprefix) {
470 text_rdaddwc(&t, tprefix, NULL);
471 rdaddsc(&t, ": "); /* FIXME: configurability */
472 }
473 margin = length = (t.text ? strlen(t.text) : 0);
474
475 if (align.align == LEFTPLUS) {
476 margin = indent - margin;
477 if (margin < 0) margin = 0;
478 firstlinewidth = indent + width - margin - length;
479 wrapwidth = width;
480 } else if (align.align == LEFT || align.align == CENTRE) {
481 margin = 0;
482 firstlinewidth = indent + width - length;
483 wrapwidth = indent + width;
484 }
485
486 wrapping = wrap_para(text, firstlinewidth, wrapwidth, text_width);
487 for (p = wrapping; p; p = p->next) {
488 text_rdaddwc(&t, p->begin, p->end);
489 length = (t.text ? strlen(t.text) : 0);
490 if (align.align == CENTRE) {
491 margin = (indent + width - length)/2;
492 if (margin < 0) margin = 0;
493 }
494 fprintf(fp, "%*s%s\n", margin, "", t.text);
495 if (align.underline != L'\0') {
496 char *u, uc;
497 wchar_t uw[2];
498 uw[0] = align.underline; uw[1] = L'\0';
499 text_convert(uw, &u);
500 uc = u[0];
501 sfree(u);
502 fprintf(fp, "%*s", margin, "");
503 while (length--)
504 putc(uc, fp);
505 putc('\n', fp);
506 }
507 if (align.align == LEFTPLUS)
508 margin = indent;
509 else
510 margin = 0;
511 sfree(t.text);
512 t = empty_rdstringc;
513 }
514 wrap_free(wrapping);
515 putc('\n', fp);
516
517 sfree(t.text);
518}
519
/*
 * Write a horizontal rule to `fp': `indent' spaces, then `width'
 * dashes, then a newline and a blank line.
 *
 * Both counts come from configuration values that this file does
 * not range-check, so a negative `indent' or `width' is treated as
 * zero instead of being counted down through the whole range of
 * `int'.
 */
static void text_rule(FILE *fp, int indent, int width) {
    int i;

    for (i = 0; i < indent; i++)
        putc(' ', fp);
    for (i = 0; i < width; i++)
        putc('-', fp);                 /* FIXME: configurability! */
    putc('\n', fp);
    putc('\n', fp);
}
526
527static void text_para(FILE *fp, word *prefix, char *prefixextra, word *text,
528 int indent, int extraindent, int width) {
529 wrappedline *wrapping, *p;
530 rdstringc pfx = { 0, 0, NULL };
531 int e;
532 int firstlinewidth = width;
533
534 if (prefix) {
535 text_rdaddwc(&pfx, prefix, NULL);
536 if (prefixextra)
537 rdaddsc(&pfx, prefixextra);
538 fprintf(fp, "%*s%s", indent, "", pfx.text);
c83c6495 539 /* If the prefix is too long, shorten the first line to fit. */
d7482997 540 e = extraindent - strlen(pfx.text);
541 if (e < 0) {
c83c6495 542 firstlinewidth += e; /* this decreases it, since e < 0 */
d7482997 543 if (firstlinewidth < 0) {
544 e = indent + extraindent;
545 firstlinewidth = width;
546 fprintf(fp, "\n");
c83c6495 547 } else
548 e = 0;
d7482997 549 }
550 sfree(pfx.text);
551 } else
552 e = indent + extraindent;
553
554 wrapping = wrap_para(text, firstlinewidth, width, text_width);
555 for (p = wrapping; p; p = p->next) {
556 rdstringc t = { 0, 0, NULL };
557 text_rdaddwc(&t, p->begin, p->end);
558 fprintf(fp, "%*s%s\n", e, "", t.text);
559 e = indent + extraindent;
560 sfree(t.text);
561 }
562 wrap_free(wrapping);
563 putc('\n', fp);
564}
565
566static void text_codepara(FILE *fp, word *text, int indent, int width) {
567 for (; text; text = text->next) if (text->type == word_WeakCode) {
568 char *c;
569 text_convert(text->text, &c);
570 if (strlen(c) > (size_t)width) {
571 /* FIXME: warn */
572 }
573 fprintf(fp, "%*s%s\n", indent, "", c);
574 sfree(c);
575 }
576
577 putc('\n', fp);
578}
579
580static void text_versionid(FILE *fp, word *text) {
581 rdstringc t = { 0, 0, NULL };
582
583 rdaddc(&t, '['); /* FIXME: configurability */
584 text_rdaddwc(&t, text, NULL);
585 rdaddc(&t, ']'); /* FIXME: configurability */
586
587 fprintf(fp, "%s\n", t.text);
588 sfree(t.text);
589}