Add configurability for the suffix after the section number and
[sgt/halibut] / bk_text.c
1 /*
2 * text backend for Halibut
3 */
4
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <assert.h>
8 #include "halibut.h"
9
/* Horizontal placement of a heading; see text_heading() for the layout. */
typedef enum {
    LEFT,
    LEFTPLUS,
    CENTRE
} alignment;

/* Presentation settings for one class of heading. */
typedef struct {
    /* Where the heading is placed on the line. */
    alignment align;
    /*
     * Selects which prefix text_heading() emits: nonzero picks its
     * `nprefix' argument, zero picks `tprefix'.
     */
    int just_numbers;
    /* Character used to underline the heading; L'\0' means none. */
    wchar_t underline;
    /* Text appended to the chosen prefix, before the heading text. */
    wchar_t *number_suffix;
} alignstruct;
17
18 typedef struct {
19 int indent, indent_code;
20 int listindentbefore, listindentafter;
21 int width;
22 alignstruct atitle, achapter, *asect;
23 int nasect;
24 int include_version_id;
25 int indent_preambles;
26 word bullet;
27 } textconfig;
28
29 static int text_convert(wchar_t *, char **);
30
31 static void text_heading(FILE *, word *, word *, word *, alignstruct, int,int);
32 static void text_rule(FILE *, int, int);
33 static void text_para(FILE *, word *, char *, word *, int, int, int);
34 static void text_codepara(FILE *, word *, int, int);
35 static void text_versionid(FILE *, word *);
36
37 static alignment utoalign(wchar_t *p) {
38 if (!ustricmp(p, L"centre") || !ustricmp(p, L"center"))
39 return CENTRE;
40 if (!ustricmp(p, L"leftplus"))
41 return LEFTPLUS;
42 return LEFT;
43 }
44
45 static textconfig text_configure(paragraph *source) {
46 textconfig ret;
47
48 /*
49 * Non-negotiables.
50 */
51 ret.bullet.next = NULL;
52 ret.bullet.alt = NULL;
53 ret.bullet.type = word_Normal;
54 ret.atitle.just_numbers = FALSE; /* ignored */
55
56 /*
57 * Defaults.
58 */
59 ret.indent = 7;
60 ret.indent_code = 2;
61 ret.listindentbefore = 1;
62 ret.listindentafter = 3;
63 ret.width = 68;
64 ret.atitle.align = CENTRE;
65 ret.atitle.underline = L'=';
66 ret.achapter.align = LEFT;
67 ret.achapter.just_numbers = FALSE;
68 ret.achapter.number_suffix = ustrdup(L": ");
69 ret.achapter.underline = L'-';
70 ret.nasect = 1;
71 ret.asect = mknewa(alignstruct, ret.nasect);
72 ret.asect[0].align = LEFTPLUS;
73 ret.asect[0].just_numbers = TRUE;
74 ret.asect[0].number_suffix = ustrdup(L" ");
75 ret.asect[0].underline = L'\0';
76 ret.include_version_id = TRUE;
77 ret.indent_preambles = FALSE;
78 ret.bullet.text = ustrdup(L"-");
79
80 for (; source; source = source->next) {
81 if (source->type == para_Config) {
82 if (!ustricmp(source->keyword, L"text-indent")) {
83 ret.indent = utoi(uadv(source->keyword));
84 } else if (!ustricmp(source->keyword, L"text-indent-code")) {
85 ret.indent_code = utoi(uadv(source->keyword));
86 } else if (!ustricmp(source->keyword, L"text-width")) {
87 ret.width = utoi(uadv(source->keyword));
88 } else if (!ustricmp(source->keyword, L"text-list-indent")) {
89 ret.listindentbefore = utoi(uadv(source->keyword));
90 } else if (!ustricmp(source->keyword, L"text-listitem-indent")) {
91 ret.listindentafter = utoi(uadv(source->keyword));
92 } else if (!ustricmp(source->keyword, L"text-chapter-align")) {
93 ret.achapter.align = utoalign(uadv(source->keyword));
94 } else if (!ustricmp(source->keyword, L"text-chapter-underline")) {
95 ret.achapter.underline = *uadv(source->keyword);
96 } else if (!ustricmp(source->keyword, L"text-chapter-numeric")) {
97 ret.achapter.just_numbers = utob(uadv(source->keyword));
98 } else if (!ustricmp(source->keyword, L"text-chapter-suffix")) {
99 ret.achapter.number_suffix = uadv(source->keyword);
100 } else if (!ustricmp(source->keyword, L"text-section-align")) {
101 wchar_t *p = uadv(source->keyword);
102 int n = 0;
103 if (uisdigit(*p)) {
104 n = utoi(p);
105 p = uadv(p);
106 }
107 if (n >= ret.nasect) {
108 int i;
109 ret.asect = resize(ret.asect, n+1);
110 for (i = ret.nasect; i <= n; i++)
111 ret.asect[i] = ret.asect[ret.nasect-1];
112 ret.nasect = n+1;
113 }
114 ret.asect[n].align = utoalign(p);
115 } else if (!ustricmp(source->keyword, L"text-section-underline")) {
116 wchar_t *p = uadv(source->keyword);
117 int n = 0;
118 if (uisdigit(*p)) {
119 n = utoi(p);
120 p = uadv(p);
121 }
122 if (n >= ret.nasect) {
123 int i;
124 ret.asect = resize(ret.asect, n+1);
125 for (i = ret.nasect; i <= n; i++)
126 ret.asect[i] = ret.asect[ret.nasect-1];
127 ret.nasect = n+1;
128 }
129 ret.asect[n].underline = *p;
130 } else if (!ustricmp(source->keyword, L"text-section-numeric")) {
131 wchar_t *p = uadv(source->keyword);
132 int n = 0;
133 if (uisdigit(*p)) {
134 n = utoi(p);
135 p = uadv(p);
136 }
137 if (n >= ret.nasect) {
138 int i;
139 ret.asect = resize(ret.asect, n+1);
140 for (i = ret.nasect; i <= n; i++)
141 ret.asect[i] = ret.asect[ret.nasect-1];
142 ret.nasect = n+1;
143 }
144 ret.asect[n].just_numbers = utob(p);
145 } else if (!ustricmp(source->keyword, L"text-section-suffix")) {
146 wchar_t *p = uadv(source->keyword);
147 int n = 0;
148 if (uisdigit(*p)) {
149 n = utoi(p);
150 p = uadv(p);
151 }
152 if (n >= ret.nasect) {
153 int i;
154 ret.asect = resize(ret.asect, n+1);
155 for (i = ret.nasect; i <= n; i++)
156 ret.asect[i] = ret.asect[ret.nasect-1];
157 ret.nasect = n+1;
158 }
159 ret.asect[n].number_suffix = p;
160 } else if (!ustricmp(source->keyword, L"text-title-align")) {
161 ret.atitle.align = utoalign(uadv(source->keyword));
162 } else if (!ustricmp(source->keyword, L"text-title-underline")) {
163 ret.atitle.underline = *uadv(source->keyword);
164 } else if (!ustricmp(source->keyword, L"text-versionid")) {
165 ret.include_version_id = utob(uadv(source->keyword));
166 } else if (!ustricmp(source->keyword, L"text-indent-preamble")) {
167 ret.indent_preambles = utob(uadv(source->keyword));
168 } else if (!ustricmp(source->keyword, L"text-bullet")) {
169 ret.bullet.text = uadv(source->keyword);
170 }
171 }
172 }
173
174 return ret;
175 }
176
177 void text_backend(paragraph *sourceform, keywordlist *keywords,
178 indexdata *idx) {
179 paragraph *p;
180 textconfig conf;
181 word *prefix, *body, *wp;
182 word spaceword;
183 FILE *fp;
184 char *prefixextra;
185 int indentb, indenta;
186
187 IGNORE(keywords); /* we don't happen to need this */
188 IGNORE(idx); /* or this */
189
190 conf = text_configure(sourceform);
191
192 /*
193 * Determine the output file name, and open the output file
194 *
195 * FIXME: want configurable output file names here. For the
196 * moment, we'll just call it `output.txt'.
197 */
198 fp = fopen("output.txt", "w");
199 if (!fp) {
200 error(err_cantopenw, "output.txt");
201 return;
202 }
203
204 /* Do the title */
205 for (p = sourceform; p; p = p->next)
206 if (p->type == para_Title)
207 text_heading(fp, NULL, NULL, p->words,
208 conf.atitle, conf.indent, conf.width);
209
210 /* Do the preamble and copyright */
211 for (p = sourceform; p; p = p->next)
212 if (p->type == para_Preamble)
213 text_para(fp, NULL, NULL, p->words,
214 conf.indent_preambles ? conf.indent : 0, 0,
215 conf.width + (conf.indent_preambles ? 0 : conf.indent));
216 for (p = sourceform; p; p = p->next)
217 if (p->type == para_Copyright)
218 text_para(fp, NULL, NULL, p->words,
219 conf.indent_preambles ? conf.indent : 0, 0,
220 conf.width + (conf.indent_preambles ? 0 : conf.indent));
221
222 /* Do the main document */
223 for (p = sourceform; p; p = p->next) switch (p->type) {
224
225 /*
226 * Things we ignore because we've already processed them or
227 * aren't going to touch them in this pass.
228 */
229 case para_IM:
230 case para_BR:
231 case para_Biblio: /* only touch BiblioCited */
232 case para_VersionID:
233 case para_Copyright:
234 case para_Preamble:
235 case para_NoCite:
236 case para_Title:
237 break;
238
239 /*
240 * Chapter titles.
241 */
242 case para_Chapter:
243 case para_Appendix:
244 case para_UnnumberedChapter:
245 text_heading(fp, p->kwtext, p->kwtext2, p->words,
246 conf.achapter, conf.indent, conf.width);
247 break;
248
249 case para_Heading:
250 case para_Subsect:
251 text_heading(fp, p->kwtext, p->kwtext2, p->words,
252 conf.asect[p->aux>=conf.nasect ? conf.nasect-1 : p->aux],
253 conf.indent, conf.width);
254 break;
255
256 case para_Rule:
257 text_rule(fp, conf.indent, conf.width);
258 break;
259
260 case para_Normal:
261 case para_BiblioCited:
262 case para_Bullet:
263 case para_NumberedList:
264 if (p->type == para_Bullet) {
265 prefix = &conf.bullet;
266 prefixextra = NULL;
267 indentb = conf.listindentbefore;
268 indenta = conf.listindentafter;
269 } else if (p->type == para_NumberedList) {
270 prefix = p->kwtext;
271 prefixextra = "."; /* FIXME: configurability */
272 indentb = conf.listindentbefore;
273 indenta = conf.listindentafter;
274 } else {
275 prefix = NULL;
276 prefixextra = NULL;
277 indentb = indenta = 0;
278 }
279 if (p->type == para_BiblioCited) {
280 body = dup_word_list(p->kwtext);
281 for (wp = body; wp->next; wp = wp->next);
282 wp->next = &spaceword;
283 spaceword.next = p->words;
284 spaceword.alt = NULL;
285 spaceword.type = word_WhiteSpace;
286 spaceword.text = NULL;
287 } else {
288 wp = NULL;
289 body = p->words;
290 }
291 text_para(fp, prefix, prefixextra, body,
292 conf.indent + indentb, indenta,
293 conf.width - indentb - indenta);
294 if (wp) {
295 wp->next = NULL;
296 free_word_list(body);
297 }
298 break;
299
300 case para_Code:
301 text_codepara(fp, p->words, conf.indent + conf.indent_code, conf.width - 2 * conf.indent_code);
302 break;
303 }
304
305 /* Do the version ID */
306 if (conf.include_version_id) {
307 for (p = sourceform; p; p = p->next)
308 if (p->type == para_VersionID)
309 text_versionid(fp, p->words);
310 }
311
312 /*
313 * Tidy up
314 */
315 fclose(fp);
316 sfree(conf.bullet.text);
317 }
318
319 /*
320 * Convert a wide string into a string of chars. If `result' is
321 * non-NULL, mallocs the resulting string and stores a pointer to
322 * it in `*result'. If `result' is NULL, merely checks whether all
323 * characters in the string are feasible for the output character
324 * set.
325 *
326 * Return is nonzero if all characters are OK. If not all
327 * characters are OK but `result' is non-NULL, a result _will_
328 * still be generated!
329 */
330 static int text_convert(wchar_t *s, char **result) {
331 /*
332 * FIXME. Currently this is ISO8859-1 only.
333 */
334 int doing = (result != 0);
335 int ok = TRUE;
336 char *p = NULL;
337 int plen = 0, psize = 0;
338
339 for (; *s; s++) {
340 wchar_t c = *s;
341 char outc;
342
343 if ((c >= 32 && c <= 126) ||
344 (c >= 160 && c <= 255)) {
345 /* Char is OK. */
346 outc = (char)c;
347 } else {
348 /* Char is not OK. */
349 ok = FALSE;
350 outc = 0xBF; /* approximate the good old DEC `uh?' */
351 }
352 if (doing) {
353 if (plen >= psize) {
354 psize = plen + 256;
355 p = resize(p, psize);
356 }
357 p[plen++] = outc;
358 }
359 }
360 if (doing) {
361 p = resize(p, plen+1);
362 p[plen] = '\0';
363 *result = p;
364 }
365 return ok;
366 }
367
368 static void text_rdaddwc(rdstringc *rs, word *text, word *end) {
369 char *c;
370
371 for (; text && text != end; text = text->next) switch (text->type) {
372 case word_HyperLink:
373 case word_HyperEnd:
374 case word_UpperXref:
375 case word_LowerXref:
376 case word_XrefEnd:
377 case word_IndexRef:
378 break;
379
380 case word_Normal:
381 case word_Emph:
382 case word_Code:
383 case word_WeakCode:
384 case word_WhiteSpace:
385 case word_EmphSpace:
386 case word_CodeSpace:
387 case word_WkCodeSpace:
388 case word_Quote:
389 case word_EmphQuote:
390 case word_CodeQuote:
391 case word_WkCodeQuote:
392 assert(text->type != word_CodeQuote &&
393 text->type != word_WkCodeQuote);
394 if (towordstyle(text->type) == word_Emph &&
395 (attraux(text->aux) == attr_First ||
396 attraux(text->aux) == attr_Only))
397 rdaddc(rs, '_'); /* FIXME: configurability */
398 else if (towordstyle(text->type) == word_Code &&
399 (attraux(text->aux) == attr_First ||
400 attraux(text->aux) == attr_Only))
401 rdaddc(rs, '`'); /* FIXME: configurability */
402 if (removeattr(text->type) == word_Normal) {
403 if (text_convert(text->text, &c))
404 rdaddsc(rs, c);
405 else
406 text_rdaddwc(rs, text->alt, NULL);
407 sfree(c);
408 } else if (removeattr(text->type) == word_WhiteSpace) {
409 rdaddc(rs, ' ');
410 } else if (removeattr(text->type) == word_Quote) {
411 rdaddc(rs, quoteaux(text->aux) == quote_Open ? '`' : '\'');
412 /* FIXME: configurability */
413 }
414 if (towordstyle(text->type) == word_Emph &&
415 (attraux(text->aux) == attr_Last ||
416 attraux(text->aux) == attr_Only))
417 rdaddc(rs, '_'); /* FIXME: configurability */
418 else if (towordstyle(text->type) == word_Code &&
419 (attraux(text->aux) == attr_Last ||
420 attraux(text->aux) == attr_Only))
421 rdaddc(rs, '\''); /* FIXME: configurability */
422 break;
423 }
424 }
425
426 static int text_width(word *);
427
428 static int text_width_list(word *text) {
429 int w = 0;
430 while (text) {
431 w += text_width(text);
432 text = text->next;
433 }
434 return w;
435 }
436
437 static int text_width(word *text) {
438 switch (text->type) {
439 case word_HyperLink:
440 case word_HyperEnd:
441 case word_UpperXref:
442 case word_LowerXref:
443 case word_XrefEnd:
444 case word_IndexRef:
445 return 0;
446
447 case word_Normal:
448 case word_Emph:
449 case word_Code:
450 case word_WeakCode:
451 return (((text->type == word_Emph ||
452 text->type == word_Code)
453 ? (attraux(text->aux) == attr_Only ? 2 :
454 attraux(text->aux) == attr_Always ? 0 : 1)
455 : 0) +
456 (text_convert(text->text, NULL) ?
457 ustrlen(text->text) :
458 text_width_list(text->alt)));
459
460 case word_WhiteSpace:
461 case word_EmphSpace:
462 case word_CodeSpace:
463 case word_WkCodeSpace:
464 case word_Quote:
465 case word_EmphQuote:
466 case word_CodeQuote:
467 case word_WkCodeQuote:
468 assert(text->type != word_CodeQuote &&
469 text->type != word_WkCodeQuote);
470 return (((towordstyle(text->type) == word_Emph ||
471 towordstyle(text->type) == word_Code)
472 ? (attraux(text->aux) == attr_Only ? 2 :
473 attraux(text->aux) == attr_Always ? 0 : 1)
474 : 0) + 1);
475 }
476 return 0; /* should never happen */
477 }
478
479 static void text_heading(FILE *fp, word *tprefix, word *nprefix, word *text,
480 alignstruct align, int indent, int width) {
481 rdstringc t = { 0, 0, NULL };
482 int margin, length;
483 int firstlinewidth, wrapwidth;
484 wrappedline *wrapping, *p;
485
486 if (align.just_numbers && nprefix) {
487 char *c;
488 text_rdaddwc(&t, nprefix, NULL);
489 if (text_convert(align.number_suffix, &c)) {
490 rdaddsc(&t, c);
491 sfree(c);
492 }
493 } else if (!align.just_numbers && tprefix) {
494 char *c;
495 text_rdaddwc(&t, tprefix, NULL);
496 if (text_convert(align.number_suffix, &c)) {
497 rdaddsc(&t, c);
498 sfree(c);
499 }
500 }
501 margin = length = (t.text ? strlen(t.text) : 0);
502
503 if (align.align == LEFTPLUS) {
504 margin = indent - margin;
505 if (margin < 0) margin = 0;
506 firstlinewidth = indent + width - margin - length;
507 wrapwidth = width;
508 } else if (align.align == LEFT || align.align == CENTRE) {
509 margin = 0;
510 firstlinewidth = indent + width - length;
511 wrapwidth = indent + width;
512 }
513
514 wrapping = wrap_para(text, firstlinewidth, wrapwidth, text_width);
515 for (p = wrapping; p; p = p->next) {
516 text_rdaddwc(&t, p->begin, p->end);
517 length = (t.text ? strlen(t.text) : 0);
518 if (align.align == CENTRE) {
519 margin = (indent + width - length)/2;
520 if (margin < 0) margin = 0;
521 }
522 fprintf(fp, "%*s%s\n", margin, "", t.text);
523 if (align.underline != L'\0') {
524 char *u, uc;
525 wchar_t uw[2];
526 uw[0] = align.underline; uw[1] = L'\0';
527 text_convert(uw, &u);
528 uc = u[0];
529 sfree(u);
530 fprintf(fp, "%*s", margin, "");
531 while (length--)
532 putc(uc, fp);
533 putc('\n', fp);
534 }
535 if (align.align == LEFTPLUS)
536 margin = indent;
537 else
538 margin = 0;
539 sfree(t.text);
540 t = empty_rdstringc;
541 }
542 wrap_free(wrapping);
543 putc('\n', fp);
544
545 sfree(t.text);
546 }
547
/*
 * Write a horizontal rule to `fp': `indent' spaces, then `width'
 * dashes, then a newline and a blank line.
 *
 * A negative `indent' or `width' is treated as zero, so that a bad
 * configuration value cannot make the counting loops run away.
 */
static void text_rule(FILE *fp, int indent, int width) {
    int i;

    for (i = 0; i < indent; i++)
        putc(' ', fp);
    for (i = 0; i < width; i++)
        putc('-', fp);                 /* FIXME: configurability! */
    putc('\n', fp);
    putc('\n', fp);
}
554
555 static void text_para(FILE *fp, word *prefix, char *prefixextra, word *text,
556 int indent, int extraindent, int width) {
557 wrappedline *wrapping, *p;
558 rdstringc pfx = { 0, 0, NULL };
559 int e;
560 int firstlinewidth = width;
561
562 if (prefix) {
563 text_rdaddwc(&pfx, prefix, NULL);
564 if (prefixextra)
565 rdaddsc(&pfx, prefixextra);
566 fprintf(fp, "%*s%s", indent, "", pfx.text);
567 /* If the prefix is too long, shorten the first line to fit. */
568 e = extraindent - strlen(pfx.text);
569 if (e < 0) {
570 firstlinewidth += e; /* this decreases it, since e < 0 */
571 if (firstlinewidth < 0) {
572 e = indent + extraindent;
573 firstlinewidth = width;
574 fprintf(fp, "\n");
575 } else
576 e = 0;
577 }
578 sfree(pfx.text);
579 } else
580 e = indent + extraindent;
581
582 wrapping = wrap_para(text, firstlinewidth, width, text_width);
583 for (p = wrapping; p; p = p->next) {
584 rdstringc t = { 0, 0, NULL };
585 text_rdaddwc(&t, p->begin, p->end);
586 fprintf(fp, "%*s%s\n", e, "", t.text);
587 e = indent + extraindent;
588 sfree(t.text);
589 }
590 wrap_free(wrapping);
591 putc('\n', fp);
592 }
593
594 static void text_codepara(FILE *fp, word *text, int indent, int width) {
595 for (; text; text = text->next) if (text->type == word_WeakCode) {
596 char *c;
597 text_convert(text->text, &c);
598 if (strlen(c) > (size_t)width) {
599 /* FIXME: warn */
600 }
601 fprintf(fp, "%*s%s\n", indent, "", c);
602 sfree(c);
603 }
604
605 putc('\n', fp);
606 }
607
608 static void text_versionid(FILE *fp, word *text) {
609 rdstringc t = { 0, 0, NULL };
610
611 rdaddc(&t, '['); /* FIXME: configurability */
612 text_rdaddwc(&t, text, NULL);
613 rdaddc(&t, ']'); /* FIXME: configurability */
614
615 fprintf(fp, "%s\n", t.text);
616 sfree(t.text);
617 }