Rewrite ustrftime(), so that (a) it uses wcsftime() where available,
[sgt/halibut] / bk_text.c
CommitLineData
d7482997 1/*
2 * text backend for Halibut
3 */
4
5#include <stdio.h>
6#include <stdlib.h>
7#include <assert.h>
8#include "halibut.h"
9
/*
 * Horizontal placement of a heading, as selected by utoalign().
 */
typedef enum {
    LEFT,
    LEFTPLUS,
    CENTRE
} alignment;

/*
 * Formatting parameters for one class of heading (title, chapter,
 * or one level of section).
 */
typedef struct {
    alignment align;                   /* where the heading is placed */
    int just_numbers;                  /* use the numeric prefix, not the textual one */
    wchar_t underline;                 /* underline character; L'\0' for none */
    wchar_t *number_suffix;            /* appended after the heading prefix */
} alignstruct;
17
18typedef struct {
19 int indent, indent_code;
20 int listindentbefore, listindentafter;
21 int width;
22 alignstruct atitle, achapter, *asect;
23 int nasect;
24 int include_version_id;
25 int indent_preambles;
2ac8ceac 26 int charset;
d7482997 27 word bullet;
50d6b4bd 28 char *filename;
d7482997 29} textconfig;
30
2ac8ceac 31typedef struct {
32 FILE *fp;
33 int charset;
34 charset_state state;
35} textfile;
36
37static void text_heading(textfile *, word *, word *, word *, alignstruct,
38 int,int);
39static void text_rule(textfile *, int, int);
40static void text_para(textfile *, word *, wchar_t *, word *, int, int, int);
41static void text_codepara(textfile *, word *, int, int);
42static void text_versionid(textfile *, word *);
d7482997 43
2ac8ceac 44static void text_output(textfile *, const wchar_t *);
45static void text_output_many(textfile *, int, wchar_t);
d7482997 46
47static alignment utoalign(wchar_t *p) {
48 if (!ustricmp(p, L"centre") || !ustricmp(p, L"center"))
49 return CENTRE;
50 if (!ustricmp(p, L"leftplus"))
51 return LEFTPLUS;
52 return LEFT;
53}
54
55static textconfig text_configure(paragraph *source) {
56 textconfig ret;
57
58 /*
59 * Non-negotiables.
60 */
61 ret.bullet.next = NULL;
62 ret.bullet.alt = NULL;
63 ret.bullet.type = word_Normal;
64 ret.atitle.just_numbers = FALSE; /* ignored */
65
66 /*
67 * Defaults.
68 */
69 ret.indent = 7;
70 ret.indent_code = 2;
71 ret.listindentbefore = 1;
72 ret.listindentafter = 3;
73 ret.width = 68;
74 ret.atitle.align = CENTRE;
75 ret.atitle.underline = L'=';
76 ret.achapter.align = LEFT;
77 ret.achapter.just_numbers = FALSE;
e5e6bf9d 78 ret.achapter.number_suffix = L": ";
d7482997 79 ret.achapter.underline = L'-';
80 ret.nasect = 1;
81 ret.asect = mknewa(alignstruct, ret.nasect);
82 ret.asect[0].align = LEFTPLUS;
83 ret.asect[0].just_numbers = TRUE;
e5e6bf9d 84 ret.asect[0].number_suffix = L" ";
d7482997 85 ret.asect[0].underline = L'\0';
86 ret.include_version_id = TRUE;
87 ret.indent_preambles = FALSE;
e5e6bf9d 88 ret.bullet.text = L"-";
50d6b4bd 89 ret.filename = dupstr("output.txt");
2ac8ceac 90 ret.charset = CS_ASCII;
d7482997 91
92 for (; source; source = source->next) {
93 if (source->type == para_Config) {
94 if (!ustricmp(source->keyword, L"text-indent")) {
95 ret.indent = utoi(uadv(source->keyword));
2ac8ceac 96 } else if (!ustricmp(source->keyword, L"text-charset")) {
97 char *csname = utoa_dup(uadv(source->keyword), CS_ASCII);
98 ret.charset = charset_from_localenc(csname);
99 sfree(csname);
50d6b4bd 100 } else if (!ustricmp(source->keyword, L"text-filename")) {
101 sfree(ret.filename);
e4ea58f8 102 ret.filename = dupstr(adv(source->origkeyword));
d7482997 103 } else if (!ustricmp(source->keyword, L"text-indent-code")) {
104 ret.indent_code = utoi(uadv(source->keyword));
105 } else if (!ustricmp(source->keyword, L"text-width")) {
106 ret.width = utoi(uadv(source->keyword));
107 } else if (!ustricmp(source->keyword, L"text-list-indent")) {
108 ret.listindentbefore = utoi(uadv(source->keyword));
109 } else if (!ustricmp(source->keyword, L"text-listitem-indent")) {
110 ret.listindentafter = utoi(uadv(source->keyword));
111 } else if (!ustricmp(source->keyword, L"text-chapter-align")) {
112 ret.achapter.align = utoalign(uadv(source->keyword));
113 } else if (!ustricmp(source->keyword, L"text-chapter-underline")) {
114 ret.achapter.underline = *uadv(source->keyword);
115 } else if (!ustricmp(source->keyword, L"text-chapter-numeric")) {
c83c6495 116 ret.achapter.just_numbers = utob(uadv(source->keyword));
63223c78 117 } else if (!ustricmp(source->keyword, L"text-chapter-suffix")) {
e5e6bf9d 118 ret.achapter.number_suffix = uadv(source->keyword);
d7482997 119 } else if (!ustricmp(source->keyword, L"text-section-align")) {
120 wchar_t *p = uadv(source->keyword);
121 int n = 0;
122 if (uisdigit(*p)) {
123 n = utoi(p);
124 p = uadv(p);
125 }
126 if (n >= ret.nasect) {
127 int i;
128 ret.asect = resize(ret.asect, n+1);
129 for (i = ret.nasect; i <= n; i++)
130 ret.asect[i] = ret.asect[ret.nasect-1];
131 ret.nasect = n+1;
132 }
133 ret.asect[n].align = utoalign(p);
134 } else if (!ustricmp(source->keyword, L"text-section-underline")) {
135 wchar_t *p = uadv(source->keyword);
136 int n = 0;
137 if (uisdigit(*p)) {
138 n = utoi(p);
139 p = uadv(p);
140 }
141 if (n >= ret.nasect) {
142 int i;
143 ret.asect = resize(ret.asect, n+1);
144 for (i = ret.nasect; i <= n; i++)
145 ret.asect[i] = ret.asect[ret.nasect-1];
146 ret.nasect = n+1;
147 }
148 ret.asect[n].underline = *p;
149 } else if (!ustricmp(source->keyword, L"text-section-numeric")) {
150 wchar_t *p = uadv(source->keyword);
151 int n = 0;
152 if (uisdigit(*p)) {
153 n = utoi(p);
154 p = uadv(p);
155 }
156 if (n >= ret.nasect) {
157 int i;
158 ret.asect = resize(ret.asect, n+1);
159 for (i = ret.nasect; i <= n; i++)
160 ret.asect[i] = ret.asect[ret.nasect-1];
161 ret.nasect = n+1;
162 }
163 ret.asect[n].just_numbers = utob(p);
63223c78 164 } else if (!ustricmp(source->keyword, L"text-section-suffix")) {
165 wchar_t *p = uadv(source->keyword);
166 int n = 0;
167 if (uisdigit(*p)) {
168 n = utoi(p);
169 p = uadv(p);
170 }
171 if (n >= ret.nasect) {
172 int i;
173 ret.asect = resize(ret.asect, n+1);
e5e6bf9d 174 for (i = ret.nasect; i <= n; i++) {
63223c78 175 ret.asect[i] = ret.asect[ret.nasect-1];
e5e6bf9d 176 }
63223c78 177 ret.nasect = n+1;
178 }
e5e6bf9d 179 ret.asect[n].number_suffix = p;
d7482997 180 } else if (!ustricmp(source->keyword, L"text-title-align")) {
181 ret.atitle.align = utoalign(uadv(source->keyword));
182 } else if (!ustricmp(source->keyword, L"text-title-underline")) {
183 ret.atitle.underline = *uadv(source->keyword);
184 } else if (!ustricmp(source->keyword, L"text-versionid")) {
185 ret.include_version_id = utob(uadv(source->keyword));
186 } else if (!ustricmp(source->keyword, L"text-indent-preamble")) {
187 ret.indent_preambles = utob(uadv(source->keyword));
188 } else if (!ustricmp(source->keyword, L"text-bullet")) {
189 ret.bullet.text = uadv(source->keyword);
190 }
191 }
192 }
193
194 return ret;
195}
196
ba9c1487 197paragraph *text_config_filename(char *filename)
198{
e4ea58f8 199 return cmdline_cfg_simple("text-filename", filename, NULL);
ba9c1487 200}
201
d7482997 202void text_backend(paragraph *sourceform, keywordlist *keywords,
43341922 203 indexdata *idx, void *unused) {
d7482997 204 paragraph *p;
205 textconfig conf;
206 word *prefix, *body, *wp;
207 word spaceword;
2ac8ceac 208 textfile tf;
209 wchar_t *prefixextra;
7136a6c7 210 int nesting, nestindent;
d7482997 211 int indentb, indenta;
212
43341922 213 IGNORE(unused);
d7482997 214 IGNORE(keywords); /* we don't happen to need this */
215 IGNORE(idx); /* or this */
216
217 conf = text_configure(sourceform);
218
219 /*
50d6b4bd 220 * Open the output file.
d7482997 221 */
2ac8ceac 222 tf.fp = fopen(conf.filename, "w");
223 if (!tf.fp) {
50d6b4bd 224 error(err_cantopenw, conf.filename);
d7482997 225 return;
226 }
2ac8ceac 227 tf.charset = conf.charset;
228 tf.state = charset_init_state;
d7482997 229
230 /* Do the title */
231 for (p = sourceform; p; p = p->next)
232 if (p->type == para_Title)
2ac8ceac 233 text_heading(&tf, NULL, NULL, p->words,
d7482997 234 conf.atitle, conf.indent, conf.width);
235
7136a6c7 236 nestindent = conf.listindentbefore + conf.listindentafter;
8902e0ed 237 nesting = (conf.indent_preambles ? 0 : -conf.indent);
7136a6c7 238
d7482997 239 /* Do the main document */
240 for (p = sourceform; p; p = p->next) switch (p->type) {
241
2614b01d 242 case para_QuotePush:
243 nesting += 2;
244 break;
245 case para_QuotePop:
246 nesting -= 2;
247 assert(nesting >= 0);
248 break;
249
7136a6c7 250 case para_LcontPush:
2614b01d 251 nesting += nestindent;
7136a6c7 252 break;
253 case para_LcontPop:
2614b01d 254 nesting -= nestindent;
255 assert(nesting >= 0);
7136a6c7 256 break;
257
d7482997 258 /*
259 * Things we ignore because we've already processed them or
260 * aren't going to touch them in this pass.
261 */
262 case para_IM:
263 case para_BR:
264 case para_Biblio: /* only touch BiblioCited */
265 case para_VersionID:
d7482997 266 case para_NoCite:
267 case para_Title:
268 break;
269
270 /*
271 * Chapter titles.
272 */
273 case para_Chapter:
274 case para_Appendix:
275 case para_UnnumberedChapter:
2ac8ceac 276 text_heading(&tf, p->kwtext, p->kwtext2, p->words,
d7482997 277 conf.achapter, conf.indent, conf.width);
8902e0ed 278 nesting = 0;
d7482997 279 break;
280
281 case para_Heading:
282 case para_Subsect:
2ac8ceac 283 text_heading(&tf, p->kwtext, p->kwtext2, p->words,
d7482997 284 conf.asect[p->aux>=conf.nasect ? conf.nasect-1 : p->aux],
285 conf.indent, conf.width);
286 break;
287
288 case para_Rule:
2ac8ceac 289 text_rule(&tf, conf.indent + nesting, conf.width - nesting);
d7482997 290 break;
291
292 case para_Normal:
9057a0a8 293 case para_Copyright:
7136a6c7 294 case para_DescribedThing:
295 case para_Description:
d7482997 296 case para_BiblioCited:
297 case para_Bullet:
298 case para_NumberedList:
299 if (p->type == para_Bullet) {
300 prefix = &conf.bullet;
301 prefixextra = NULL;
302 indentb = conf.listindentbefore;
303 indenta = conf.listindentafter;
304 } else if (p->type == para_NumberedList) {
305 prefix = p->kwtext;
2ac8ceac 306 prefixextra = L"."; /* FIXME: configurability */
d7482997 307 indentb = conf.listindentbefore;
308 indenta = conf.listindentafter;
7136a6c7 309 } else if (p->type == para_Description) {
310 prefix = NULL;
311 prefixextra = NULL;
312 indentb = conf.listindentbefore;
313 indenta = conf.listindentafter;
d7482997 314 } else {
315 prefix = NULL;
316 prefixextra = NULL;
317 indentb = indenta = 0;
318 }
319 if (p->type == para_BiblioCited) {
320 body = dup_word_list(p->kwtext);
321 for (wp = body; wp->next; wp = wp->next);
322 wp->next = &spaceword;
323 spaceword.next = p->words;
324 spaceword.alt = NULL;
325 spaceword.type = word_WhiteSpace;
326 spaceword.text = NULL;
327 } else {
328 wp = NULL;
329 body = p->words;
330 }
2ac8ceac 331 text_para(&tf, prefix, prefixextra, body,
2614b01d 332 conf.indent + nesting + indentb, indenta,
333 conf.width - nesting - indentb - indenta);
d7482997 334 if (wp) {
335 wp->next = NULL;
336 free_word_list(body);
337 }
338 break;
339
340 case para_Code:
2ac8ceac 341 text_codepara(&tf, p->words,
2614b01d 342 conf.indent + nesting + conf.indent_code,
343 conf.width - nesting - 2 * conf.indent_code);
d7482997 344 break;
345 }
346
347 /* Do the version ID */
348 if (conf.include_version_id) {
349 for (p = sourceform; p; p = p->next)
350 if (p->type == para_VersionID)
2ac8ceac 351 text_versionid(&tf, p->words);
d7482997 352 }
353
354 /*
355 * Tidy up
356 */
2ac8ceac 357 text_output(&tf, NULL); /* end charset conversion */
358 fclose(tf.fp);
e5e6bf9d 359 sfree(conf.asect);
50d6b4bd 360 sfree(conf.filename);
d7482997 361}
362
2ac8ceac 363static void text_output(textfile *tf, const wchar_t *s)
364{
365 char buf[256];
366 int ret, len;
367 const wchar_t **sp;
368
369 if (!s) {
370 sp = NULL;
371 len = 1;
372 } else {
373 sp = &s;
374 len = ustrlen(s);
375 }
376
377 while (len > 0) {
378 ret = charset_from_unicode(sp, &len, buf, lenof(buf),
379 tf->charset, &tf->state, NULL);
380 if (!sp)
381 len = 0;
382 fwrite(buf, 1, ret, tf->fp);
d7482997 383 }
d7482997 384}
385
2ac8ceac 386static void text_output_many(textfile *tf, int n, wchar_t c)
387{
388 wchar_t s[2];
389 s[0] = c;
390 s[1] = L'\0';
391 while (n--)
392 text_output(tf, s);
393}
d7482997 394
2ac8ceac 395static void text_rdaddw(int charset, rdstring *rs, word *text, word *end) {
d7482997 396 for (; text && text != end; text = text->next) switch (text->type) {
397 case word_HyperLink:
398 case word_HyperEnd:
399 case word_UpperXref:
400 case word_LowerXref:
401 case word_XrefEnd:
402 case word_IndexRef:
403 break;
404
405 case word_Normal:
406 case word_Emph:
407 case word_Code:
408 case word_WeakCode:
409 case word_WhiteSpace:
410 case word_EmphSpace:
411 case word_CodeSpace:
412 case word_WkCodeSpace:
413 case word_Quote:
414 case word_EmphQuote:
415 case word_CodeQuote:
416 case word_WkCodeQuote:
417 assert(text->type != word_CodeQuote &&
418 text->type != word_WkCodeQuote);
419 if (towordstyle(text->type) == word_Emph &&
420 (attraux(text->aux) == attr_First ||
421 attraux(text->aux) == attr_Only))
2ac8ceac 422 rdadd(rs, L'_'); /* FIXME: configurability */
d7482997 423 else if (towordstyle(text->type) == word_Code &&
424 (attraux(text->aux) == attr_First ||
425 attraux(text->aux) == attr_Only))
2ac8ceac 426 rdadd(rs, L'`'); /* FIXME: configurability */
d7482997 427 if (removeattr(text->type) == word_Normal) {
91f93b94 428 if (cvt_ok(charset, text->text) || !text->alt)
2ac8ceac 429 rdadds(rs, text->text);
d7482997 430 else
2ac8ceac 431 text_rdaddw(charset, rs, text->alt, NULL);
d7482997 432 } else if (removeattr(text->type) == word_WhiteSpace) {
2ac8ceac 433 rdadd(rs, L' ');
d7482997 434 } else if (removeattr(text->type) == word_Quote) {
2ac8ceac 435 rdadd(rs, quoteaux(text->aux) == quote_Open ? L'`' : L'\'');
d7482997 436 /* FIXME: configurability */
437 }
438 if (towordstyle(text->type) == word_Emph &&
439 (attraux(text->aux) == attr_Last ||
440 attraux(text->aux) == attr_Only))
2ac8ceac 441 rdadd(rs, L'_'); /* FIXME: configurability */
d7482997 442 else if (towordstyle(text->type) == word_Code &&
443 (attraux(text->aux) == attr_Last ||
444 attraux(text->aux) == attr_Only))
2ac8ceac 445 rdadd(rs, L'\''); /* FIXME: configurability */
d7482997 446 break;
447 }
448}
449
43341922 450static int text_width(void *, word *);
d7482997 451
43341922 452static int text_width_list(void *ctx, word *text) {
d7482997 453 int w = 0;
454 while (text) {
43341922 455 w += text_width(ctx, text);
d7482997 456 text = text->next;
457 }
458 return w;
459}
460
43341922 461static int text_width(void *ctx, word *text) {
2ac8ceac 462 int charset = * (int *) ctx;
43341922 463
d7482997 464 switch (text->type) {
465 case word_HyperLink:
466 case word_HyperEnd:
467 case word_UpperXref:
468 case word_LowerXref:
469 case word_XrefEnd:
470 case word_IndexRef:
471 return 0;
472
473 case word_Normal:
474 case word_Emph:
475 case word_Code:
476 case word_WeakCode:
477 return (((text->type == word_Emph ||
478 text->type == word_Code)
479 ? (attraux(text->aux) == attr_Only ? 2 :
480 attraux(text->aux) == attr_Always ? 0 : 1)
481 : 0) +
91f93b94 482 (cvt_ok(charset, text->text) || !text->alt ?
e5cd393f 483 ustrwid(text->text, charset) :
43341922 484 text_width_list(ctx, text->alt)));
d7482997 485
486 case word_WhiteSpace:
487 case word_EmphSpace:
488 case word_CodeSpace:
489 case word_WkCodeSpace:
490 case word_Quote:
491 case word_EmphQuote:
492 case word_CodeQuote:
493 case word_WkCodeQuote:
494 assert(text->type != word_CodeQuote &&
495 text->type != word_WkCodeQuote);
496 return (((towordstyle(text->type) == word_Emph ||
497 towordstyle(text->type) == word_Code)
498 ? (attraux(text->aux) == attr_Only ? 2 :
499 attraux(text->aux) == attr_Always ? 0 : 1)
500 : 0) + 1);
501 }
502 return 0; /* should never happen */
503}
504
2ac8ceac 505static void text_heading(textfile *tf, word *tprefix, word *nprefix,
506 word *text, alignstruct align,
507 int indent, int width) {
508 rdstring t = { 0, 0, NULL };
d7482997 509 int margin, length;
510 int firstlinewidth, wrapwidth;
511 wrappedline *wrapping, *p;
512
513 if (align.just_numbers && nprefix) {
2ac8ceac 514 text_rdaddw(tf->charset, &t, nprefix, NULL);
515 rdadds(&t, align.number_suffix);
d7482997 516 } else if (!align.just_numbers && tprefix) {
2ac8ceac 517 text_rdaddw(tf->charset, &t, tprefix, NULL);
518 rdadds(&t, align.number_suffix);
d7482997 519 }
2ac8ceac 520 margin = length = t.pos;
d7482997 521
522 if (align.align == LEFTPLUS) {
523 margin = indent - margin;
524 if (margin < 0) margin = 0;
525 firstlinewidth = indent + width - margin - length;
526 wrapwidth = width;
527 } else if (align.align == LEFT || align.align == CENTRE) {
528 margin = 0;
529 firstlinewidth = indent + width - length;
530 wrapwidth = indent + width;
531 }
532
2ac8ceac 533 wrapping = wrap_para(text, firstlinewidth, wrapwidth,
534 text_width, &tf->charset, 0);
d7482997 535 for (p = wrapping; p; p = p->next) {
2ac8ceac 536 text_rdaddw(tf->charset, &t, p->begin, p->end);
537 length = t.pos;
d7482997 538 if (align.align == CENTRE) {
539 margin = (indent + width - length)/2;
540 if (margin < 0) margin = 0;
541 }
2ac8ceac 542 text_output_many(tf, margin, L' ');
543 text_output(tf, t.text);
544 text_output(tf, L"\n");
d7482997 545 if (align.underline != L'\0') {
2ac8ceac 546 text_output_many(tf, margin, L' ');
547 text_output_many(tf, length, align.underline);
548 text_output(tf, L"\n");
d7482997 549 }
550 if (align.align == LEFTPLUS)
551 margin = indent;
552 else
553 margin = 0;
554 sfree(t.text);
2ac8ceac 555 t = empty_rdstring;
d7482997 556 }
557 wrap_free(wrapping);
2ac8ceac 558 text_output(tf, L"\n");
d7482997 559
560 sfree(t.text);
561}
562
2ac8ceac 563static void text_rule(textfile *tf, int indent, int width) {
564 text_output_many(tf, indent, L' ');
565 text_output_many(tf, width, L'-'); /* FIXME: configurability! */
566 text_output_many(tf, 2, L'\n');
d7482997 567}
568
2ac8ceac 569static void text_para(textfile *tf, word *prefix, wchar_t *prefixextra,
570 word *text, int indent, int extraindent, int width) {
d7482997 571 wrappedline *wrapping, *p;
2ac8ceac 572 rdstring pfx = { 0, 0, NULL };
d7482997 573 int e;
574 int firstlinewidth = width;
575
576 if (prefix) {
2ac8ceac 577 text_rdaddw(tf->charset, &pfx, prefix, NULL);
d7482997 578 if (prefixextra)
2ac8ceac 579 rdadds(&pfx, prefixextra);
580 text_output_many(tf, indent, L' ');
581 text_output(tf, pfx.text);
c83c6495 582 /* If the prefix is too long, shorten the first line to fit. */
2ac8ceac 583 e = extraindent - pfx.pos;
d7482997 584 if (e < 0) {
c83c6495 585 firstlinewidth += e; /* this decreases it, since e < 0 */
d7482997 586 if (firstlinewidth < 0) {
587 e = indent + extraindent;
588 firstlinewidth = width;
2ac8ceac 589 text_output(tf, L"\n");
c83c6495 590 } else
591 e = 0;
d7482997 592 }
593 sfree(pfx.text);
594 } else
595 e = indent + extraindent;
596
2ac8ceac 597 wrapping = wrap_para(text, firstlinewidth, width,
598 text_width, &tf->charset, 0);
d7482997 599 for (p = wrapping; p; p = p->next) {
2ac8ceac 600 rdstring t = { 0, 0, NULL };
601 text_rdaddw(tf->charset, &t, p->begin, p->end);
602 text_output_many(tf, e, L' ');
603 text_output(tf, t.text);
604 text_output(tf, L"\n");
d7482997 605 e = indent + extraindent;
606 sfree(t.text);
607 }
608 wrap_free(wrapping);
2ac8ceac 609 text_output(tf, L"\n");
d7482997 610}
611
2ac8ceac 612static void text_codepara(textfile *tf, word *text, int indent, int width) {
d7482997 613 for (; text; text = text->next) if (text->type == word_WeakCode) {
2ac8ceac 614 if (ustrlen(text->text) > width) {
d7482997 615 /* FIXME: warn */
616 }
2ac8ceac 617 text_output_many(tf, indent, L' ');
618 text_output(tf, text->text);
619 text_output(tf, L"\n");
d7482997 620 }
621
2ac8ceac 622 text_output(tf, L"\n");
d7482997 623}
624
2ac8ceac 625static void text_versionid(textfile *tf, word *text) {
626 rdstring t = { 0, 0, NULL };
d7482997 627
2ac8ceac 628 rdadd(&t, L'['); /* FIXME: configurability */
629 text_rdaddw(tf->charset, &t, text, NULL);
630 rdadd(&t, L']'); /* FIXME: configurability */
631 rdadd(&t, L'\n');
d7482997 632
2ac8ceac 633 text_output(tf, t.text);
d7482997 634 sfree(t.text);
635}