2 * text backend for Halibut
10 typedef enum { LEFT
, LEFTPLUS
, CENTRE
} alignment
;
15 wchar_t *number_suffix
;
19 int indent
, indent_code
;
20 int listindentbefore
, listindentafter
;
22 alignstruct atitle
, achapter
, *asect
;
24 int include_version_id
;
28 wchar_t *lquote
, *rquote
, *rule
;
30 wchar_t *listsuffix
, *startemph
, *endemph
;
39 static void text_heading(textfile
*, word
*, word
*, word
*, alignstruct
,
40 int, int, textconfig
*);
41 static void text_rule(textfile
*, int, int, textconfig
*);
42 static void text_para(textfile
*, word
*, wchar_t *, word
*, int, int, int,
44 static void text_codepara(textfile
*, word
*, int, int);
45 static void text_versionid(textfile
*, word
*, textconfig
*);
47 static void text_output(textfile
*, const wchar_t *);
48 static void text_output_many(textfile
*, int, wchar_t);
50 static alignment
utoalign(wchar_t *p
) {
51 if (!ustricmp(p
, L
"centre") || !ustricmp(p
, L
"center"))
53 if (!ustricmp(p
, L
"leftplus"))
58 static textconfig
text_configure(paragraph
*source
) {
66 ret
.bullet
.next
= NULL
;
67 ret
.bullet
.alt
= NULL
;
68 ret
.bullet
.type
= word_Normal
;
69 ret
.atitle
.just_numbers
= FALSE
; /* ignored */
76 ret
.listindentbefore
= 1;
77 ret
.listindentafter
= 3;
79 ret
.atitle
.align
= CENTRE
;
80 ret
.atitle
.underline
= L
"\x2550\0=\0\0";
81 ret
.achapter
.align
= LEFT
;
82 ret
.achapter
.just_numbers
= FALSE
;
83 ret
.achapter
.number_suffix
= L
": ";
84 ret
.achapter
.underline
= L
"\x203E\0-\0\0";
86 ret
.asect
= mknewa(alignstruct
, ret
.nasect
);
87 ret
.asect
[0].align
= LEFTPLUS
;
88 ret
.asect
[0].just_numbers
= TRUE
;
89 ret
.asect
[0].number_suffix
= L
" ";
90 ret
.asect
[0].underline
= L
"\0";
91 ret
.include_version_id
= TRUE
;
92 ret
.indent_preambles
= FALSE
;
93 ret
.bullet
.text
= L
"\x2022\0-\0\0";
94 ret
.rule
= L
"\x2500\0-\0\0";
95 ret
.filename
= dupstr("output.txt");
96 ret
.startemph
= ret
.endemph
= L
"_";
97 ret
.listsuffix
= L
".";
98 ret
.charset
= CS_ASCII
;
100 * Default quote characters are Unicode matched single quotes,
101 * falling back to the TeXlike `'.
103 ret
.lquote
= L
"\x2018\0\x2019\0`\0'\0\0";
104 ret
.rquote
= uadv(ret
.lquote
);
107 * Two-pass configuration so that we can pick up global config
108 * (e.g. `quotes') before having it overridden by specific
109 * config (`text-quotes'), irrespective of the order in which
112 for (p
= source
; p
; p
= p
->next
) {
113 if (p
->type
== para_Config
) {
114 if (!ustricmp(p
->keyword
, L
"quotes")) {
115 if (*uadv(p
->keyword
) && *uadv(uadv(p
->keyword
))) {
116 ret
.lquote
= uadv(p
->keyword
);
117 ret
.rquote
= uadv(ret
.lquote
);
123 for (p
= source
; p
; p
= p
->next
) {
124 if (p
->type
== para_Config
) {
125 if (!ustricmp(p
->keyword
, L
"text-indent")) {
126 ret
.indent
= utoi(uadv(p
->keyword
));
127 } else if (!ustricmp(p
->keyword
, L
"text-charset")) {
128 char *csname
= utoa_dup(uadv(p
->keyword
), CS_ASCII
);
129 ret
.charset
= charset_from_localenc(csname
);
131 } else if (!ustricmp(p
->keyword
, L
"text-filename")) {
133 ret
.filename
= dupstr(adv(p
->origkeyword
));
134 } else if (!ustricmp(p
->keyword
, L
"text-indent-code")) {
135 ret
.indent_code
= utoi(uadv(p
->keyword
));
136 } else if (!ustricmp(p
->keyword
, L
"text-width")) {
137 ret
.width
= utoi(uadv(p
->keyword
));
138 } else if (!ustricmp(p
->keyword
, L
"text-list-indent")) {
139 ret
.listindentbefore
= utoi(uadv(p
->keyword
));
140 } else if (!ustricmp(p
->keyword
, L
"text-listitem-indent")) {
141 ret
.listindentafter
= utoi(uadv(p
->keyword
));
142 } else if (!ustricmp(p
->keyword
, L
"text-chapter-align")) {
143 ret
.achapter
.align
= utoalign(uadv(p
->keyword
));
144 } else if (!ustricmp(p
->keyword
, L
"text-chapter-underline")) {
145 ret
.achapter
.underline
= uadv(p
->keyword
);
146 } else if (!ustricmp(p
->keyword
, L
"text-chapter-numeric")) {
147 ret
.achapter
.just_numbers
= utob(uadv(p
->keyword
));
148 } else if (!ustricmp(p
->keyword
, L
"text-chapter-suffix")) {
149 ret
.achapter
.number_suffix
= uadv(p
->keyword
);
150 } else if (!ustricmp(p
->keyword
, L
"text-section-align")) {
151 wchar_t *q
= uadv(p
->keyword
);
157 if (n
>= ret
.nasect
) {
159 ret
.asect
= resize(ret
.asect
, n
+1);
160 for (i
= ret
.nasect
; i
<= n
; i
++)
161 ret
.asect
[i
] = ret
.asect
[ret
.nasect
-1];
164 ret
.asect
[n
].align
= utoalign(q
);
165 } else if (!ustricmp(p
->keyword
, L
"text-section-underline")) {
166 wchar_t *q
= uadv(p
->keyword
);
172 if (n
>= ret
.nasect
) {
174 ret
.asect
= resize(ret
.asect
, n
+1);
175 for (i
= ret
.nasect
; i
<= n
; i
++)
176 ret
.asect
[i
] = ret
.asect
[ret
.nasect
-1];
179 ret
.asect
[n
].underline
= q
;
180 } else if (!ustricmp(p
->keyword
, L
"text-section-numeric")) {
181 wchar_t *q
= uadv(p
->keyword
);
187 if (n
>= ret
.nasect
) {
189 ret
.asect
= resize(ret
.asect
, n
+1);
190 for (i
= ret
.nasect
; i
<= n
; i
++)
191 ret
.asect
[i
] = ret
.asect
[ret
.nasect
-1];
194 ret
.asect
[n
].just_numbers
= utob(q
);
195 } else if (!ustricmp(p
->keyword
, L
"text-section-suffix")) {
196 wchar_t *q
= uadv(p
->keyword
);
202 if (n
>= ret
.nasect
) {
204 ret
.asect
= resize(ret
.asect
, n
+1);
205 for (i
= ret
.nasect
; i
<= n
; i
++) {
206 ret
.asect
[i
] = ret
.asect
[ret
.nasect
-1];
210 ret
.asect
[n
].number_suffix
= q
;
211 } else if (!ustricmp(p
->keyword
, L
"text-title-align")) {
212 ret
.atitle
.align
= utoalign(uadv(p
->keyword
));
213 } else if (!ustricmp(p
->keyword
, L
"text-title-underline")) {
214 ret
.atitle
.underline
= uadv(p
->keyword
);
215 } else if (!ustricmp(p
->keyword
, L
"text-versionid")) {
216 ret
.include_version_id
= utob(uadv(p
->keyword
));
217 } else if (!ustricmp(p
->keyword
, L
"text-indent-preamble")) {
218 ret
.indent_preambles
= utob(uadv(p
->keyword
));
219 } else if (!ustricmp(p
->keyword
, L
"text-bullet")) {
220 ret
.bullet
.text
= uadv(p
->keyword
);
221 } else if (!ustricmp(p
->keyword
, L
"text-rule")) {
222 ret
.rule
= uadv(p
->keyword
);
223 } else if (!ustricmp(p
->keyword
, L
"text-list-suffix")) {
224 ret
.listsuffix
= uadv(p
->keyword
);
225 } else if (!ustricmp(p
->keyword
, L
"text-emphasis")) {
226 if (*uadv(p
->keyword
) && *uadv(uadv(p
->keyword
))) {
227 ret
.startemph
= uadv(p
->keyword
);
228 ret
.endemph
= uadv(ret
.startemph
);
230 } else if (!ustricmp(p
->keyword
, L
"text-quotes")) {
231 if (*uadv(p
->keyword
) && *uadv(uadv(p
->keyword
))) {
232 ret
.lquote
= uadv(p
->keyword
);
233 ret
.rquote
= uadv(ret
.lquote
);
240 * Now process fallbacks on quote characters, underlines, the
241 * rule character, the emphasis characters, and bullets.
243 while (*uadv(ret
.rquote
) && *uadv(uadv(ret
.rquote
)) &&
244 (!cvt_ok(ret
.charset
, ret
.lquote
) ||
245 !cvt_ok(ret
.charset
, ret
.rquote
))) {
246 ret
.lquote
= uadv(ret
.rquote
);
247 ret
.rquote
= uadv(ret
.lquote
);
250 while (*uadv(ret
.endemph
) && *uadv(uadv(ret
.endemph
)) &&
251 (!cvt_ok(ret
.charset
, ret
.startemph
) ||
252 !cvt_ok(ret
.charset
, ret
.endemph
))) {
253 ret
.startemph
= uadv(ret
.endemph
);
254 ret
.endemph
= uadv(ret
.startemph
);
257 while (*ret
.atitle
.underline
&& *uadv(ret
.atitle
.underline
) &&
258 !cvt_ok(ret
.charset
, ret
.atitle
.underline
))
259 ret
.atitle
.underline
= uadv(ret
.atitle
.underline
);
261 while (*ret
.achapter
.underline
&& *uadv(ret
.achapter
.underline
) &&
262 !cvt_ok(ret
.charset
, ret
.achapter
.underline
))
263 ret
.achapter
.underline
= uadv(ret
.achapter
.underline
);
265 for (n
= 0; n
< ret
.nasect
; n
++) {
266 while (*ret
.asect
[n
].underline
&& *uadv(ret
.asect
[n
].underline
) &&
267 !cvt_ok(ret
.charset
, ret
.asect
[n
].underline
))
268 ret
.asect
[n
].underline
= uadv(ret
.asect
[n
].underline
);
271 while (*ret
.bullet
.text
&& *uadv(ret
.bullet
.text
) &&
272 !cvt_ok(ret
.charset
, ret
.bullet
.text
))
273 ret
.bullet
.text
= uadv(ret
.bullet
.text
);
275 while (*ret
.rule
&& *uadv(ret
.rule
) &&
276 !cvt_ok(ret
.charset
, ret
.rule
))
277 ret
.rule
= uadv(ret
.rule
);
282 paragraph
*text_config_filename(char *filename
)
284 return cmdline_cfg_simple("text-filename", filename
, NULL
);
287 void text_backend(paragraph
*sourceform
, keywordlist
*keywords
,
288 indexdata
*idx
, void *unused
) {
291 word
*prefix
, *body
, *wp
;
294 wchar_t *prefixextra
;
295 int nesting
, nestindent
;
296 int indentb
, indenta
;
299 IGNORE(keywords
); /* we don't happen to need this */
300 IGNORE(idx
); /* or this */
302 conf
= text_configure(sourceform
);
305 * Open the output file.
307 tf
.fp
= fopen(conf
.filename
, "w");
309 error(err_cantopenw
, conf
.filename
);
312 tf
.charset
= conf
.charset
;
313 tf
.state
= charset_init_state
;
316 for (p
= sourceform
; p
; p
= p
->next
)
317 if (p
->type
== para_Title
)
318 text_heading(&tf
, NULL
, NULL
, p
->words
,
319 conf
.atitle
, conf
.indent
, conf
.width
, &conf
);
321 nestindent
= conf
.listindentbefore
+ conf
.listindentafter
;
322 nesting
= (conf
.indent_preambles ?
0 : -conf
.indent
);
324 /* Do the main document */
325 for (p
= sourceform
; p
; p
= p
->next
) switch (p
->type
) {
332 assert(nesting
>= 0);
336 nesting
+= nestindent
;
339 nesting
-= nestindent
;
340 assert(nesting
>= 0);
344 * Things we ignore because we've already processed them or
345 * aren't going to touch them in this pass.
349 case para_Biblio
: /* only touch BiblioCited */
360 case para_UnnumberedChapter
:
361 text_heading(&tf
, p
->kwtext
, p
->kwtext2
, p
->words
,
362 conf
.achapter
, conf
.indent
, conf
.width
, &conf
);
368 text_heading(&tf
, p
->kwtext
, p
->kwtext2
, p
->words
,
369 conf
.asect
[p
->aux
>=conf
.nasect ? conf
.nasect
-1 : p
->aux
],
370 conf
.indent
, conf
.width
, &conf
);
374 text_rule(&tf
, conf
.indent
+ nesting
, conf
.width
- nesting
, &conf
);
379 case para_DescribedThing
:
380 case para_Description
:
381 case para_BiblioCited
:
383 case para_NumberedList
:
384 if (p
->type
== para_Bullet
) {
385 prefix
= &conf
.bullet
;
387 indentb
= conf
.listindentbefore
;
388 indenta
= conf
.listindentafter
;
389 } else if (p
->type
== para_NumberedList
) {
391 prefixextra
= conf
.listsuffix
;
392 indentb
= conf
.listindentbefore
;
393 indenta
= conf
.listindentafter
;
394 } else if (p
->type
== para_Description
) {
397 indentb
= conf
.listindentbefore
;
398 indenta
= conf
.listindentafter
;
402 indentb
= indenta
= 0;
404 if (p
->type
== para_BiblioCited
) {
405 body
= dup_word_list(p
->kwtext
);
406 for (wp
= body
; wp
->next
; wp
= wp
->next
);
407 wp
->next
= &spaceword
;
408 spaceword
.next
= p
->words
;
409 spaceword
.alt
= NULL
;
410 spaceword
.type
= word_WhiteSpace
;
411 spaceword
.text
= NULL
;
416 text_para(&tf
, prefix
, prefixextra
, body
,
417 conf
.indent
+ nesting
+ indentb
, indenta
,
418 conf
.width
- nesting
- indentb
- indenta
, &conf
);
421 free_word_list(body
);
426 text_codepara(&tf
, p
->words
,
427 conf
.indent
+ nesting
+ conf
.indent_code
,
428 conf
.width
- nesting
- 2 * conf
.indent_code
);
432 /* Do the version ID */
433 if (conf
.include_version_id
) {
434 for (p
= sourceform
; p
; p
= p
->next
)
435 if (p
->type
== para_VersionID
)
436 text_versionid(&tf
, p
->words
, &conf
);
442 text_output(&tf
, NULL
); /* end charset conversion */
445 sfree(conf
.filename
);
448 static void text_output(textfile
*tf
, const wchar_t *s
)
463 ret
= charset_from_unicode(sp
, &len
, buf
, lenof(buf
),
464 tf
->charset
, &tf
->state
, NULL
);
467 fwrite(buf
, 1, ret
, tf
->fp
);
471 static void text_output_many(textfile
*tf
, int n
, wchar_t c
)
480 static void text_rdaddw(rdstring
*rs
, word
*text
, word
*end
, textconfig
*cfg
) {
481 for (; text
&& text
!= end
; text
= text
->next
) switch (text
->type
) {
494 case word_WhiteSpace
:
497 case word_WkCodeSpace
:
501 case word_WkCodeQuote
:
502 assert(text
->type
!= word_CodeQuote
&&
503 text
->type
!= word_WkCodeQuote
);
504 if (towordstyle(text
->type
) == word_Emph
&&
505 (attraux(text
->aux
) == attr_First
||
506 attraux(text
->aux
) == attr_Only
))
507 rdadds(rs
, cfg
->startemph
);
508 else if (towordstyle(text
->type
) == word_Code
&&
509 (attraux(text
->aux
) == attr_First
||
510 attraux(text
->aux
) == attr_Only
))
511 rdadds(rs
, cfg
->lquote
);
512 if (removeattr(text
->type
) == word_Normal
) {
513 if (cvt_ok(cfg
->charset
, text
->text
) || !text
->alt
)
514 rdadds(rs
, text
->text
);
516 text_rdaddw(rs
, text
->alt
, NULL
, cfg
);
517 } else if (removeattr(text
->type
) == word_WhiteSpace
) {
519 } else if (removeattr(text
->type
) == word_Quote
) {
520 rdadds(rs
, quoteaux(text
->aux
) == quote_Open ?
521 cfg
->lquote
: cfg
->rquote
);
523 if (towordstyle(text
->type
) == word_Emph
&&
524 (attraux(text
->aux
) == attr_Last
||
525 attraux(text
->aux
) == attr_Only
))
526 rdadds(rs
, cfg
->endemph
);
527 else if (towordstyle(text
->type
) == word_Code
&&
528 (attraux(text
->aux
) == attr_Last
||
529 attraux(text
->aux
) == attr_Only
))
530 rdadds(rs
, cfg
->rquote
);
535 static int text_width(void *, word
*);
537 static int text_width_list(void *ctx
, word
*text
) {
540 w
+= text_width(ctx
, text
);
546 static int text_width(void *ctx
, word
*text
) {
547 textconfig
*cfg
= (textconfig
*)ctx
;
551 switch (text
->type
) {
561 assert(text
->type
< word_internal_endattrs
);
564 attr
= towordstyle(text
->type
);
565 if (attr
== word_Emph
|| attr
== word_Code
) {
566 if (attraux(text
->aux
) == attr_Only
||
567 attraux(text
->aux
) == attr_First
)
568 wid
+= ustrwid(attr
== word_Emph ? cfg
->startemph
: cfg
->lquote
,
571 if (attr
== word_Emph
|| attr
== word_Code
) {
572 if (attraux(text
->aux
) == attr_Only
||
573 attraux(text
->aux
) == attr_Last
)
574 wid
+= ustrwid(attr
== word_Emph ? cfg
->startemph
: cfg
->lquote
,
578 switch (text
->type
) {
583 if (cvt_ok(cfg
->charset
, text
->text
) || !text
->alt
)
584 wid
+= ustrwid(text
->text
, cfg
->charset
);
586 wid
+= text_width_list(ctx
, text
->alt
);
589 case word_WhiteSpace
:
592 case word_WkCodeSpace
:
596 case word_WkCodeQuote
:
597 assert(text
->type
!= word_CodeQuote
&&
598 text
->type
!= word_WkCodeQuote
);
599 if (removeattr(text
->type
) == word_Quote
) {
600 if (quoteaux(text
->aux
) == quote_Open
)
601 wid
+= ustrwid(cfg
->lquote
, cfg
->charset
);
603 wid
+= ustrwid(cfg
->rquote
, cfg
->charset
);
611 static void text_heading(textfile
*tf
, word
*tprefix
, word
*nprefix
,
612 word
*text
, alignstruct align
,
613 int indent
, int width
, textconfig
*cfg
) {
614 rdstring t
= { 0, 0, NULL
};
616 int firstlinewidth
, wrapwidth
;
617 wrappedline
*wrapping
, *p
;
619 if (align
.just_numbers
&& nprefix
) {
620 text_rdaddw(&t
, nprefix
, NULL
, cfg
);
621 rdadds(&t
, align
.number_suffix
);
622 } else if (!align
.just_numbers
&& tprefix
) {
623 text_rdaddw(&t
, tprefix
, NULL
, cfg
);
624 rdadds(&t
, align
.number_suffix
);
626 margin
= length
= ustrwid(t
.text ? t
.text
: L
"", cfg
->charset
);
628 if (align
.align
== LEFTPLUS
) {
629 margin
= indent
- margin
;
630 if (margin
< 0) margin
= 0;
631 firstlinewidth
= indent
+ width
- margin
- length
;
633 } else if (align
.align
== LEFT
|| align
.align
== CENTRE
) {
635 firstlinewidth
= indent
+ width
- length
;
636 wrapwidth
= indent
+ width
;
639 wrapping
= wrap_para(text
, firstlinewidth
, wrapwidth
,
641 for (p
= wrapping
; p
; p
= p
->next
) {
642 text_rdaddw(&t
, p
->begin
, p
->end
, cfg
);
643 length
= ustrwid(t
.text ? t
.text
: L
"", cfg
->charset
);
644 if (align
.align
== CENTRE
) {
645 margin
= (indent
+ width
- length
)/2;
646 if (margin
< 0) margin
= 0;
648 text_output_many(tf
, margin
, L
' ');
649 text_output(tf
, t
.text
);
650 text_output(tf
, L
"\n");
651 if (*align
.underline
) {
652 text_output_many(tf
, margin
, L
' ');
654 text_output(tf
, align
.underline
);
655 length
-= ustrwid(align
.underline
, cfg
->charset
);
657 text_output(tf
, L
"\n");
659 if (align
.align
== LEFTPLUS
)
667 text_output(tf
, L
"\n");
672 static void text_rule(textfile
*tf
, int indent
, int width
, textconfig
*cfg
) {
673 text_output_many(tf
, indent
, L
' ');
675 text_output(tf
, cfg
->rule
);
676 width
-= ustrwid(cfg
->rule
, cfg
->charset
);
678 text_output_many(tf
, 2, L
'\n');
681 static void text_para(textfile
*tf
, word
*prefix
, wchar_t *prefixextra
,
682 word
*text
, int indent
, int extraindent
, int width
,
684 wrappedline
*wrapping
, *p
;
685 rdstring pfx
= { 0, 0, NULL
};
687 int firstlinewidth
= width
;
690 text_rdaddw(&pfx
, prefix
, NULL
, cfg
);
692 rdadds(&pfx
, prefixextra
);
693 text_output_many(tf
, indent
, L
' ');
694 text_output(tf
, pfx
.text
);
695 /* If the prefix is too long, shorten the first line to fit. */
696 e
= extraindent
- ustrwid(pfx
.text ? pfx
.text
: L
"", cfg
->charset
);
698 firstlinewidth
+= e
; /* this decreases it, since e < 0 */
699 if (firstlinewidth
< 0) {
700 e
= indent
+ extraindent
;
701 firstlinewidth
= width
;
702 text_output(tf
, L
"\n");
708 e
= indent
+ extraindent
;
710 wrapping
= wrap_para(text
, firstlinewidth
, width
,
712 for (p
= wrapping
; p
; p
= p
->next
) {
713 rdstring t
= { 0, 0, NULL
};
714 text_rdaddw(&t
, p
->begin
, p
->end
, cfg
);
715 text_output_many(tf
, e
, L
' ');
716 text_output(tf
, t
.text
);
717 text_output(tf
, L
"\n");
718 e
= indent
+ extraindent
;
722 text_output(tf
, L
"\n");
725 static void text_codepara(textfile
*tf
, word
*text
, int indent
, int width
) {
726 for (; text
; text
= text
->next
) if (text
->type
== word_WeakCode
) {
727 int wid
= ustrwid(text
->text
, tf
->charset
);
729 error(err_text_codeline
, &text
->fpos
, wid
, width
);
730 text_output_many(tf
, indent
, L
' ');
731 text_output(tf
, text
->text
);
732 text_output(tf
, L
"\n");
735 text_output(tf
, L
"\n");
738 static void text_versionid(textfile
*tf
, word
*text
, textconfig
*cfg
) {
739 rdstring t
= { 0, 0, NULL
};
742 text_rdaddw(&t
, text
, NULL
, cfg
);
746 text_output(tf
, t
.text
);