2 * text backend for Halibut
/*
 * Horizontal placement modes for headings. utoalign() tests
 * configuration strings for "centre"/"center" and "leftplus"
 * (presumably mapping them to CENTRE and LEFTPLUS), and
 * text_heading() branches on these values when computing the margin
 * of a heading.
 */
10 typedef enum { LEFT
, LEFTPLUS
, CENTRE
} alignment
;
15 wchar_t *number_suffix
;
19 int indent
, indent_code
;
20 int listindentbefore
, listindentafter
;
22 alignstruct atitle
, achapter
, *asect
;
24 int include_version_id
;
28 wchar_t *lquote
, *rquote
, *rule
;
30 wchar_t *listsuffix
, *startemph
, *endemph
;
/*
 * Forward declarations of the file-local output helpers that are
 * defined further down in this file.
 */
39 static void text_heading(textfile
*, word
*, word
*, word
*, alignstruct
,
40 int, int, textconfig
*);
41 static void text_rule(textfile
*, int, int, textconfig
*);
42 static void text_para(textfile
*, word
*, wchar_t *, word
*, int, int, int,
44 static void text_codepara(textfile
*, word
*, int, int);
45 static void text_versionid(textfile
*, word
*, textconfig
*);
47 static void text_output(textfile
*, const wchar_t *);
48 static void text_output_many(textfile
*, int, wchar_t);
/*
 * Translate an alignment keyword taken from the configuration into an
 * alignment value. The string is compared with ustricmp(): the
 * spellings "centre" and "center" are tested together, and "leftplus"
 * is tested separately.
 */
50 static alignment
utoalign(wchar_t *p
) {
51 if (!ustricmp(p
, L
"centre") || !ustricmp(p
, L
"center"))
53 if (!ustricmp(p
, L
"leftplus"))
/*
 * Build the text backend's configuration from the paragraph list
 * passed as source.
 *
 * Defaults are installed first: the bullet word, list indents, title,
 * chapter and section alignment / underline / suffix settings, the
 * version-ID and preamble flags, the rule, emphasis and quote strings,
 * the output filename "output.txt" and the charset CS_ASCII.
 *
 * The paragraph list is then walked twice, looking only at
 * para_Config entries. The first walk handles the global "quotes"
 * keyword; the second handles the "text-*" keywords, so that
 * "text-quotes" overrides "quotes" irrespective of their order.
 * The per-section keywords grow the asect array with sresize() when
 * the requested level n is not yet present, filling each new entry
 * with a copy of the last existing one.
 *
 * Finally, every multi-alternative string (quotes, emphasis markers,
 * title / chapter / section underlines, bullet text, rule) is advanced
 * with uadv() past alternatives that cvt_ok() rejects for the chosen
 * charset, for as long as a further alternative exists. Several of
 * the default strings pack their alternatives into a single literal
 * separated by embedded NULs, e.g. the bullet default.
 *
 * NOTE(review): values such as underline, suffix, bullet, rule and
 * quote strings are stored as pointers into p->keyword rather than
 * copied, so they presumably rely on the paragraph list outliving the
 * returned structure -- confirm.
 */
58 static textconfig
text_configure(paragraph
*source
) {
66 ret
.bullet
.next
= NULL
;
67 ret
.bullet
.alt
= NULL
;
68 ret
.bullet
.type
= word_Normal
;
69 ret
.atitle
.just_numbers
= FALSE
; /* ignored */
76 ret
.listindentbefore
= 1;
77 ret
.listindentafter
= 3;
79 ret
.atitle
.align
= CENTRE
;
80 ret
.atitle
.underline
= L
"\x2550\0=\0\0";
81 ret
.achapter
.align
= LEFT
;
82 ret
.achapter
.just_numbers
= FALSE
;
83 ret
.achapter
.number_suffix
= L
": ";
84 ret
.achapter
.underline
= L
"\x203E\0-\0\0";
86 ret
.asect
= snewn(ret
.nasect
, alignstruct
);
87 ret
.asect
[0].align
= LEFTPLUS
;
88 ret
.asect
[0].just_numbers
= TRUE
;
89 ret
.asect
[0].number_suffix
= L
" ";
90 ret
.asect
[0].underline
= L
"\0";
91 ret
.include_version_id
= TRUE
;
92 ret
.indent_preambles
= FALSE
;
93 ret
.bullet
.text
= L
"\x2022\0-\0\0";
94 ret
.rule
= L
"\x2500\0-\0\0";
95 ret
.filename
= dupstr("output.txt");
96 ret
.startemph
= L
"_\0_\0\0";
97 ret
.endemph
= uadv(ret
.startemph
);
98 ret
.listsuffix
= L
".";
99 ret
.charset
= CS_ASCII
;
101 * Default quote characters are Unicode matched single quotes,
102 * falling back to the TeXlike `'.
104 ret
.lquote
= L
"\x2018\0\x2019\0`\0'\0\0";
105 ret
.rquote
= uadv(ret
.lquote
);
108 * Two-pass configuration so that we can pick up global config
109 * (e.g. `quotes') before having it overridden by specific
110 * config (`text-quotes'), irrespective of the order in which
113 for (p
= source
; p
; p
= p
->next
) {
114 if (p
->type
== para_Config
) {
115 if (!ustricmp(p
->keyword
, L
"quotes")) {
116 if (*uadv(p
->keyword
) && *uadv(uadv(p
->keyword
))) {
117 ret
.lquote
= uadv(p
->keyword
);
118 ret
.rquote
= uadv(ret
.lquote
);
124 for (p
= source
; p
; p
= p
->next
) {
125 if (p
->type
== para_Config
) {
126 if (!ustricmp(p
->keyword
, L
"text-indent")) {
127 ret
.indent
= utoi(uadv(p
->keyword
));
128 } else if (!ustricmp(p
->keyword
, L
"text-charset")) {
129 ret
.charset
= charset_from_ustr(&p
->fpos
, uadv(p
->keyword
));
130 } else if (!ustricmp(p
->keyword
, L
"text-filename")) {
132 ret
.filename
= dupstr(adv(p
->origkeyword
));
133 } else if (!ustricmp(p
->keyword
, L
"text-indent-code")) {
134 ret
.indent_code
= utoi(uadv(p
->keyword
));
135 } else if (!ustricmp(p
->keyword
, L
"text-width")) {
136 ret
.width
= utoi(uadv(p
->keyword
));
137 } else if (!ustricmp(p
->keyword
, L
"text-list-indent")) {
138 ret
.listindentbefore
= utoi(uadv(p
->keyword
));
139 } else if (!ustricmp(p
->keyword
, L
"text-listitem-indent")) {
140 ret
.listindentafter
= utoi(uadv(p
->keyword
));
141 } else if (!ustricmp(p
->keyword
, L
"text-chapter-align")) {
142 ret
.achapter
.align
= utoalign(uadv(p
->keyword
));
143 } else if (!ustricmp(p
->keyword
, L
"text-chapter-underline")) {
144 ret
.achapter
.underline
= uadv(p
->keyword
);
145 } else if (!ustricmp(p
->keyword
, L
"text-chapter-numeric")) {
146 ret
.achapter
.just_numbers
= utob(uadv(p
->keyword
));
147 } else if (!ustricmp(p
->keyword
, L
"text-chapter-suffix")) {
148 ret
.achapter
.number_suffix
= uadv(p
->keyword
);
149 } else if (!ustricmp(p
->keyword
, L
"text-section-align")) {
150 wchar_t *q
= uadv(p
->keyword
);
156 if (n
>= ret
.nasect
) {
158 ret
.asect
= sresize(ret
.asect
, n
+1, alignstruct
);
159 for (i
= ret
.nasect
; i
<= n
; i
++)
160 ret
.asect
[i
] = ret
.asect
[ret
.nasect
-1];
163 ret
.asect
[n
].align
= utoalign(q
);
164 } else if (!ustricmp(p
->keyword
, L
"text-section-underline")) {
165 wchar_t *q
= uadv(p
->keyword
);
171 if (n
>= ret
.nasect
) {
173 ret
.asect
= sresize(ret
.asect
, n
+1, alignstruct
);
174 for (i
= ret
.nasect
; i
<= n
; i
++)
175 ret
.asect
[i
] = ret
.asect
[ret
.nasect
-1];
178 ret
.asect
[n
].underline
= q
;
179 } else if (!ustricmp(p
->keyword
, L
"text-section-numeric")) {
180 wchar_t *q
= uadv(p
->keyword
);
186 if (n
>= ret
.nasect
) {
188 ret
.asect
= sresize(ret
.asect
, n
+1, alignstruct
);
189 for (i
= ret
.nasect
; i
<= n
; i
++)
190 ret
.asect
[i
] = ret
.asect
[ret
.nasect
-1];
193 ret
.asect
[n
].just_numbers
= utob(q
);
194 } else if (!ustricmp(p
->keyword
, L
"text-section-suffix")) {
195 wchar_t *q
= uadv(p
->keyword
);
201 if (n
>= ret
.nasect
) {
203 ret
.asect
= sresize(ret
.asect
, n
+1, alignstruct
);
204 for (i
= ret
.nasect
; i
<= n
; i
++) {
205 ret
.asect
[i
] = ret
.asect
[ret
.nasect
-1];
209 ret
.asect
[n
].number_suffix
= q
;
210 } else if (!ustricmp(p
->keyword
, L
"text-title-align")) {
211 ret
.atitle
.align
= utoalign(uadv(p
->keyword
));
212 } else if (!ustricmp(p
->keyword
, L
"text-title-underline")) {
213 ret
.atitle
.underline
= uadv(p
->keyword
);
214 } else if (!ustricmp(p
->keyword
, L
"text-versionid")) {
215 ret
.include_version_id
= utob(uadv(p
->keyword
));
216 } else if (!ustricmp(p
->keyword
, L
"text-indent-preamble")) {
217 ret
.indent_preambles
= utob(uadv(p
->keyword
));
218 } else if (!ustricmp(p
->keyword
, L
"text-bullet")) {
219 ret
.bullet
.text
= uadv(p
->keyword
);
220 } else if (!ustricmp(p
->keyword
, L
"text-rule")) {
221 ret
.rule
= uadv(p
->keyword
);
222 } else if (!ustricmp(p
->keyword
, L
"text-list-suffix")) {
223 ret
.listsuffix
= uadv(p
->keyword
);
224 } else if (!ustricmp(p
->keyword
, L
"text-emphasis")) {
225 if (*uadv(p
->keyword
) && *uadv(uadv(p
->keyword
))) {
226 ret
.startemph
= uadv(p
->keyword
);
227 ret
.endemph
= uadv(ret
.startemph
);
229 } else if (!ustricmp(p
->keyword
, L
"text-quotes")) {
230 if (*uadv(p
->keyword
) && *uadv(uadv(p
->keyword
))) {
231 ret
.lquote
= uadv(p
->keyword
);
232 ret
.rquote
= uadv(ret
.lquote
);
239 * Now process fallbacks on quote characters, underlines, the
240 * rule character, the emphasis characters, and bullets.
242 while (*uadv(ret
.rquote
) && *uadv(uadv(ret
.rquote
)) &&
243 (!cvt_ok(ret
.charset
, ret
.lquote
) ||
244 !cvt_ok(ret
.charset
, ret
.rquote
))) {
245 ret
.lquote
= uadv(ret
.rquote
);
246 ret
.rquote
= uadv(ret
.lquote
);
249 while (*uadv(ret
.endemph
) && *uadv(uadv(ret
.endemph
)) &&
250 (!cvt_ok(ret
.charset
, ret
.startemph
) ||
251 !cvt_ok(ret
.charset
, ret
.endemph
))) {
252 ret
.startemph
= uadv(ret
.endemph
);
253 ret
.endemph
= uadv(ret
.startemph
);
256 while (*ret
.atitle
.underline
&& *uadv(ret
.atitle
.underline
) &&
257 !cvt_ok(ret
.charset
, ret
.atitle
.underline
))
258 ret
.atitle
.underline
= uadv(ret
.atitle
.underline
);
260 while (*ret
.achapter
.underline
&& *uadv(ret
.achapter
.underline
) &&
261 !cvt_ok(ret
.charset
, ret
.achapter
.underline
))
262 ret
.achapter
.underline
= uadv(ret
.achapter
.underline
);
264 for (n
= 0; n
< ret
.nasect
; n
++) {
265 while (*ret
.asect
[n
].underline
&& *uadv(ret
.asect
[n
].underline
) &&
266 !cvt_ok(ret
.charset
, ret
.asect
[n
].underline
))
267 ret
.asect
[n
].underline
= uadv(ret
.asect
[n
].underline
);
270 while (*ret
.bullet
.text
&& *uadv(ret
.bullet
.text
) &&
271 !cvt_ok(ret
.charset
, ret
.bullet
.text
))
272 ret
.bullet
.text
= uadv(ret
.bullet
.text
);
274 while (*ret
.rule
&& *uadv(ret
.rule
) &&
275 !cvt_ok(ret
.charset
, ret
.rule
))
276 ret
.rule
= uadv(ret
.rule
);
/*
 * Produce a configuration paragraph for the given output file name by
 * delegating to cmdline_cfg_simple() with the "text-filename" keyword
 * (the same keyword text_configure() reads the output name from).
 */
281 paragraph
*text_config_filename(char *filename
)
283 return cmdline_cfg_simple("text-filename", filename
, NULL
);
/*
 * Entry point of the text backend: render the paragraph list
 * sourceform to the file named by the configuration.
 *
 * keywords and idx are explicitly discarded with IGNORE(). Settings
 * come from text_configure(); conf.filename is opened for writing,
 * and err_cantopenw is reported (presumably when the fopen() fails).
 * The paragraph list is then traversed in separate passes: titles
 * first, then the main document (chapter and section headings, rules,
 * list / description / bibliography and ordinary paragraphs, code
 * paragraphs), and finally version IDs when conf.include_version_id
 * is set. text_output(&tf, NULL) ends the charset conversion and
 * conf.filename is freed at the end.
 *
 * nesting holds the extra indentation contributed by nested lists; it
 * is moved up and down by conf.listindentbefore +
 * conf.listindentafter and asserted to be non-negative. Its initial
 * value is -conf.indent unless conf.indent_preambles is set.
 *
 * For section headings, a level (p->aux) beyond the configured
 * section settings falls back to the last configured entry.
 *
 * NOTE(review): in the para_BiblioCited case the loop that finds the
 * tail of the duplicated kwtext list dereferences body without a NULL
 * check, so it assumes p->kwtext is never empty -- confirm.
 */
286 void text_backend(paragraph
*sourceform
, keywordlist
*keywords
,
287 indexdata
*idx
, void *unused
) {
290 word
*prefix
, *body
, *wp
;
293 wchar_t *prefixextra
;
294 int nesting
, nestindent
;
295 int indentb
, indenta
;
298 IGNORE(keywords
); /* we don't happen to need this */
299 IGNORE(idx
); /* or this */
301 conf
= text_configure(sourceform
);
304 * Open the output file.
306 tf
.fp
= fopen(conf
.filename
, "w");
308 error(err_cantopenw
, conf
.filename
);
311 tf
.charset
= conf
.charset
;
312 tf
.state
= charset_init_state
;
315 for (p
= sourceform
; p
; p
= p
->next
)
316 if (p
->type
== para_Title
)
317 text_heading(&tf
, NULL
, NULL
, p
->words
,
318 conf
.atitle
, conf
.indent
, conf
.width
, &conf
);
320 nestindent
= conf
.listindentbefore
+ conf
.listindentafter
;
321 nesting
= (conf
.indent_preambles ?
0 : -conf
.indent
);
323 /* Do the main document */
324 for (p
= sourceform
; p
; p
= p
->next
) switch (p
->type
) {
331 assert(nesting
>= 0);
335 nesting
+= nestindent
;
338 nesting
-= nestindent
;
339 assert(nesting
>= 0);
343 * Things we ignore because we've already processed them or
344 * aren't going to touch them in this pass.
348 case para_Biblio
: /* only touch BiblioCited */
359 case para_UnnumberedChapter
:
360 text_heading(&tf
, p
->kwtext
, p
->kwtext2
, p
->words
,
361 conf
.achapter
, conf
.indent
, conf
.width
, &conf
);
367 text_heading(&tf
, p
->kwtext
, p
->kwtext2
, p
->words
,
368 conf
.asect
[p
->aux
>=conf
.nasect ? conf
.nasect
-1 : p
->aux
],
369 conf
.indent
, conf
.width
, &conf
);
373 text_rule(&tf
, conf
.indent
+ nesting
, conf
.width
- nesting
, &conf
);
378 case para_DescribedThing
:
379 case para_Description
:
380 case para_BiblioCited
:
382 case para_NumberedList
:
383 if (p
->type
== para_Bullet
) {
384 prefix
= &conf
.bullet
;
386 indentb
= conf
.listindentbefore
;
387 indenta
= conf
.listindentafter
;
388 } else if (p
->type
== para_NumberedList
) {
390 prefixextra
= conf
.listsuffix
;
391 indentb
= conf
.listindentbefore
;
392 indenta
= conf
.listindentafter
;
393 } else if (p
->type
== para_Description
) {
396 indentb
= conf
.listindentbefore
;
397 indenta
= conf
.listindentafter
;
401 indentb
= indenta
= 0;
403 if (p
->type
== para_BiblioCited
) {
404 body
= dup_word_list(p
->kwtext
);
405 for (wp
= body
; wp
->next
; wp
= wp
->next
);
406 wp
->next
= &spaceword
;
407 spaceword
.next
= p
->words
;
408 spaceword
.alt
= NULL
;
409 spaceword
.type
= word_WhiteSpace
;
410 spaceword
.text
= NULL
;
415 text_para(&tf
, prefix
, prefixextra
, body
,
416 conf
.indent
+ nesting
+ indentb
, indenta
,
417 conf
.width
- nesting
- indentb
- indenta
, &conf
);
420 free_word_list(body
);
425 text_codepara(&tf
, p
->words
,
426 conf
.indent
+ nesting
+ conf
.indent_code
,
427 conf
.width
- nesting
- 2 * conf
.indent_code
);
431 /* Do the version ID */
432 if (conf
.include_version_id
) {
433 for (p
= sourceform
; p
; p
= p
->next
)
434 if (p
->type
== para_VersionID
)
435 text_versionid(&tf
, p
->words
, &conf
);
441 text_output(&tf
, NULL
); /* end charset conversion */
444 sfree(conf
.filename
);
/*
 * Write the wide string s to tf->fp. The visible code converts it
 * with charset_from_unicode() into a local buffer, using tf->charset
 * and the conversion state kept in tf->state, and writes the
 * converted bytes with fwrite(). text_backend() calls this with
 * s == NULL to end the charset conversion.
 *
 * NOTE(review): the fwrite() result does not appear to be checked, so
 * a short write would go unnoticed -- confirm.
 */
447 static void text_output(textfile
*tf
, const wchar_t *s
)
462 ret
= charset_from_unicode(sp
, &len
, buf
, lenof(buf
),
463 tf
->charset
, &tf
->state
, NULL
);
466 fwrite(buf
, 1, ret
, tf
->fp
);
/*
 * Takes a count n and a single wide character c. Callers in this
 * file use it for runs of spaces (margins and indentation) and for a
 * pair of newlines after a rule; presumably it outputs c to tf
 * n times -- confirm against the body.
 */
470 static void text_output_many(textfile
*tf
, int n
, wchar_t c
)
/*
 * Append the rendered form of the words from text up to (but not
 * including) end to the rdstring rs, using the delimiters held in
 * cfg. Passing end == NULL processes the list to its tail.
 *
 * In the visible branch: an emphasised word whose attribute is First
 * or Only is preceded by cfg->startemph, and a code word in the same
 * position by cfg->lquote; a word in the Last or Only position is
 * followed by cfg->endemph or cfg->rquote respectively. Normal words
 * contribute text->text when it converts in cfg->charset or when no
 * alternative exists; otherwise the function recurses on text->alt.
 * Quote words contribute cfg->lquote or cfg->rquote depending on
 * quoteaux(text->aux).
 */
479 static void text_rdaddw(rdstring
*rs
, word
*text
, word
*end
, textconfig
*cfg
) {
480 for (; text
&& text
!= end
; text
= text
->next
) switch (text
->type
) {
493 case word_WhiteSpace
:
496 case word_WkCodeSpace
:
500 case word_WkCodeQuote
:
501 assert(text
->type
!= word_CodeQuote
&&
502 text
->type
!= word_WkCodeQuote
);
503 if (towordstyle(text
->type
) == word_Emph
&&
504 (attraux(text
->aux
) == attr_First
||
505 attraux(text
->aux
) == attr_Only
))
506 rdadds(rs
, cfg
->startemph
);
507 else if (towordstyle(text
->type
) == word_Code
&&
508 (attraux(text
->aux
) == attr_First
||
509 attraux(text
->aux
) == attr_Only
))
510 rdadds(rs
, cfg
->lquote
);
511 if (removeattr(text
->type
) == word_Normal
) {
512 if (cvt_ok(cfg
->charset
, text
->text
) || !text
->alt
)
513 rdadds(rs
, text
->text
);
515 text_rdaddw(rs
, text
->alt
, NULL
, cfg
);
516 } else if (removeattr(text
->type
) == word_WhiteSpace
) {
518 } else if (removeattr(text
->type
) == word_Quote
) {
519 rdadds(rs
, quoteaux(text
->aux
) == quote_Open ?
520 cfg
->lquote
: cfg
->rquote
);
522 if (towordstyle(text
->type
) == word_Emph
&&
523 (attraux(text
->aux
) == attr_Last
||
524 attraux(text
->aux
) == attr_Only
))
525 rdadds(rs
, cfg
->endemph
);
526 else if (towordstyle(text
->type
) == word_Code
&&
527 (attraux(text
->aux
) == attr_Last
||
528 attraux(text
->aux
) == attr_Only
))
529 rdadds(rs
, cfg
->rquote
);
/*
 * Forward declaration: text_width_list() and text_width() call each
 * other (the latter measures a word's alternative list with the
 * former).
 */
534 static int text_width(void *, word
*);
/*
 * Accumulate text_width() results for the word list starting at text
 * into a running total w; ctx is passed through unchanged.
 */
536 static int text_width_list(void *ctx
, word
*text
) {
539 w
+= text_width(ctx
, text
);
/*
 * Compute the display width of a single word, measured with ustrwid()
 * in cfg->charset. ctx is the textconfig pointer, passed as void *.
 *
 * The total accumulated in wid covers any opening delimiter
 * (cfg->startemph for emphasis, cfg->lquote for code) on First/Only
 * words, a closing-delimiter allowance on Last/Only words, the word's
 * own text (or, when the text does not convert in the charset and an
 * alternative exists, the width of the alternative list), and the
 * quote string for quote words.
 */
545 static int text_width(void *ctx
, word
*text
) {
546 textconfig
*cfg
= (textconfig
*)ctx
;
550 switch (text
->type
) {
560 assert(text
->type
< word_internal_endattrs
);
563 attr
= towordstyle(text
->type
);
564 if (attr
== word_Emph
|| attr
== word_Code
) {
565 if (attraux(text
->aux
) == attr_Only
||
566 attraux(text
->aux
) == attr_First
)
567 wid
+= ustrwid(attr
== word_Emph ? cfg
->startemph
: cfg
->lquote
,
/*
 * NOTE(review): for the closing (Last/Only) position the code below
 * measures cfg->startemph / cfg->lquote, whereas text_rdaddw() emits
 * cfg->endemph / cfg->rquote at that position. The result differs
 * only when the opening and closing strings have different widths --
 * confirm whether endemph / rquote was intended here.
 */
570 if (attr
== word_Emph
|| attr
== word_Code
) {
571 if (attraux(text
->aux
) == attr_Only
||
572 attraux(text
->aux
) == attr_Last
)
573 wid
+= ustrwid(attr
== word_Emph ? cfg
->startemph
: cfg
->lquote
,
577 switch (text
->type
) {
582 if (cvt_ok(cfg
->charset
, text
->text
) || !text
->alt
)
583 wid
+= ustrwid(text
->text
, cfg
->charset
);
585 wid
+= text_width_list(ctx
, text
->alt
);
588 case word_WhiteSpace
:
591 case word_WkCodeSpace
:
595 case word_WkCodeQuote
:
596 assert(text
->type
!= word_CodeQuote
&&
597 text
->type
!= word_WkCodeQuote
);
598 if (removeattr(text
->type
) == word_Quote
) {
599 if (quoteaux(text
->aux
) == quote_Open
)
600 wid
+= ustrwid(cfg
->lquote
, cfg
->charset
);
602 wid
+= ustrwid(cfg
->rquote
, cfg
->charset
);
/*
 * Output a heading.
 *
 * tprefix and nprefix are the two candidate prefixes for the heading:
 * nprefix is used when align.just_numbers is set, tprefix otherwise,
 * and in either case align.number_suffix is appended after it. A NULL
 * prefix is skipped. text is the heading body; indent and width
 * describe the available area, and cfg supplies the charset and
 * delimiter strings.
 *
 * The prefix width is measured first. For LEFTPLUS the margin is
 * indent minus that width, clamped at zero; for LEFT and CENTRE the
 * first line is shortened by the prefix width and later lines may use
 * indent + width. The body is wrapped with wrap_para(); each wrapped
 * line is appended to the accumulated text, written after margin
 * spaces (for CENTRE the margin is recomputed per line as half the
 * unused width, clamped at zero) and, when align.underline is
 * non-empty, followed by an underline row that starts at the same
 * margin. A newline is written at the end.
 */
610 static void text_heading(textfile
*tf
, word
*tprefix
, word
*nprefix
,
611 word
*text
, alignstruct align
,
612 int indent
, int width
, textconfig
*cfg
) {
613 rdstring t
= { 0, 0, NULL
};
615 int firstlinewidth
, wrapwidth
;
616 wrappedline
*wrapping
, *p
;
618 if (align
.just_numbers
&& nprefix
) {
619 text_rdaddw(&t
, nprefix
, NULL
, cfg
);
620 rdadds(&t
, align
.number_suffix
);
621 } else if (!align
.just_numbers
&& tprefix
) {
622 text_rdaddw(&t
, tprefix
, NULL
, cfg
);
623 rdadds(&t
, align
.number_suffix
);
625 margin
= length
= ustrwid(t
.text ? t
.text
: L
"", cfg
->charset
);
627 if (align
.align
== LEFTPLUS
) {
628 margin
= indent
- margin
;
629 if (margin
< 0) margin
= 0;
630 firstlinewidth
= indent
+ width
- margin
- length
;
632 } else if (align
.align
== LEFT
|| align
.align
== CENTRE
) {
634 firstlinewidth
= indent
+ width
- length
;
635 wrapwidth
= indent
+ width
;
638 wrapping
= wrap_para(text
, firstlinewidth
, wrapwidth
,
640 for (p
= wrapping
; p
; p
= p
->next
) {
641 text_rdaddw(&t
, p
->begin
, p
->end
, cfg
);
642 length
= ustrwid(t
.text ? t
.text
: L
"", cfg
->charset
);
643 if (align
.align
== CENTRE
) {
644 margin
= (indent
+ width
- length
)/2;
645 if (margin
< 0) margin
= 0;
647 text_output_many(tf
, margin
, L
' ');
648 text_output(tf
, t
.text
);
649 text_output(tf
, L
"\n");
650 if (*align
.underline
) {
651 text_output_many(tf
, margin
, L
' ');
653 text_output(tf
, align
.underline
);
654 length
-= ustrwid(align
.underline
, cfg
->charset
);
656 text_output(tf
, L
"\n");
658 if (align
.align
== LEFTPLUS
)
666 text_output(tf
, L
"\n");
/*
 * Output a horizontal rule: indent spaces, then cfg->rule, with width
 * reduced by the rule string's display width each time it is written
 * (presumably repeated until width is used up), then two newlines.
 */
671 static void text_rule(textfile
*tf
, int indent
, int width
, textconfig
*cfg
) {
672 text_output_many(tf
, indent
, L
' ');
674 text_output(tf
, cfg
->rule
);
675 width
-= ustrwid(cfg
->rule
, cfg
->charset
);
677 text_output_many(tf
, 2, L
'\n');
/*
 * Output a wrapped paragraph, optionally introduced by a prefix such
 * as a bullet or list number.
 *
 * prefix is rendered with text_rdaddw() and prefixextra appended to
 * it; the result is written after indent spaces. The first body line
 * starts e columns after the prefix, where e is extraindent minus the
 * prefix width. When the prefix is wider than extraindent the first
 * line is shortened to compensate, and if that leaves no room at all
 * the prefix is put on a line of its own and the body starts at the
 * normal indent + extraindent. Every later line is indented by
 * indent + extraindent. A newline is written at the end.
 */
680 static void text_para(textfile
*tf
, word
*prefix
, wchar_t *prefixextra
,
681 word
*text
, int indent
, int extraindent
, int width
,
683 wrappedline
*wrapping
, *p
;
684 rdstring pfx
= { 0, 0, NULL
};
686 int firstlinewidth
= width
;
689 text_rdaddw(&pfx
, prefix
, NULL
, cfg
);
691 rdadds(&pfx
, prefixextra
);
692 text_output_many(tf
, indent
, L
' ');
693 text_output(tf
, pfx
.text
);
694 /* If the prefix is too long, shorten the first line to fit. */
695 e
= extraindent
- ustrwid(pfx
.text ? pfx
.text
: L
"", cfg
->charset
);
697 firstlinewidth
+= e
; /* this decreases it, since e < 0 */
698 if (firstlinewidth
< 0) {
699 e
= indent
+ extraindent
;
700 firstlinewidth
= width
;
701 text_output(tf
, L
"\n");
707 e
= indent
+ extraindent
;
709 wrapping
= wrap_para(text
, firstlinewidth
, width
,
711 for (p
= wrapping
; p
; p
= p
->next
) {
712 rdstring t
= { 0, 0, NULL
};
713 text_rdaddw(&t
, p
->begin
, p
->end
, cfg
);
714 text_output_many(tf
, e
, L
' ');
715 text_output(tf
, t
.text
);
716 text_output(tf
, L
"\n");
717 e
= indent
+ extraindent
;
721 text_output(tf
, L
"\n");
/*
 * Output a code paragraph. Only words of type word_WeakCode are
 * handled by the visible loop body: each one is measured in
 * tf->charset, err_text_codeline is reported with the measured and
 * permitted widths (presumably only when the line is too wide), and
 * the word's text is written after indent spaces, followed by a
 * newline. A further newline is written at the end.
 */
724 static void text_codepara(textfile
*tf
, word
*text
, int indent
, int width
) {
725 for (; text
; text
= text
->next
) if (text
->type
== word_WeakCode
) {
726 int wid
= ustrwid(text
->text
, tf
->charset
);
728 error(err_text_codeline
, &text
->fpos
, wid
, width
);
729 text_output_many(tf
, indent
, L
' ');
730 text_output(tf
, text
->text
);
731 text_output(tf
, L
"\n");
734 text_output(tf
, L
"\n");
/*
 * Output a version-ID paragraph: the whole word list text is rendered
 * into a temporary rdstring with text_rdaddw() and the resulting
 * string is written with text_output().
 */
737 static void text_versionid(textfile
*tf
, word
*text
, textconfig
*cfg
) {
738 rdstring t
= { 0, 0, NULL
};
741 text_rdaddw(&t
, text
, NULL
, cfg
);
745 text_output(tf
, t
.text
);