2 * text backend for Halibut
10 typedef enum { LEFT
, LEFTPLUS
, CENTRE
} alignment
;
13 int number_at_all
, just_numbers
;
15 wchar_t *number_suffix
;
19 int indent
, indent_code
;
20 int listindentbefore
, listindentafter
;
22 alignstruct atitle
, achapter
, *asect
;
24 int include_version_id
;
28 wchar_t *lquote
, *rquote
, *rule
;
30 wchar_t *listsuffix
, *startemph
, *endemph
;
39 static void text_heading(textfile
*, word
*, word
*, word
*, alignstruct
,
40 int, int, textconfig
*);
41 static void text_rule(textfile
*, int, int, textconfig
*);
42 static void text_para(textfile
*, word
*, wchar_t *, word
*, int, int, int,
44 static void text_codepara(textfile
*, word
*, int, int);
45 static void text_versionid(textfile
*, word
*, textconfig
*);
47 static void text_output(textfile
*, const wchar_t *);
48 static void text_output_many(textfile
*, int, wchar_t);
50 static alignment
utoalign(wchar_t *p
) {
51 if (!ustricmp(p
, L
"centre") || !ustricmp(p
, L
"center"))
53 if (!ustricmp(p
, L
"leftplus"))
58 static textconfig
text_configure(paragraph
*source
) {
66 ret
.bullet
.next
= NULL
;
67 ret
.bullet
.alt
= NULL
;
68 ret
.bullet
.type
= word_Normal
;
69 ret
.atitle
.just_numbers
= FALSE
; /* ignored */
70 ret
.atitle
.number_at_all
= TRUE
; /* ignored */
77 ret
.listindentbefore
= 1;
78 ret
.listindentafter
= 3;
80 ret
.atitle
.align
= CENTRE
;
81 ret
.atitle
.underline
= L
"\x2550\0=\0\0";
82 ret
.achapter
.align
= LEFT
;
83 ret
.achapter
.just_numbers
= FALSE
;
84 ret
.achapter
.number_at_all
= TRUE
;
85 ret
.achapter
.number_suffix
= L
": ";
86 ret
.achapter
.underline
= L
"\x203E\0-\0\0";
88 ret
.asect
= snewn(ret
.nasect
, alignstruct
);
89 ret
.asect
[0].align
= LEFTPLUS
;
90 ret
.asect
[0].just_numbers
= TRUE
;
91 ret
.asect
[0].number_at_all
= TRUE
;
92 ret
.asect
[0].number_suffix
= L
" ";
93 ret
.asect
[0].underline
= L
"\0";
94 ret
.include_version_id
= TRUE
;
95 ret
.indent_preambles
= FALSE
;
96 ret
.bullet
.text
= L
"\x2022\0-\0\0";
97 ret
.rule
= L
"\x2500\0-\0\0";
98 ret
.filename
= dupstr("output.txt");
99 ret
.startemph
= L
"_\0_\0\0";
100 ret
.endemph
= uadv(ret
.startemph
);
101 ret
.listsuffix
= L
".";
102 ret
.charset
= CS_ASCII
;
104 * Default quote characters are Unicode matched single quotes,
105 * falling back to the TeXlike `'.
107 ret
.lquote
= L
"\x2018\0\x2019\0`\0'\0\0";
108 ret
.rquote
= uadv(ret
.lquote
);
111 * Two-pass configuration so that we can pick up global config
112 * (e.g. `quotes') before having it overridden by specific
113 * config (`text-quotes'), irrespective of the order in which
116 for (p
= source
; p
; p
= p
->next
) {
117 if (p
->type
== para_Config
) {
118 if (!ustricmp(p
->keyword
, L
"quotes")) {
119 if (*uadv(p
->keyword
) && *uadv(uadv(p
->keyword
))) {
120 ret
.lquote
= uadv(p
->keyword
);
121 ret
.rquote
= uadv(ret
.lquote
);
127 for (p
= source
; p
; p
= p
->next
) {
128 if (p
->type
== para_Config
) {
129 if (!ustricmp(p
->keyword
, L
"text-indent")) {
130 ret
.indent
= utoi(uadv(p
->keyword
));
131 } else if (!ustricmp(p
->keyword
, L
"text-charset")) {
132 ret
.charset
= charset_from_ustr(&p
->fpos
, uadv(p
->keyword
));
133 } else if (!ustricmp(p
->keyword
, L
"text-filename")) {
135 ret
.filename
= dupstr(adv(p
->origkeyword
));
136 } else if (!ustricmp(p
->keyword
, L
"text-indent-code")) {
137 ret
.indent_code
= utoi(uadv(p
->keyword
));
138 } else if (!ustricmp(p
->keyword
, L
"text-width")) {
139 ret
.width
= utoi(uadv(p
->keyword
));
140 } else if (!ustricmp(p
->keyword
, L
"text-list-indent")) {
141 ret
.listindentbefore
= utoi(uadv(p
->keyword
));
142 } else if (!ustricmp(p
->keyword
, L
"text-listitem-indent")) {
143 ret
.listindentafter
= utoi(uadv(p
->keyword
));
144 } else if (!ustricmp(p
->keyword
, L
"text-chapter-align")) {
145 ret
.achapter
.align
= utoalign(uadv(p
->keyword
));
146 } else if (!ustricmp(p
->keyword
, L
"text-chapter-underline")) {
147 ret
.achapter
.underline
= uadv(p
->keyword
);
148 } else if (!ustricmp(p
->keyword
, L
"text-chapter-numeric")) {
149 ret
.achapter
.just_numbers
= utob(uadv(p
->keyword
));
150 } else if (!ustricmp(p
->keyword
, L
"text-chapter-shownumber")) {
151 ret
.achapter
.number_at_all
= utob(uadv(p
->keyword
));
152 } else if (!ustricmp(p
->keyword
, L
"text-chapter-suffix")) {
153 ret
.achapter
.number_suffix
= uadv(p
->keyword
);
154 } else if (!ustricmp(p
->keyword
, L
"text-section-align")) {
155 wchar_t *q
= uadv(p
->keyword
);
161 if (n
>= ret
.nasect
) {
163 ret
.asect
= sresize(ret
.asect
, n
+1, alignstruct
);
164 for (i
= ret
.nasect
; i
<= n
; i
++)
165 ret
.asect
[i
] = ret
.asect
[ret
.nasect
-1];
168 ret
.asect
[n
].align
= utoalign(q
);
169 } else if (!ustricmp(p
->keyword
, L
"text-section-underline")) {
170 wchar_t *q
= uadv(p
->keyword
);
176 if (n
>= ret
.nasect
) {
178 ret
.asect
= sresize(ret
.asect
, n
+1, alignstruct
);
179 for (i
= ret
.nasect
; i
<= n
; i
++)
180 ret
.asect
[i
] = ret
.asect
[ret
.nasect
-1];
183 ret
.asect
[n
].underline
= q
;
184 } else if (!ustricmp(p
->keyword
, L
"text-section-numeric")) {
185 wchar_t *q
= uadv(p
->keyword
);
191 if (n
>= ret
.nasect
) {
193 ret
.asect
= sresize(ret
.asect
, n
+1, alignstruct
);
194 for (i
= ret
.nasect
; i
<= n
; i
++)
195 ret
.asect
[i
] = ret
.asect
[ret
.nasect
-1];
198 ret
.asect
[n
].just_numbers
= utob(q
);
199 } else if (!ustricmp(p
->keyword
, L
"text-section-shownumber")) {
200 wchar_t *q
= uadv(p
->keyword
);
206 if (n
>= ret
.nasect
) {
208 ret
.asect
= sresize(ret
.asect
, n
+1, alignstruct
);
209 for (i
= ret
.nasect
; i
<= n
; i
++)
210 ret
.asect
[i
] = ret
.asect
[ret
.nasect
-1];
213 ret
.asect
[n
].number_at_all
= utob(q
);
214 } else if (!ustricmp(p
->keyword
, L
"text-section-suffix")) {
215 wchar_t *q
= uadv(p
->keyword
);
221 if (n
>= ret
.nasect
) {
223 ret
.asect
= sresize(ret
.asect
, n
+1, alignstruct
);
224 for (i
= ret
.nasect
; i
<= n
; i
++) {
225 ret
.asect
[i
] = ret
.asect
[ret
.nasect
-1];
229 ret
.asect
[n
].number_suffix
= q
;
230 } else if (!ustricmp(p
->keyword
, L
"text-title-align")) {
231 ret
.atitle
.align
= utoalign(uadv(p
->keyword
));
232 } else if (!ustricmp(p
->keyword
, L
"text-title-underline")) {
233 ret
.atitle
.underline
= uadv(p
->keyword
);
234 } else if (!ustricmp(p
->keyword
, L
"text-versionid")) {
235 ret
.include_version_id
= utob(uadv(p
->keyword
));
236 } else if (!ustricmp(p
->keyword
, L
"text-indent-preamble")) {
237 ret
.indent_preambles
= utob(uadv(p
->keyword
));
238 } else if (!ustricmp(p
->keyword
, L
"text-bullet")) {
239 ret
.bullet
.text
= uadv(p
->keyword
);
240 } else if (!ustricmp(p
->keyword
, L
"text-rule")) {
241 ret
.rule
= uadv(p
->keyword
);
242 } else if (!ustricmp(p
->keyword
, L
"text-list-suffix")) {
243 ret
.listsuffix
= uadv(p
->keyword
);
244 } else if (!ustricmp(p
->keyword
, L
"text-emphasis")) {
245 if (*uadv(p
->keyword
) && *uadv(uadv(p
->keyword
))) {
246 ret
.startemph
= uadv(p
->keyword
);
247 ret
.endemph
= uadv(ret
.startemph
);
249 } else if (!ustricmp(p
->keyword
, L
"text-quotes")) {
250 if (*uadv(p
->keyword
) && *uadv(uadv(p
->keyword
))) {
251 ret
.lquote
= uadv(p
->keyword
);
252 ret
.rquote
= uadv(ret
.lquote
);
259 * Now process fallbacks on quote characters, underlines, the
260 * rule character, the emphasis characters, and bullets.
262 while (*uadv(ret
.rquote
) && *uadv(uadv(ret
.rquote
)) &&
263 (!cvt_ok(ret
.charset
, ret
.lquote
) ||
264 !cvt_ok(ret
.charset
, ret
.rquote
))) {
265 ret
.lquote
= uadv(ret
.rquote
);
266 ret
.rquote
= uadv(ret
.lquote
);
269 while (*uadv(ret
.endemph
) && *uadv(uadv(ret
.endemph
)) &&
270 (!cvt_ok(ret
.charset
, ret
.startemph
) ||
271 !cvt_ok(ret
.charset
, ret
.endemph
))) {
272 ret
.startemph
= uadv(ret
.endemph
);
273 ret
.endemph
= uadv(ret
.startemph
);
276 while (*ret
.atitle
.underline
&& *uadv(ret
.atitle
.underline
) &&
277 !cvt_ok(ret
.charset
, ret
.atitle
.underline
))
278 ret
.atitle
.underline
= uadv(ret
.atitle
.underline
);
280 while (*ret
.achapter
.underline
&& *uadv(ret
.achapter
.underline
) &&
281 !cvt_ok(ret
.charset
, ret
.achapter
.underline
))
282 ret
.achapter
.underline
= uadv(ret
.achapter
.underline
);
284 for (n
= 0; n
< ret
.nasect
; n
++) {
285 while (*ret
.asect
[n
].underline
&& *uadv(ret
.asect
[n
].underline
) &&
286 !cvt_ok(ret
.charset
, ret
.asect
[n
].underline
))
287 ret
.asect
[n
].underline
= uadv(ret
.asect
[n
].underline
);
290 while (*ret
.bullet
.text
&& *uadv(ret
.bullet
.text
) &&
291 !cvt_ok(ret
.charset
, ret
.bullet
.text
))
292 ret
.bullet
.text
= uadv(ret
.bullet
.text
);
294 while (*ret
.rule
&& *uadv(ret
.rule
) &&
295 !cvt_ok(ret
.charset
, ret
.rule
))
296 ret
.rule
= uadv(ret
.rule
);
301 paragraph
*text_config_filename(char *filename
)
303 return cmdline_cfg_simple("text-filename", filename
, NULL
);
306 void text_backend(paragraph
*sourceform
, keywordlist
*keywords
,
307 indexdata
*idx
, void *unused
) {
310 word
*prefix
, *body
, *wp
;
313 wchar_t *prefixextra
;
314 int nesting
, nestbase
, nestindent
;
315 int indentb
, indenta
;
318 IGNORE(keywords
); /* we don't happen to need this */
319 IGNORE(idx
); /* or this */
321 conf
= text_configure(sourceform
);
324 * Open the output file.
326 if (!strcmp(conf
.filename
, "-"))
329 tf
.fp
= fopen(conf
.filename
, "w");
331 error(err_cantopenw
, conf
.filename
);
334 tf
.charset
= conf
.charset
;
335 tf
.state
= charset_init_state
;
338 for (p
= sourceform
; p
; p
= p
->next
)
339 if (p
->type
== para_Title
)
340 text_heading(&tf
, NULL
, NULL
, p
->words
,
341 conf
.atitle
, conf
.indent
, conf
.width
, &conf
);
343 nestindent
= conf
.listindentbefore
+ conf
.listindentafter
;
344 nestbase
= (conf
.indent_preambles ?
0 : -conf
.indent
);
347 /* Do the main document */
348 for (p
= sourceform
; p
; p
= p
->next
) switch (p
->type
) {
355 assert(nesting
>= 0);
359 nesting
+= nestindent
;
362 nesting
-= nestindent
;
363 assert(nesting
>= nestbase
);
367 * Things we ignore because we've already processed them or
368 * aren't going to touch them in this pass.
372 case para_Biblio
: /* only touch BiblioCited */
383 case para_UnnumberedChapter
:
384 text_heading(&tf
, p
->kwtext
, p
->kwtext2
, p
->words
,
385 conf
.achapter
, conf
.indent
, conf
.width
, &conf
);
391 text_heading(&tf
, p
->kwtext
, p
->kwtext2
, p
->words
,
392 conf
.asect
[p
->aux
>=conf
.nasect ? conf
.nasect
-1 : p
->aux
],
393 conf
.indent
, conf
.width
, &conf
);
397 text_rule(&tf
, conf
.indent
+ nesting
, conf
.width
- nesting
, &conf
);
402 case para_DescribedThing
:
403 case para_Description
:
404 case para_BiblioCited
:
406 case para_NumberedList
:
407 if (p
->type
== para_Bullet
) {
408 prefix
= &conf
.bullet
;
410 indentb
= conf
.listindentbefore
;
411 indenta
= conf
.listindentafter
;
412 } else if (p
->type
== para_NumberedList
) {
414 prefixextra
= conf
.listsuffix
;
415 indentb
= conf
.listindentbefore
;
416 indenta
= conf
.listindentafter
;
417 } else if (p
->type
== para_Description
) {
420 indentb
= conf
.listindentbefore
;
421 indenta
= conf
.listindentafter
;
425 indentb
= indenta
= 0;
427 if (p
->type
== para_BiblioCited
) {
428 body
= dup_word_list(p
->kwtext
);
429 for (wp
= body
; wp
->next
; wp
= wp
->next
);
430 wp
->next
= &spaceword
;
431 spaceword
.next
= p
->words
;
432 spaceword
.alt
= NULL
;
433 spaceword
.type
= word_WhiteSpace
;
434 spaceword
.text
= NULL
;
439 text_para(&tf
, prefix
, prefixextra
, body
,
440 conf
.indent
+ nesting
+ indentb
, indenta
,
441 conf
.width
- nesting
- indentb
- indenta
, &conf
);
444 free_word_list(body
);
449 text_codepara(&tf
, p
->words
,
450 conf
.indent
+ nesting
+ conf
.indent_code
,
451 conf
.width
- nesting
- 2 * conf
.indent_code
);
455 /* Do the version ID */
456 if (conf
.include_version_id
) {
457 for (p
= sourceform
; p
; p
= p
->next
)
458 if (p
->type
== para_VersionID
)
459 text_versionid(&tf
, p
->words
, &conf
);
465 text_output(&tf
, NULL
); /* end charset conversion */
469 sfree(conf
.filename
);
472 static void text_output(textfile
*tf
, const wchar_t *s
)
487 ret
= charset_from_unicode(sp
, &len
, buf
, lenof(buf
),
488 tf
->charset
, &tf
->state
, NULL
);
491 fwrite(buf
, 1, ret
, tf
->fp
);
495 static void text_output_many(textfile
*tf
, int n
, wchar_t c
)
504 static void text_rdaddw(rdstring
*rs
, word
*text
, word
*end
, textconfig
*cfg
) {
505 for (; text
&& text
!= end
; text
= text
->next
) switch (text
->type
) {
518 case word_WhiteSpace
:
521 case word_WkCodeSpace
:
525 case word_WkCodeQuote
:
526 assert(text
->type
!= word_CodeQuote
&&
527 text
->type
!= word_WkCodeQuote
);
528 if (towordstyle(text
->type
) == word_Emph
&&
529 (attraux(text
->aux
) == attr_First
||
530 attraux(text
->aux
) == attr_Only
))
531 rdadds(rs
, cfg
->startemph
);
532 else if (towordstyle(text
->type
) == word_Code
&&
533 (attraux(text
->aux
) == attr_First
||
534 attraux(text
->aux
) == attr_Only
))
535 rdadds(rs
, cfg
->lquote
);
536 if (removeattr(text
->type
) == word_Normal
) {
537 if (cvt_ok(cfg
->charset
, text
->text
) || !text
->alt
)
538 rdadds(rs
, text
->text
);
540 text_rdaddw(rs
, text
->alt
, NULL
, cfg
);
541 } else if (removeattr(text
->type
) == word_WhiteSpace
) {
543 } else if (removeattr(text
->type
) == word_Quote
) {
544 rdadds(rs
, quoteaux(text
->aux
) == quote_Open ?
545 cfg
->lquote
: cfg
->rquote
);
547 if (towordstyle(text
->type
) == word_Emph
&&
548 (attraux(text
->aux
) == attr_Last
||
549 attraux(text
->aux
) == attr_Only
))
550 rdadds(rs
, cfg
->endemph
);
551 else if (towordstyle(text
->type
) == word_Code
&&
552 (attraux(text
->aux
) == attr_Last
||
553 attraux(text
->aux
) == attr_Only
))
554 rdadds(rs
, cfg
->rquote
);
559 static int text_width(void *, word
*);
561 static int text_width_list(void *ctx
, word
*text
) {
564 w
+= text_width(ctx
, text
);
570 static int text_width(void *ctx
, word
*text
) {
571 textconfig
*cfg
= (textconfig
*)ctx
;
575 switch (text
->type
) {
585 assert(text
->type
< word_internal_endattrs
);
588 attr
= towordstyle(text
->type
);
589 if (attr
== word_Emph
|| attr
== word_Code
) {
590 if (attraux(text
->aux
) == attr_Only
||
591 attraux(text
->aux
) == attr_First
)
592 wid
+= ustrwid(attr
== word_Emph ? cfg
->startemph
: cfg
->lquote
,
595 if (attr
== word_Emph
|| attr
== word_Code
) {
596 if (attraux(text
->aux
) == attr_Only
||
597 attraux(text
->aux
) == attr_Last
)
598 wid
+= ustrwid(attr
== word_Emph ? cfg
->startemph
: cfg
->lquote
,
602 switch (text
->type
) {
607 if (cvt_ok(cfg
->charset
, text
->text
) || !text
->alt
)
608 wid
+= ustrwid(text
->text
, cfg
->charset
);
610 wid
+= text_width_list(ctx
, text
->alt
);
613 case word_WhiteSpace
:
616 case word_WkCodeSpace
:
620 case word_WkCodeQuote
:
621 assert(text
->type
!= word_CodeQuote
&&
622 text
->type
!= word_WkCodeQuote
);
623 if (removeattr(text
->type
) == word_Quote
) {
624 if (quoteaux(text
->aux
) == quote_Open
)
625 wid
+= ustrwid(cfg
->lquote
, cfg
->charset
);
627 wid
+= ustrwid(cfg
->rquote
, cfg
->charset
);
635 static void text_heading(textfile
*tf
, word
*tprefix
, word
*nprefix
,
636 word
*text
, alignstruct align
,
637 int indent
, int width
, textconfig
*cfg
) {
638 rdstring t
= { 0, 0, NULL
};
640 int firstlinewidth
, wrapwidth
;
641 wrappedline
*wrapping
, *p
;
643 if (align
.number_at_all
) {
644 if (align
.just_numbers
&& nprefix
) {
645 text_rdaddw(&t
, nprefix
, NULL
, cfg
);
646 rdadds(&t
, align
.number_suffix
);
647 } else if (!align
.just_numbers
&& tprefix
) {
648 text_rdaddw(&t
, tprefix
, NULL
, cfg
);
649 rdadds(&t
, align
.number_suffix
);
652 margin
= length
= ustrwid(t
.text ? t
.text
: L
"", cfg
->charset
);
654 if (align
.align
== LEFTPLUS
) {
655 margin
= indent
- margin
;
656 if (margin
< 0) margin
= 0;
657 firstlinewidth
= indent
+ width
- margin
- length
;
659 } else if (align
.align
== LEFT
|| align
.align
== CENTRE
) {
661 firstlinewidth
= indent
+ width
- length
;
662 wrapwidth
= indent
+ width
;
665 wrapping
= wrap_para(text
, firstlinewidth
, wrapwidth
,
667 for (p
= wrapping
; p
; p
= p
->next
) {
668 text_rdaddw(&t
, p
->begin
, p
->end
, cfg
);
669 length
= ustrwid(t
.text ? t
.text
: L
"", cfg
->charset
);
670 if (align
.align
== CENTRE
) {
671 margin
= (indent
+ width
- length
)/2;
672 if (margin
< 0) margin
= 0;
674 text_output_many(tf
, margin
, L
' ');
675 text_output(tf
, t
.text
);
676 text_output(tf
, L
"\n");
677 if (*align
.underline
) {
678 text_output_many(tf
, margin
, L
' ');
680 text_output(tf
, align
.underline
);
681 length
-= ustrwid(align
.underline
, cfg
->charset
);
683 text_output(tf
, L
"\n");
685 if (align
.align
== LEFTPLUS
)
693 text_output(tf
, L
"\n");
698 static void text_rule(textfile
*tf
, int indent
, int width
, textconfig
*cfg
) {
699 text_output_many(tf
, indent
, L
' ');
701 text_output(tf
, cfg
->rule
);
702 width
-= ustrwid(cfg
->rule
, cfg
->charset
);
704 text_output_many(tf
, 2, L
'\n');
707 static void text_para(textfile
*tf
, word
*prefix
, wchar_t *prefixextra
,
708 word
*text
, int indent
, int extraindent
, int width
,
710 wrappedline
*wrapping
, *p
;
711 rdstring pfx
= { 0, 0, NULL
};
713 int firstlinewidth
= width
;
716 text_rdaddw(&pfx
, prefix
, NULL
, cfg
);
718 rdadds(&pfx
, prefixextra
);
719 text_output_many(tf
, indent
, L
' ');
720 text_output(tf
, pfx
.text
);
721 /* If the prefix is too long, shorten the first line to fit. */
722 e
= extraindent
- ustrwid(pfx
.text ? pfx
.text
: L
"", cfg
->charset
);
724 firstlinewidth
+= e
; /* this decreases it, since e < 0 */
725 if (firstlinewidth
< 0) {
726 e
= indent
+ extraindent
;
727 firstlinewidth
= width
;
728 text_output(tf
, L
"\n");
734 e
= indent
+ extraindent
;
736 wrapping
= wrap_para(text
, firstlinewidth
, width
,
738 for (p
= wrapping
; p
; p
= p
->next
) {
739 rdstring t
= { 0, 0, NULL
};
740 text_rdaddw(&t
, p
->begin
, p
->end
, cfg
);
741 text_output_many(tf
, e
, L
' ');
742 text_output(tf
, t
.text
);
743 text_output(tf
, L
"\n");
744 e
= indent
+ extraindent
;
748 text_output(tf
, L
"\n");
751 static void text_codepara(textfile
*tf
, word
*text
, int indent
, int width
) {
752 for (; text
; text
= text
->next
) if (text
->type
== word_WeakCode
) {
753 int wid
= ustrwid(text
->text
, tf
->charset
);
755 error(err_text_codeline
, &text
->fpos
, wid
, width
);
756 text_output_many(tf
, indent
, L
' ');
757 text_output(tf
, text
->text
);
758 text_output(tf
, L
"\n");
761 text_output(tf
, L
"\n");
764 static void text_versionid(textfile
*tf
, word
*text
, textconfig
*cfg
) {
765 rdstring t
= { 0, 0, NULL
};
768 text_rdaddw(&t
, text
, NULL
, cfg
);
772 text_output(tf
, t
.text
);