Add support for doing different underlining of different heading levels
[sgt/halibut] / bk_info.c
1 /*
2 * info backend for Halibut
3 *
4 * Possible future work:
5 *
6 * - configurable choice of how to allocate node names?
7 * + possibly a template-like approach, choosing node names to
8 * be the full section title or perhaps the internal keyword?
9 * + neither of those seems quite right. Perhaps instead a
10 * Windows Help-like mechanism, where a magic config
11 * directive allows user choice of name for every node.
12 * + Only trouble with that is, now what happens to the section
13 * numbers? Do they become completely vestigial and just sit
14 * in the title text of each node? Or do we keep them in the
15 * menus somehow? I think people might occasionally want to
16 * go to a section by number, if only because all the _other_
17 * formats of the same document will reference the numbers
18 * all the time. So our menu lines could look like one of
19 * these:
20 * * Nodename: Section 1.2. Title of section.
21 * * Section 1.2: Nodename. Title of section.
22 *
23 * - might be helpful to diagnose duplicate node names!
24 *
25 * - more flexibility in heading underlines, like text backend.
26 * + Given info.el's fontifier, we'd want the following defaults:
27 * \cfg{info-title-underline}{*}
28 * \cfg{info-chapter-underline}{=}
29 * \cfg{info-section-underline}{0}{-}
30 * \cfg{info-section-underline}{1}{.}
31 *
32 * - Indices generated by makeinfo use a menu rather than a bunch of
33 * cross-references, which reduces visual clutter rather. For
34 * singly-referenced items, it looks like:
35 * * toner cartridge, replacing: Toner.
36 * It does a horrid job on multiply-referenced entries, though,
37 * perhaps because the name before the colon is meant to be unique.
38 * Info's 'i' command requires the use of a menu -- it fails to
39 * find any index entries at all with Halibut's current index format.
40 *
41 * - The string "*note" is matched case-insensitively, so we could
42 * make things slightly less ugly by using the lower-case version
43 * when the user asks for \k. Unfortunately, standalone Info seems
44 * to match node names case-sensitively, so we can't downcase that.
45 */
46
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <assert.h>
50 #include "halibut.h"
51
/*
 * Formatting settings attached to one class of heading (title,
 * chapter, or a particular section depth). The only setting at
 * present is the string used to underline the heading text;
 * info_heading() draws no underline at all when it is empty.
 */
typedef struct {
    wchar_t *underline;                /* underline string for this heading */
} alignstruct;
55
56 typedef struct {
57 char *filename;
58 int maxfilesize;
59 int charset;
60 int listindentbefore, listindentafter;
61 int indent_code, width, index_width;
62 alignstruct atitle, achapter, *asect;
63 int nasect;
64 wchar_t *bullet, *listsuffix;
65 wchar_t *startemph, *endemph;
66 wchar_t *lquote, *rquote;
67 wchar_t *sectsuffix;
68 wchar_t *rule;
69 wchar_t *index_text;
70 } infoconfig;
71
72 typedef struct {
73 rdstringc output;
74 int charset;
75 charset_state state;
76 int wcmode;
77 } info_data;
78 #define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE }
79 static const info_data empty_info_data = EMPTY_INFO_DATA;
80
81 typedef struct node_tag node;
82 struct node_tag {
83 node *listnext;
84 node *up, *prev, *next, *lastchild;
85 int pos, started_menu, filenum;
86 char *name;
87 info_data text;
88 };
89
90 typedef struct {
91 char *text;
92 int length;
93 int nnodes, nodesize;
94 node **nodes;
95 } info_idx;
96
97 static int info_rdadd(info_data *, wchar_t);
98 static int info_rdadds(info_data *, wchar_t const *);
99 static int info_rdaddc(info_data *, char);
100 static int info_rdaddsc(info_data *, char const *);
101
102 static void info_heading(info_data *, word *, word *, alignstruct, int,
103 infoconfig *);
104 static void info_rule(info_data *, int, int, infoconfig *);
105 static void info_para(info_data *, word *, wchar_t *, word *, keywordlist *,
106 int, int, int, infoconfig *);
107 static void info_codepara(info_data *, word *, int, int);
108 static void info_versionid(info_data *, word *, infoconfig *);
109 static void info_menu_item(info_data *, node *, paragraph *, infoconfig *);
110 static word *info_transform_wordlist(word *, keywordlist *);
111 static int info_check_index(word *, node *, indexdata *);
112
113 static int info_rdaddwc(info_data *, word *, word *, int, infoconfig *);
114
115 static node *info_node_new(char *name, int charset);
116 static char *info_node_name_for_para(paragraph *p, infoconfig *);
117 static char *info_node_name_for_text(wchar_t *text, infoconfig *);
118
119 static infoconfig info_configure(paragraph *source) {
120 infoconfig ret;
121 paragraph *p;
122 int n;
123
124 /*
125 * Defaults.
126 */
127 ret.filename = dupstr("output.info");
128 ret.maxfilesize = 64 << 10;
129 ret.charset = CS_ASCII;
130 ret.width = 70;
131 ret.listindentbefore = 1;
132 ret.listindentafter = 3;
133 ret.indent_code = 2;
134 ret.index_width = 40;
135 ret.listsuffix = L".";
136 ret.bullet = L"\x2022\0-\0\0";
137 ret.rule = L"\x2500\0-\0\0";
138 ret.startemph = L"_\0_\0\0";
139 ret.endemph = uadv(ret.startemph);
140 ret.lquote = L"\x2018\0\x2019\0`\0'\0\0";
141 ret.rquote = uadv(ret.lquote);
142 ret.sectsuffix = L": ";
143 /*
144 * Default underline characters are chosen to match those recognised by
145 * Info-fontify-node.
146 */
147 ret.atitle.underline = L"*\0\0";
148 ret.achapter.underline = L"=\0\0";
149 ret.nasect = 2;
150 ret.asect = snewn(ret.nasect, alignstruct);
151 ret.asect[0].underline = L"-\0\0";
152 ret.asect[1].underline = L".\0\0";
153 ret.index_text = L"Index";
154
155 /*
156 * Two-pass configuration so that we can pick up global config
157 * (e.g. `quotes') before having it overridden by specific
158 * config (`info-quotes'), irrespective of the order in which
159 * they occur.
160 */
161 for (p = source; p; p = p->next) {
162 if (p->type == para_Config) {
163 if (!ustricmp(p->keyword, L"quotes")) {
164 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
165 ret.lquote = uadv(p->keyword);
166 ret.rquote = uadv(ret.lquote);
167 }
168 } else if (!ustricmp(p->keyword, L"index")) {
169 ret.index_text = uadv(p->keyword);
170 }
171 }
172 }
173
174 for (p = source; p; p = p->next) {
175 if (p->type == para_Config) {
176 if (!ustricmp(p->keyword, L"info-filename")) {
177 sfree(ret.filename);
178 ret.filename = dupstr(adv(p->origkeyword));
179 } else if (!ustricmp(p->keyword, L"info-charset")) {
180 ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
181 } else if (!ustricmp(p->keyword, L"info-max-file-size")) {
182 ret.maxfilesize = utoi(uadv(p->keyword));
183 } else if (!ustricmp(p->keyword, L"info-width")) {
184 ret.width = utoi(uadv(p->keyword));
185 } else if (!ustricmp(p->keyword, L"info-indent-code")) {
186 ret.indent_code = utoi(uadv(p->keyword));
187 } else if (!ustricmp(p->keyword, L"info-index-width")) {
188 ret.index_width = utoi(uadv(p->keyword));
189 } else if (!ustricmp(p->keyword, L"info-list-indent")) {
190 ret.listindentbefore = utoi(uadv(p->keyword));
191 } else if (!ustricmp(p->keyword, L"info-listitem-indent")) {
192 ret.listindentafter = utoi(uadv(p->keyword));
193 } else if (!ustricmp(p->keyword, L"info-section-suffix")) {
194 ret.sectsuffix = uadv(p->keyword);
195 } else if (!ustricmp(p->keyword, L"info-underline")) {
196 ret.atitle.underline = ret.achapter.underline =
197 uadv(p->keyword);
198 for (n = 0; n < ret.nasect; n++)
199 ret.asect[n].underline = ret.atitle.underline;
200 } else if (!ustricmp(p->keyword, L"info-chapter-underline")) {
201 ret.achapter.underline = uadv(p->keyword);
202 } else if (!ustricmp(p->keyword, L"info-section-underline")) {
203 wchar_t *q = uadv(p->keyword);
204 int n = 0;
205 if (uisdigit(*q)) {
206 n = utoi(q);
207 q = uadv(q);
208 }
209 if (n >= ret.nasect) {
210 int i;
211 ret.asect = sresize(ret.asect, n+1, alignstruct);
212 for (i = ret.nasect; i <= n; i++)
213 ret.asect[i] = ret.asect[ret.nasect-1];
214 ret.nasect = n+1;
215 }
216 ret.asect[n].underline = q;
217 } else if (!ustricmp(p->keyword, L"text-title-underline")) {
218 ret.atitle.underline = uadv(p->keyword);
219 } else if (!ustricmp(p->keyword, L"info-bullet")) {
220 ret.bullet = uadv(p->keyword);
221 } else if (!ustricmp(p->keyword, L"info-rule")) {
222 ret.rule = uadv(p->keyword);
223 } else if (!ustricmp(p->keyword, L"info-list-suffix")) {
224 ret.listsuffix = uadv(p->keyword);
225 } else if (!ustricmp(p->keyword, L"info-emphasis")) {
226 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
227 ret.startemph = uadv(p->keyword);
228 ret.endemph = uadv(ret.startemph);
229 }
230 } else if (!ustricmp(p->keyword, L"info-quotes")) {
231 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
232 ret.lquote = uadv(p->keyword);
233 ret.rquote = uadv(ret.lquote);
234 }
235 }
236 }
237 }
238
239 /*
240 * Now process fallbacks on quote characters, underlines, the
241 * rule character, the emphasis characters, and bullets.
242 */
243 while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) &&
244 (!cvt_ok(ret.charset, ret.lquote) ||
245 !cvt_ok(ret.charset, ret.rquote))) {
246 ret.lquote = uadv(ret.rquote);
247 ret.rquote = uadv(ret.lquote);
248 }
249
250 while (*uadv(ret.endemph) && *uadv(uadv(ret.endemph)) &&
251 (!cvt_ok(ret.charset, ret.startemph) ||
252 !cvt_ok(ret.charset, ret.endemph))) {
253 ret.startemph = uadv(ret.endemph);
254 ret.endemph = uadv(ret.startemph);
255 }
256
257 while (*ret.atitle.underline && *uadv(ret.atitle.underline) &&
258 !cvt_ok(ret.charset, ret.atitle.underline))
259 ret.atitle.underline = uadv(ret.atitle.underline);
260
261 while (*ret.achapter.underline && *uadv(ret.achapter.underline) &&
262 !cvt_ok(ret.charset, ret.achapter.underline))
263 ret.achapter.underline = uadv(ret.achapter.underline);
264
265 for (n = 0; n < ret.nasect; n++) {
266 while (*ret.asect[n].underline && *uadv(ret.asect[n].underline) &&
267 !cvt_ok(ret.charset, ret.asect[n].underline))
268 ret.asect[n].underline = uadv(ret.asect[n].underline);
269 }
270
271 while (*ret.bullet && *uadv(ret.bullet) &&
272 !cvt_ok(ret.charset, ret.bullet))
273 ret.bullet = uadv(ret.bullet);
274
275 while (*ret.rule && *uadv(ret.rule) &&
276 !cvt_ok(ret.charset, ret.rule))
277 ret.rule = uadv(ret.rule);
278
279 return ret;
280 }
281
282 paragraph *info_config_filename(char *filename)
283 {
284 return cmdline_cfg_simple("info-filename", filename, NULL);
285 }
286
287 void info_backend(paragraph *sourceform, keywordlist *keywords,
288 indexdata *idx, void *unused) {
289 paragraph *p;
290 infoconfig conf;
291 word *prefix, *body, *wp;
292 word spaceword;
293 wchar_t *prefixextra;
294 int nesting, nestindent;
295 int indentb, indenta;
296 int filepos;
297 int has_index;
298 info_data intro_text = EMPTY_INFO_DATA;
299 node *topnode, *currnode;
300 word bullet;
301 FILE *fp;
302
303 IGNORE(unused);
304
305 conf = info_configure(sourceform);
306
307 /*
308 * Go through and create a node for each section.
309 */
310 topnode = info_node_new("Top", conf.charset);
311 currnode = topnode;
312 for (p = sourceform; p; p = p->next) switch (p->type) {
313 /*
314 * Chapter titles.
315 */
316 case para_Chapter:
317 case para_Appendix:
318 case para_UnnumberedChapter:
319 case para_Heading:
320 case para_Subsect:
321 {
322 node *newnode, *upnode;
323 char *nodename;
324
325 nodename = info_node_name_for_para(p, &conf);
326 newnode = info_node_new(nodename, conf.charset);
327 sfree(nodename);
328
329 p->private_data = newnode;
330
331 if (p->parent)
332 upnode = (node *)p->parent->private_data;
333 else
334 upnode = topnode;
335 assert(upnode);
336 newnode->up = upnode;
337
338 currnode->next = newnode;
339 newnode->prev = currnode;
340
341 currnode->listnext = newnode;
342 currnode = newnode;
343 }
344 break;
345 default:
346 p->private_data = NULL;
347 break;
348 }
349
350 /*
351 * Set up the display form of each index entry.
352 */
353 {
354 int i;
355 indexentry *entry;
356
357 for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
358 info_idx *ii = snew(info_idx);
359 info_data id = EMPTY_INFO_DATA;
360
361 id.charset = conf.charset;
362
363 ii->nnodes = ii->nodesize = 0;
364 ii->nodes = NULL;
365
366 ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE, &conf);
367
368 ii->text = id.output.text;
369
370 entry->backend_data = ii;
371 }
372 }
373
374 /*
375 * An Info file begins with a piece of introductory text which
376 * is apparently never shown anywhere. This seems to me to be a
377 * good place to put the copyright notice and the version IDs.
378 * Also, Info directory entries are expected to go here.
379 */
380 intro_text.charset = conf.charset;
381
382 info_rdaddsc(&intro_text,
383 "This Info file generated by Halibut, ");
384 info_rdaddsc(&intro_text, version);
385 info_rdaddsc(&intro_text, "\n\n");
386
387 for (p = sourceform; p; p = p->next)
388 if (p->type == para_Config &&
389 !ustricmp(p->keyword, L"info-dir-entry")) {
390 wchar_t *section, *shortname, *longname, *kw;
391 char *s;
392
393 section = uadv(p->keyword);
394 shortname = *section ? uadv(section) : L"";
395 longname = *shortname ? uadv(shortname) : L"";
396 kw = *longname ? uadv(longname) : L"";
397
398 if (!*longname) {
399 error(err_cfginsufarg, &p->fpos, p->origkeyword, 3);
400 continue;
401 }
402
403 info_rdaddsc(&intro_text, "INFO-DIR-SECTION ");
404 info_rdadds(&intro_text, section);
405 info_rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
406 info_rdadds(&intro_text, shortname);
407 info_rdaddsc(&intro_text, ": (");
408 s = dupstr(conf.filename);
409 if (strlen(s) > 5 && !strcmp(s+strlen(s)-5, ".info"))
410 s[strlen(s)-5] = '\0';
411 info_rdaddsc(&intro_text, s);
412 sfree(s);
413 info_rdaddsc(&intro_text, ")");
414 if (*kw) {
415 keyword *kwl = kw_lookup(keywords, kw);
416 if (kwl && kwl->para->private_data) {
417 node *n = (node *)kwl->para->private_data;
418 info_rdaddsc(&intro_text, n->name);
419 }
420 }
421 info_rdaddsc(&intro_text, ". ");
422 info_rdadds(&intro_text, longname);
423 info_rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
424 }
425
426 for (p = sourceform; p; p = p->next)
427 if (p->type == para_Copyright)
428 info_para(&intro_text, NULL, NULL, p->words, keywords,
429 0, 0, conf.width, &conf);
430
431 for (p = sourceform; p; p = p->next)
432 if (p->type == para_VersionID)
433 info_versionid(&intro_text, p->words, &conf);
434
435 if (intro_text.output.text[intro_text.output.pos-1] != '\n')
436 info_rdaddc(&intro_text, '\n');
437
438 /* Do the title */
439 for (p = sourceform; p; p = p->next)
440 if (p->type == para_Title)
441 info_heading(&topnode->text, NULL, p->words,
442 conf.atitle, conf.width, &conf);
443
444 nestindent = conf.listindentbefore + conf.listindentafter;
445 nesting = 0;
446
447 currnode = topnode;
448
449 /* Do the main document */
450 for (p = sourceform; p; p = p->next) switch (p->type) {
451
452 case para_QuotePush:
453 nesting += 2;
454 break;
455 case para_QuotePop:
456 nesting -= 2;
457 assert(nesting >= 0);
458 break;
459
460 case para_LcontPush:
461 nesting += nestindent;
462 break;
463 case para_LcontPop:
464 nesting -= nestindent;
465 assert(nesting >= 0);
466 break;
467
468 /*
469 * Things we ignore because we've already processed them or
470 * aren't going to touch them in this pass.
471 */
472 case para_IM:
473 case para_BR:
474 case para_Biblio: /* only touch BiblioCited */
475 case para_VersionID:
476 case para_NoCite:
477 case para_Title:
478 break;
479
480 /*
481 * Chapter titles.
482 */
483 case para_Chapter:
484 case para_Appendix:
485 case para_UnnumberedChapter:
486 case para_Heading:
487 case para_Subsect:
488 currnode = p->private_data;
489 assert(currnode);
490 assert(currnode->up);
491
492 if (!currnode->up->started_menu) {
493 info_rdaddsc(&currnode->up->text, "* Menu:\n\n");
494 currnode->up->started_menu = TRUE;
495 }
496 info_menu_item(&currnode->up->text, currnode, p, &conf);
497
498 has_index |= info_check_index(p->words, currnode, idx);
499 if (p->type == para_Chapter || p->type == para_Appendix ||
500 p->type == para_UnnumberedChapter)
501 info_heading(&currnode->text, p->kwtext, p->words,
502 conf.achapter, conf.width, &conf);
503 else
504 info_heading(&currnode->text, p->kwtext, p->words,
505 conf.asect[p->aux>=conf.nasect?conf.nasect-1:p->aux],
506 conf.width, &conf);
507 nesting = 0;
508 break;
509
510 case para_Rule:
511 info_rule(&currnode->text, nesting, conf.width - nesting, &conf);
512 break;
513
514 case para_Normal:
515 case para_Copyright:
516 case para_DescribedThing:
517 case para_Description:
518 case para_BiblioCited:
519 case para_Bullet:
520 case para_NumberedList:
521 has_index |= info_check_index(p->words, currnode, idx);
522 if (p->type == para_Bullet) {
523 bullet.next = NULL;
524 bullet.alt = NULL;
525 bullet.type = word_Normal;
526 bullet.text = conf.bullet;
527 prefix = &bullet;
528 prefixextra = NULL;
529 indentb = conf.listindentbefore;
530 indenta = conf.listindentafter;
531 } else if (p->type == para_NumberedList) {
532 prefix = p->kwtext;
533 prefixextra = conf.listsuffix;
534 indentb = conf.listindentbefore;
535 indenta = conf.listindentafter;
536 } else if (p->type == para_Description) {
537 prefix = NULL;
538 prefixextra = NULL;
539 indentb = conf.listindentbefore;
540 indenta = conf.listindentafter;
541 } else {
542 prefix = NULL;
543 prefixextra = NULL;
544 indentb = indenta = 0;
545 }
546 if (p->type == para_BiblioCited) {
547 body = dup_word_list(p->kwtext);
548 for (wp = body; wp->next; wp = wp->next);
549 wp->next = &spaceword;
550 spaceword.next = p->words;
551 spaceword.alt = NULL;
552 spaceword.type = word_WhiteSpace;
553 spaceword.text = NULL;
554 } else {
555 wp = NULL;
556 body = p->words;
557 }
558 info_para(&currnode->text, prefix, prefixextra, body, keywords,
559 nesting + indentb, indenta,
560 conf.width - nesting - indentb - indenta, &conf);
561 if (wp) {
562 wp->next = NULL;
563 free_word_list(body);
564 }
565 break;
566
567 case para_Code:
568 info_codepara(&currnode->text, p->words,
569 nesting + conf.indent_code,
570 conf.width - nesting - 2 * conf.indent_code);
571 break;
572 }
573
574 /*
575 * Create an index node if required.
576 */
577 if (has_index) {
578 node *newnode;
579 int i, j, k;
580 indexentry *entry;
581 char *nodename;
582
583 nodename = info_node_name_for_text(conf.index_text, &conf);
584 newnode = info_node_new(nodename, conf.charset);
585 sfree(nodename);
586
587 newnode->up = topnode;
588
589 currnode->next = newnode;
590 newnode->prev = currnode;
591 currnode->listnext = newnode;
592
593 k = info_rdadds(&newnode->text, conf.index_text);
594 info_rdaddsc(&newnode->text, "\n");
595 while (k > 0) {
596 info_rdadds(&newnode->text, conf.achapter.underline);
597 k -= ustrwid(conf.achapter.underline, conf.charset);
598 }
599 info_rdaddsc(&newnode->text, "\n\n");
600
601 info_menu_item(&topnode->text, newnode, NULL, &conf);
602
603 for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
604 info_idx *ii = (info_idx *)entry->backend_data;
605
606 for (j = 0; j < ii->nnodes; j++) {
607 /*
608 * When we have multiple references for a single
609 * index term, we only display the actual term on
610 * the first line, to make it clear that the terms
611 * really are the same.
612 */
613 if (j == 0)
614 info_rdaddsc(&newnode->text, ii->text);
615 for (k = (j ? 0 : ii->length); k < conf.index_width-2; k++)
616 info_rdaddc(&newnode->text, ' ');
617 info_rdaddsc(&newnode->text, " *Note ");
618 info_rdaddsc(&newnode->text, ii->nodes[j]->name);
619 info_rdaddsc(&newnode->text, "::\n");
620 }
621 }
622 }
623
624 /*
625 * Finalise the text of each node, by adding the ^_ delimiter
626 * and the node line at the top.
627 */
628 for (currnode = topnode; currnode; currnode = currnode->listnext) {
629 char *origtext = currnode->text.output.text;
630 currnode->text = empty_info_data;
631 currnode->text.charset = conf.charset;
632 info_rdaddsc(&currnode->text, "\037\nFile: ");
633 info_rdaddsc(&currnode->text, conf.filename);
634 info_rdaddsc(&currnode->text, ", Node: ");
635 info_rdaddsc(&currnode->text, currnode->name);
636 if (currnode->prev) {
637 info_rdaddsc(&currnode->text, ", Prev: ");
638 info_rdaddsc(&currnode->text, currnode->prev->name);
639 }
640 info_rdaddsc(&currnode->text, ", Up: ");
641 info_rdaddsc(&currnode->text, (currnode->up ?
642 currnode->up->name : "(dir)"));
643 if (currnode->next) {
644 info_rdaddsc(&currnode->text, ", Next: ");
645 info_rdaddsc(&currnode->text, currnode->next->name);
646 }
647 info_rdaddsc(&currnode->text, "\n\n");
648 info_rdaddsc(&currnode->text, origtext);
649 /*
650 * Just make _absolutely_ sure we end with a newline.
651 */
652 if (currnode->text.output.text[currnode->text.output.pos-1] != '\n')
653 info_rdaddc(&currnode->text, '\n');
654
655 sfree(origtext);
656 }
657
658 /*
659 * Compute the offsets for the tag table.
660 */
661 filepos = intro_text.output.pos;
662 for (currnode = topnode; currnode; currnode = currnode->listnext) {
663 currnode->pos = filepos;
664 filepos += currnode->text.output.pos;
665 }
666
667 /*
668 * Split into sub-files.
669 */
670 if (conf.maxfilesize > 0) {
671 int currfilesize = intro_text.output.pos, currfilenum = 1;
672 for (currnode = topnode; currnode; currnode = currnode->listnext) {
673 if (currfilesize > intro_text.output.pos &&
674 currfilesize + currnode->text.output.pos > conf.maxfilesize) {
675 currfilenum++;
676 currfilesize = intro_text.output.pos;
677 }
678 currnode->filenum = currfilenum;
679 currfilesize += currnode->text.output.pos;
680 }
681 }
682
683 /*
684 * Write the primary output file.
685 */
686 fp = fopen(conf.filename, "w");
687 if (!fp) {
688 error(err_cantopenw, conf.filename);
689 return;
690 }
691 fputs(intro_text.output.text, fp);
692 if (conf.maxfilesize == 0) {
693 for (currnode = topnode; currnode; currnode = currnode->listnext)
694 fputs(currnode->text.output.text, fp);
695 } else {
696 int filenum = 0;
697 fprintf(fp, "\037\nIndirect:\n");
698 for (currnode = topnode; currnode; currnode = currnode->listnext)
699 if (filenum != currnode->filenum) {
700 filenum = currnode->filenum;
701 fprintf(fp, "%s-%d: %d\n", conf.filename, filenum,
702 currnode->pos);
703 }
704 }
705 fprintf(fp, "\037\nTag Table:\n");
706 if (conf.maxfilesize > 0)
707 fprintf(fp, "(Indirect)\n");
708 for (currnode = topnode; currnode; currnode = currnode->listnext)
709 fprintf(fp, "Node: %s\177%d\n", currnode->name, currnode->pos);
710 fprintf(fp, "\037\nEnd Tag Table\n");
711 fclose(fp);
712
713 /*
714 * Write the subfiles.
715 */
716 if (conf.maxfilesize > 0) {
717 int filenum = 0;
718 fp = NULL;
719
720 for (currnode = topnode; currnode; currnode = currnode->listnext) {
721 if (filenum != currnode->filenum) {
722 char *fname;
723
724 filenum = currnode->filenum;
725
726 if (fp)
727 fclose(fp);
728 fname = snewn(strlen(conf.filename) + 40, char);
729 sprintf(fname, "%s-%d", conf.filename, filenum);
730 fp = fopen(fname, "w");
731 if (!fp) {
732 error(err_cantopenw, fname);
733 return;
734 }
735 sfree(fname);
736 fputs(intro_text.output.text, fp);
737 }
738 fputs(currnode->text.output.text, fp);
739 }
740
741 if (fp)
742 fclose(fp);
743 }
744 }
745
746 static int info_check_index(word *w, node *n, indexdata *idx)
747 {
748 int ret = 0;
749
750 for (; w; w = w->next) {
751 if (w->type == word_IndexRef) {
752 indextag *tag;
753 int i;
754
755 tag = index_findtag(idx, w->text);
756 if (!tag)
757 break;
758
759 for (i = 0; i < tag->nrefs; i++) {
760 indexentry *entry = tag->refs[i];
761 info_idx *ii = (info_idx *)entry->backend_data;
762
763 if (ii->nnodes > 0 && ii->nodes[ii->nnodes-1] == n) {
764 /*
765 * If the same index term is indexed twice
766 * within the same section, we only want to
767 * mention it once in the index. So do nothing
768 * here.
769 */
770 continue;
771 }
772
773 if (ii->nnodes >= ii->nodesize) {
774 ii->nodesize += 32;
775 ii->nodes = sresize(ii->nodes, ii->nodesize, node *);
776 }
777
778 ii->nodes[ii->nnodes++] = n;
779
780 ret = 1;
781 }
782 }
783 }
784
785 return ret;
786 }
787
788 static word *info_transform_wordlist(word *words, keywordlist *keywords)
789 {
790 word *ret = dup_word_list(words);
791 word *w;
792 keyword *kwl;
793
794 for (w = ret; w; w = w->next) {
795 w->private_data = NULL;
796 if (w->type == word_UpperXref || w->type == word_LowerXref) {
797 kwl = kw_lookup(keywords, w->text);
798 if (kwl) {
799 if (kwl->para->type == para_NumberedList ||
800 kwl->para->type == para_BiblioCited) {
801 /*
802 * In Info, we do nothing special for xrefs to
803 * numbered list items or bibliography entries.
804 */
805 continue;
806 } else {
807 /*
808 * An xref to a different section has its text
809 * completely replaced.
810 */
811 word *w2, *w3, *w4;
812 w2 = w3 = w->next;
813 w4 = NULL;
814 while (w2) {
815 if (w2->type == word_XrefEnd) {
816 w4 = w2->next;
817 w2->next = NULL;
818 break;
819 }
820 w2 = w2->next;
821 }
822 free_word_list(w3);
823
824 /*
825 * Now w is the UpperXref / LowerXref we
826 * started with, and w4 is the next word after
827 * the corresponding XrefEnd (if any). The
828 * simplest thing is just to stick a pointer to
829 * the target node structure in the private
830 * data field of the xref word, and let
831 * info_rdaddwc and friends read the node name
832 * out from there.
833 */
834 w->next = w4;
835 w->private_data = kwl->para->private_data;
836 assert(w->private_data);
837 }
838 }
839 }
840 }
841
842 return ret;
843 }
844
845 static int info_rdaddwc(info_data *id, word *words, word *end, int xrefs,
846 infoconfig *cfg) {
847 int ret = 0;
848
849 for (; words && words != end; words = words->next) switch (words->type) {
850 case word_HyperLink:
851 case word_HyperEnd:
852 case word_XrefEnd:
853 case word_IndexRef:
854 break;
855
856 case word_Normal:
857 case word_Emph:
858 case word_Code:
859 case word_WeakCode:
860 case word_WhiteSpace:
861 case word_EmphSpace:
862 case word_CodeSpace:
863 case word_WkCodeSpace:
864 case word_Quote:
865 case word_EmphQuote:
866 case word_CodeQuote:
867 case word_WkCodeQuote:
868 assert(words->type != word_CodeQuote &&
869 words->type != word_WkCodeQuote);
870 if (towordstyle(words->type) == word_Emph &&
871 (attraux(words->aux) == attr_First ||
872 attraux(words->aux) == attr_Only))
873 ret += info_rdadds(id, cfg->startemph);
874 else if (towordstyle(words->type) == word_Code &&
875 (attraux(words->aux) == attr_First ||
876 attraux(words->aux) == attr_Only))
877 ret += info_rdadds(id, cfg->lquote);
878 if (removeattr(words->type) == word_Normal) {
879 if (cvt_ok(id->charset, words->text) || !words->alt)
880 ret += info_rdadds(id, words->text);
881 else
882 ret += info_rdaddwc(id, words->alt, NULL, FALSE, cfg);
883 } else if (removeattr(words->type) == word_WhiteSpace) {
884 ret += info_rdadd(id, L' ');
885 } else if (removeattr(words->type) == word_Quote) {
886 ret += info_rdadds(id, quoteaux(words->aux) == quote_Open ?
887 cfg->lquote : cfg->rquote);
888 }
889 if (towordstyle(words->type) == word_Emph &&
890 (attraux(words->aux) == attr_Last ||
891 attraux(words->aux) == attr_Only))
892 ret += info_rdadds(id, cfg->endemph);
893 else if (towordstyle(words->type) == word_Code &&
894 (attraux(words->aux) == attr_Last ||
895 attraux(words->aux) == attr_Only))
896 ret += info_rdadds(id, cfg->rquote);
897 break;
898
899 case word_UpperXref:
900 case word_LowerXref:
901 if (xrefs && words->private_data) {
902 /*
903 * This bit is structural and so must be done in char
904 * rather than wchar_t.
905 */
906 ret += info_rdaddsc(id, "*Note ");
907 ret += info_rdaddsc(id, ((node *)words->private_data)->name);
908 ret += info_rdaddsc(id, "::");
909 }
910 break;
911 }
912
913 return ret;
914 }
915
916 static int info_width_internal(word *words, int xrefs, infoconfig *cfg);
917
918 static int info_width_internal_list(word *words, int xrefs, infoconfig *cfg) {
919 int w = 0;
920 while (words) {
921 w += info_width_internal(words, xrefs, cfg);
922 words = words->next;
923 }
924 return w;
925 }
926
927 static int info_width_internal(word *words, int xrefs, infoconfig *cfg) {
928 int wid;
929 int attr;
930
931 switch (words->type) {
932 case word_HyperLink:
933 case word_HyperEnd:
934 case word_XrefEnd:
935 case word_IndexRef:
936 return 0;
937
938 case word_UpperXref:
939 case word_LowerXref:
940 if (xrefs && words->private_data) {
941 /* "*Note " plus "::" comes to 8 characters */
942 return 8 + strwid(((node *)words->private_data)->name,
943 cfg->charset);
944 } else
945 return 0;
946 }
947
948 assert(words->type < word_internal_endattrs);
949
950 wid = 0;
951 attr = towordstyle(words->type);
952
953 if (attr == word_Emph || attr == word_Code) {
954 if (attraux(words->aux) == attr_Only ||
955 attraux(words->aux) == attr_First)
956 wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
957 cfg->charset);
958 }
959 if (attr == word_Emph || attr == word_Code) {
960 if (attraux(words->aux) == attr_Only ||
961 attraux(words->aux) == attr_Last)
962 wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
963 cfg->charset);
964 }
965
966 switch (words->type) {
967 case word_Normal:
968 case word_Emph:
969 case word_Code:
970 case word_WeakCode:
971 if (cvt_ok(cfg->charset, words->text) || !words->alt)
972 wid += ustrwid(words->text, cfg->charset);
973 else
974 wid += info_width_internal_list(words->alt, xrefs, cfg);
975 return wid;
976
977 case word_WhiteSpace:
978 case word_EmphSpace:
979 case word_CodeSpace:
980 case word_WkCodeSpace:
981 case word_Quote:
982 case word_EmphQuote:
983 case word_CodeQuote:
984 case word_WkCodeQuote:
985 assert(words->type != word_CodeQuote &&
986 words->type != word_WkCodeQuote);
987 if (removeattr(words->type) == word_Quote) {
988 if (quoteaux(words->aux) == quote_Open)
989 wid += ustrwid(cfg->lquote, cfg->charset);
990 else
991 wid += ustrwid(cfg->rquote, cfg->charset);
992 } else
993 wid++; /* space */
994 }
995 return wid;
996 }
997
998 static int info_width_noxrefs(void *ctx, word *words)
999 {
1000 return info_width_internal(words, FALSE, (infoconfig *)ctx);
1001 }
1002 static int info_width_xrefs(void *ctx, word *words)
1003 {
1004 return info_width_internal(words, TRUE, (infoconfig *)ctx);
1005 }
1006
1007 static void info_heading(info_data *text, word *tprefix,
1008 word *words, alignstruct align,
1009 int width, infoconfig *cfg) {
1010 int length;
1011 int firstlinewidth, wrapwidth;
1012 wrappedline *wrapping, *p;
1013
1014 length = 0;
1015 if (tprefix) {
1016 length += info_rdaddwc(text, tprefix, NULL, FALSE, cfg);
1017 length += info_rdadds(text, cfg->sectsuffix);
1018 }
1019
1020 wrapwidth = width;
1021 firstlinewidth = width - length;
1022
1023 wrapping = wrap_para(words, firstlinewidth, wrapwidth,
1024 info_width_noxrefs, cfg, 0);
1025 for (p = wrapping; p; p = p->next) {
1026 length += info_rdaddwc(text, p->begin, p->end, FALSE, cfg);
1027 info_rdadd(text, L'\n');
1028 if (*align.underline) {
1029 while (length > 0) {
1030 info_rdadds(text, align.underline);
1031 length -= ustrwid(align.underline, cfg->charset);
1032 }
1033 info_rdadd(text, L'\n');
1034 }
1035 length = 0;
1036 }
1037 wrap_free(wrapping);
1038 info_rdadd(text, L'\n');
1039 }
1040
1041 static void info_rule(info_data *text, int indent, int width, infoconfig *cfg)
1042 {
1043 while (indent--) info_rdadd(text, L' ');
1044 while (width > 0) {
1045 info_rdadds(text, cfg->rule);
1046 width -= ustrwid(cfg->rule, cfg->charset);
1047 }
1048 info_rdadd(text, L'\n');
1049 info_rdadd(text, L'\n');
1050 }
1051
1052 static void info_para(info_data *text, word *prefix, wchar_t *prefixextra,
1053 word *input, keywordlist *keywords, int indent,
1054 int extraindent, int width, infoconfig *cfg) {
1055 wrappedline *wrapping, *p;
1056 word *words;
1057 int e;
1058 int i;
1059 int firstlinewidth = width;
1060
1061 words = info_transform_wordlist(input, keywords);
1062
1063 if (prefix) {
1064 for (i = 0; i < indent; i++)
1065 info_rdadd(text, L' ');
1066 e = info_rdaddwc(text, prefix, NULL, FALSE, cfg);
1067 if (prefixextra)
1068 e += info_rdadds(text, prefixextra);
1069 /* If the prefix is too long, shorten the first line to fit. */
1070 e = extraindent - e;
1071 if (e < 0) {
1072 firstlinewidth += e; /* this decreases it, since e < 0 */
1073 if (firstlinewidth < 0) {
1074 e = indent + extraindent;
1075 firstlinewidth = width;
1076 info_rdadd(text, L'\n');
1077 } else
1078 e = 0;
1079 }
1080 } else
1081 e = indent + extraindent;
1082
1083 wrapping = wrap_para(words, firstlinewidth, width, info_width_xrefs,
1084 cfg, 0);
1085 for (p = wrapping; p; p = p->next) {
1086 for (i = 0; i < e; i++)
1087 info_rdadd(text, L' ');
1088 info_rdaddwc(text, p->begin, p->end, TRUE, cfg);
1089 info_rdadd(text, L'\n');
1090 e = indent + extraindent;
1091 }
1092 wrap_free(wrapping);
1093 info_rdadd(text, L'\n');
1094
1095 free_word_list(words);
1096 }
1097
1098 static void info_codepara(info_data *text, word *words,
1099 int indent, int width) {
1100 int i;
1101
1102 for (; words; words = words->next) if (words->type == word_WeakCode) {
1103 for (i = 0; i < indent; i++)
1104 info_rdadd(text, L' ');
1105 if (info_rdadds(text, words->text) > width) {
1106 /* FIXME: warn */
1107 }
1108 info_rdadd(text, L'\n');
1109 }
1110
1111 info_rdadd(text, L'\n');
1112 }
1113
1114 static void info_versionid(info_data *text, word *words, infoconfig *cfg) {
1115 info_rdadd(text, L'[');
1116 info_rdaddwc(text, words, NULL, FALSE, cfg);
1117 info_rdadds(text, L"]\n");
1118 }
1119
1120 static node *info_node_new(char *name, int charset)
1121 {
1122 node *n;
1123
1124 n = snew(node);
1125 n->text = empty_info_data;
1126 n->text.charset = charset;
1127 n->up = n->next = n->prev = n->lastchild = n->listnext = NULL;
1128 n->name = dupstr(name);
1129 n->started_menu = FALSE;
1130
1131 return n;
1132 }
1133
1134 static char *info_node_name_core(info_data *id, filepos *fpos)
1135 {
1136 char *p, *q;
1137
1138 /*
1139 * We cannot have commas, colons or parentheses in a node name.
1140 * Remove any that we find, with a warning.
1141 */
1142 p = q = id->output.text;
1143 while (*p) {
1144 if (*p == ':' || *p == ',' || *p == '(' || *p == ')') {
1145 error(err_infonodechar, fpos, *p);
1146 } else {
1147 *q++ = *p;
1148 }
1149 p++;
1150 }
1151 *q = '\0';
1152
1153 return id->output.text;
1154 }
1155
1156 static char *info_node_name_for_para(paragraph *par, infoconfig *cfg)
1157 {
1158 info_data id = EMPTY_INFO_DATA;
1159
1160 id.charset = cfg->charset;
1161 info_rdaddwc(&id, par->kwtext ? par->kwtext : par->words,
1162 NULL, FALSE, cfg);
1163 info_rdaddsc(&id, NULL);
1164
1165 return info_node_name_core(&id, &par->fpos);
1166 }
1167
1168 static char *info_node_name_for_text(wchar_t *text, infoconfig *cfg)
1169 {
1170 info_data id = EMPTY_INFO_DATA;
1171
1172 id.charset = cfg->charset;
1173 info_rdadds(&id, text);
1174 info_rdaddsc(&id, NULL);
1175
1176 return info_node_name_core(&id, NULL);
1177 }
1178
1179 static void info_menu_item(info_data *text, node *n, paragraph *p,
1180 infoconfig *cfg)
1181 {
1182 /*
1183 * FIXME: Depending on how we're doing node names in this info
1184 * file, we might want to do
1185 *
1186 * * Node name:: Chapter title
1187 *
1188 * _or_
1189 *
1190 * * Chapter number: Node name.
1191 *
1192 * This function mostly works in char rather than wchar_t,
1193 * because a menu item is a structural component.
1194 */
1195 info_rdaddsc(text, "* ");
1196 info_rdaddsc(text, n->name);
1197 info_rdaddsc(text, "::");
1198 if (p) {
1199 info_rdaddc(text, ' ');
1200 info_rdaddwc(text, p->words, NULL, FALSE, cfg);
1201 }
1202 info_rdaddc(text, '\n');
1203 }
1204
1205 /*
1206 * These functions implement my wrapper on the rdadd* calls which
1207 * allows me to switch arbitrarily between literal octet-string
1208 * text and charset-translated Unicode. (Because no matter what
1209 * character set I write the actual text in, I expect info readers
1210 * to treat node names and file names literally and to expect
1211 * keywords like `*Note' in their canonical form, so I have to take
1212 * steps to ensure that those structural elements of the file
1213 * aren't messed with.)
1214 */
1215 static int info_rdadds(info_data *d, wchar_t const *wcs)
1216 {
1217 if (!d->wcmode) {
1218 d->state = charset_init_state;
1219 d->wcmode = TRUE;
1220 }
1221
1222 if (wcs) {
1223 char buf[256];
1224 int len, width, ret;
1225
1226 width = ustrwid(wcs, d->charset);
1227
1228 len = ustrlen(wcs);
1229 while (len > 0) {
1230 int prevlen = len;
1231
1232 ret = charset_from_unicode(&wcs, &len, buf, lenof(buf),
1233 d->charset, &d->state, NULL);
1234
1235 assert(len < prevlen);
1236
1237 if (ret > 0) {
1238 buf[ret] = '\0';
1239 rdaddsc(&d->output, buf);
1240 }
1241 }
1242
1243 return width;
1244 } else
1245 return 0;
1246 }
1247
1248 static int info_rdaddsc(info_data *d, char const *cs)
1249 {
1250 if (d->wcmode) {
1251 char buf[256];
1252 int ret;
1253
1254 ret = charset_from_unicode(NULL, 0, buf, lenof(buf),
1255 d->charset, &d->state, NULL);
1256 if (ret > 0) {
1257 buf[ret] = '\0';
1258 rdaddsc(&d->output, buf);
1259 }
1260
1261 d->wcmode = FALSE;
1262 }
1263
1264 if (cs) {
1265 rdaddsc(&d->output, cs);
1266 return strwid(cs, d->charset);
1267 } else
1268 return 0;
1269 }
1270
1271 static int info_rdadd(info_data *d, wchar_t wc)
1272 {
1273 wchar_t wcs[2];
1274 wcs[0] = wc;
1275 wcs[1] = L'\0';
1276 return info_rdadds(d, wcs);
1277 }
1278
1279 static int info_rdaddc(info_data *d, char c)
1280 {
1281 char cs[2];
1282 cs[0] = c;
1283 cs[1] = '\0';
1284 return info_rdaddsc(d, cs);
1285 }