A few thoughts I had while actually using Halibut's Info output.
[sgt/halibut] / bk_info.c
1 /*
2 * info backend for Halibut
3 *
4 * Possible future work:
5 *
6 * - configurable choice of how to allocate node names?
7 * + possibly a template-like approach, choosing node names to
8 * be the full section title or perhaps the internal keyword?
9 * + neither of those seems quite right. Perhaps instead a
10 * Windows Help-like mechanism, where a magic config
11 * directive allows user choice of name for every node.
12 * + Only trouble with that is, now what happens to the section
13 * numbers? Do they become completely vestigial and just sit
14 * in the title text of each node? Or do we keep them in the
15 * menus somehow? I think people might occasionally want to
16 * go to a section by number, if only because all the _other_
17 * formats of the same document will reference the numbers
18 * all the time. So our menu lines could look like one of
19 * these:
20 * * Nodename: Section 1.2. Title of section.
21 * * Section 1.2: Nodename. Title of section.
22 *
23 * - might be helpful to diagnose duplicate node names!
24 *
25 * - more flexibility in heading underlines, like text backend.
26 * + Given info.el's fontifier, we'd want the following defaults:
27 * \cfg{info-title-underline}{*}
28 * \cfg{info-chapter-underline}{=}
29 * \cfg{info-section-underline}{0}{-}
30 * \cfg{info-section-underline}{1}{.}
31 *
32 * - Indices generated by makeinfo use a menu rather than a bunch of
33 * cross-references, which reduces visual clutter rather. For
34 * singly-referenced items, it looks like:
35 * * toner cartridge, replacing: Toner.
36 * It does a horrid job on multiply-referenced entries, though,
37 * perhaps because the name before the colon is meant to be unique.
38 *
39 * - The string "*note" is matched case-insensitively, so we could
40 * make things slightly less ugly by using the lower-case version
41 * when the user asks for \k. Unfortunately, standalone Info seems
42 * to match node names case-sensitively, so we can't downcase that.
43 */
44
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <assert.h>
48 #include "halibut.h"
49
/*
 * Settings for one run of the Info backend. info_configure() fills
 * this in from built-in defaults and then from \cfg paragraphs.
 *
 * The wchar_t strings either point at string literals or into the
 * keyword text of a configuration paragraph; only `filename' is
 * allocated (with dupstr) by info_configure().
 */
typedef struct {
    char *filename;            /* name of the primary output file */
    int maxfilesize;           /* size threshold used when splitting into subfiles */
    int charset;               /* output character set */
    int listindentbefore;      /* indent before a list item's bullet/number */
    int listindentafter;       /* space reserved for the bullet/number itself */
    int indent_code;           /* indent applied to code paragraphs */
    int width;                 /* overall text width */
    int index_width;           /* column at which index cross-references start */
    wchar_t *bullet;           /* bullet text for bulleted lists */
    wchar_t *listsuffix;       /* text appended after a list item number */
    wchar_t *startemph;        /* text emitted before emphasised words */
    wchar_t *endemph;          /* text emitted after emphasised words */
    wchar_t *lquote;           /* opening quote */
    wchar_t *rquote;           /* closing quote */
    wchar_t *sectsuffix;       /* text between a section number and its title */
    wchar_t *underline;        /* string repeated to underline headings */
    wchar_t *rule;             /* string repeated to draw a horizontal rule */
    wchar_t *index_text;       /* title of the index node */
} infoconfig;
63
/*
 * An output buffer which can be appended to either as literal chars
 * or as wide characters converted into `charset' (see the info_rdadd*
 * functions).
 */
typedef struct {
    rdstringc output;          /* the accumulated output bytes */
    int charset;               /* charset that wide text is converted into */
    charset_state state;       /* conversion state carried between info_rdadds calls */
    int wcmode;                /* TRUE while the most recent append was wide text */
} info_data;
/* Initialiser for an empty info_data; the caller sets `charset' afterwards. */
#define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE }
/* Assignable copy of the above, for resetting an existing info_data. */
static const info_data empty_info_data = EMPTY_INFO_DATA;
72
73 typedef struct node_tag node;
74 struct node_tag {
75 node *listnext;
76 node *up, *prev, *next, *lastchild;
77 int pos, started_menu, filenum;
78 char *name;
79 info_data text;
80 };
81
82 typedef struct {
83 char *text;
84 int length;
85 int nnodes, nodesize;
86 node **nodes;
87 } info_idx;
88
89 static int info_rdadd(info_data *, wchar_t);
90 static int info_rdadds(info_data *, wchar_t const *);
91 static int info_rdaddc(info_data *, char);
92 static int info_rdaddsc(info_data *, char const *);
93
94 static void info_heading(info_data *, word *, word *, int, infoconfig *);
95 static void info_rule(info_data *, int, int, infoconfig *);
96 static void info_para(info_data *, word *, wchar_t *, word *, keywordlist *,
97 int, int, int, infoconfig *);
98 static void info_codepara(info_data *, word *, int, int);
99 static void info_versionid(info_data *, word *, infoconfig *);
100 static void info_menu_item(info_data *, node *, paragraph *, infoconfig *);
101 static word *info_transform_wordlist(word *, keywordlist *);
102 static int info_check_index(word *, node *, indexdata *);
103
104 static int info_rdaddwc(info_data *, word *, word *, int, infoconfig *);
105
106 static node *info_node_new(char *name, int charset);
107 static char *info_node_name_for_para(paragraph *p, infoconfig *);
108 static char *info_node_name_for_text(wchar_t *text, infoconfig *);
109
110 static infoconfig info_configure(paragraph *source) {
111 infoconfig ret;
112 paragraph *p;
113
114 /*
115 * Defaults.
116 */
117 ret.filename = dupstr("output.info");
118 ret.maxfilesize = 64 << 10;
119 ret.charset = CS_ASCII;
120 ret.width = 70;
121 ret.listindentbefore = 1;
122 ret.listindentafter = 3;
123 ret.indent_code = 2;
124 ret.index_width = 40;
125 ret.listsuffix = L".";
126 ret.bullet = L"\x2022\0-\0\0";
127 ret.rule = L"\x2500\0-\0\0";
128 ret.startemph = L"_\0_\0\0";
129 ret.endemph = uadv(ret.startemph);
130 ret.lquote = L"\x2018\0\x2019\0`\0'\0\0";
131 ret.rquote = uadv(ret.lquote);
132 ret.sectsuffix = L": ";
133 ret.underline = L"\x203E\0-\0\0";
134 ret.index_text = L"Index";
135
136 /*
137 * Two-pass configuration so that we can pick up global config
138 * (e.g. `quotes') before having it overridden by specific
139 * config (`info-quotes'), irrespective of the order in which
140 * they occur.
141 */
142 for (p = source; p; p = p->next) {
143 if (p->type == para_Config) {
144 if (!ustricmp(p->keyword, L"quotes")) {
145 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
146 ret.lquote = uadv(p->keyword);
147 ret.rquote = uadv(ret.lquote);
148 }
149 } else if (!ustricmp(p->keyword, L"index")) {
150 ret.index_text = uadv(p->keyword);
151 }
152 }
153 }
154
155 for (p = source; p; p = p->next) {
156 if (p->type == para_Config) {
157 if (!ustricmp(p->keyword, L"info-filename")) {
158 sfree(ret.filename);
159 ret.filename = dupstr(adv(p->origkeyword));
160 } else if (!ustricmp(p->keyword, L"info-charset")) {
161 ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
162 } else if (!ustricmp(p->keyword, L"info-max-file-size")) {
163 ret.maxfilesize = utoi(uadv(p->keyword));
164 } else if (!ustricmp(p->keyword, L"info-width")) {
165 ret.width = utoi(uadv(p->keyword));
166 } else if (!ustricmp(p->keyword, L"info-indent-code")) {
167 ret.indent_code = utoi(uadv(p->keyword));
168 } else if (!ustricmp(p->keyword, L"info-index-width")) {
169 ret.index_width = utoi(uadv(p->keyword));
170 } else if (!ustricmp(p->keyword, L"info-list-indent")) {
171 ret.listindentbefore = utoi(uadv(p->keyword));
172 } else if (!ustricmp(p->keyword, L"info-listitem-indent")) {
173 ret.listindentafter = utoi(uadv(p->keyword));
174 } else if (!ustricmp(p->keyword, L"info-section-suffix")) {
175 ret.sectsuffix = uadv(p->keyword);
176 } else if (!ustricmp(p->keyword, L"info-underline")) {
177 ret.underline = uadv(p->keyword);
178 } else if (!ustricmp(p->keyword, L"info-bullet")) {
179 ret.bullet = uadv(p->keyword);
180 } else if (!ustricmp(p->keyword, L"info-rule")) {
181 ret.rule = uadv(p->keyword);
182 } else if (!ustricmp(p->keyword, L"info-list-suffix")) {
183 ret.listsuffix = uadv(p->keyword);
184 } else if (!ustricmp(p->keyword, L"info-emphasis")) {
185 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
186 ret.startemph = uadv(p->keyword);
187 ret.endemph = uadv(ret.startemph);
188 }
189 } else if (!ustricmp(p->keyword, L"info-quotes")) {
190 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
191 ret.lquote = uadv(p->keyword);
192 ret.rquote = uadv(ret.lquote);
193 }
194 }
195 }
196 }
197
198 /*
199 * Now process fallbacks on quote characters, underlines, the
200 * rule character, the emphasis characters, and bullets.
201 */
202 while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) &&
203 (!cvt_ok(ret.charset, ret.lquote) ||
204 !cvt_ok(ret.charset, ret.rquote))) {
205 ret.lquote = uadv(ret.rquote);
206 ret.rquote = uadv(ret.lquote);
207 }
208
209 while (*uadv(ret.endemph) && *uadv(uadv(ret.endemph)) &&
210 (!cvt_ok(ret.charset, ret.startemph) ||
211 !cvt_ok(ret.charset, ret.endemph))) {
212 ret.startemph = uadv(ret.endemph);
213 ret.endemph = uadv(ret.startemph);
214 }
215
216 while (*ret.underline && *uadv(ret.underline) &&
217 !cvt_ok(ret.charset, ret.underline))
218 ret.underline = uadv(ret.underline);
219
220 while (*ret.bullet && *uadv(ret.bullet) &&
221 !cvt_ok(ret.charset, ret.bullet))
222 ret.bullet = uadv(ret.bullet);
223
224 while (*ret.rule && *uadv(ret.rule) &&
225 !cvt_ok(ret.charset, ret.rule))
226 ret.rule = uadv(ret.rule);
227
228 return ret;
229 }
230
231 paragraph *info_config_filename(char *filename)
232 {
233 return cmdline_cfg_simple("info-filename", filename, NULL);
234 }
235
236 void info_backend(paragraph *sourceform, keywordlist *keywords,
237 indexdata *idx, void *unused) {
238 paragraph *p;
239 infoconfig conf;
240 word *prefix, *body, *wp;
241 word spaceword;
242 wchar_t *prefixextra;
243 int nesting, nestindent;
244 int indentb, indenta;
245 int filepos;
246 int has_index;
247 info_data intro_text = EMPTY_INFO_DATA;
248 node *topnode, *currnode;
249 word bullet;
250 FILE *fp;
251
252 IGNORE(unused);
253
254 conf = info_configure(sourceform);
255
256 /*
257 * Go through and create a node for each section.
258 */
259 topnode = info_node_new("Top", conf.charset);
260 currnode = topnode;
261 for (p = sourceform; p; p = p->next) switch (p->type) {
262 /*
263 * Chapter titles.
264 */
265 case para_Chapter:
266 case para_Appendix:
267 case para_UnnumberedChapter:
268 case para_Heading:
269 case para_Subsect:
270 {
271 node *newnode, *upnode;
272 char *nodename;
273
274 nodename = info_node_name_for_para(p, &conf);
275 newnode = info_node_new(nodename, conf.charset);
276 sfree(nodename);
277
278 p->private_data = newnode;
279
280 if (p->parent)
281 upnode = (node *)p->parent->private_data;
282 else
283 upnode = topnode;
284 assert(upnode);
285 newnode->up = upnode;
286
287 currnode->next = newnode;
288 newnode->prev = currnode;
289
290 currnode->listnext = newnode;
291 currnode = newnode;
292 }
293 break;
294 default:
295 p->private_data = NULL;
296 break;
297 }
298
299 /*
300 * Set up the display form of each index entry.
301 */
302 {
303 int i;
304 indexentry *entry;
305
306 for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
307 info_idx *ii = snew(info_idx);
308 info_data id = EMPTY_INFO_DATA;
309
310 id.charset = conf.charset;
311
312 ii->nnodes = ii->nodesize = 0;
313 ii->nodes = NULL;
314
315 ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE, &conf);
316
317 ii->text = id.output.text;
318
319 entry->backend_data = ii;
320 }
321 }
322
323 /*
324 * An Info file begins with a piece of introductory text which
325 * is apparently never shown anywhere. This seems to me to be a
326 * good place to put the copyright notice and the version IDs.
327 * Also, Info directory entries are expected to go here.
328 */
329 intro_text.charset = conf.charset;
330
331 info_rdaddsc(&intro_text,
332 "This Info file generated by Halibut, ");
333 info_rdaddsc(&intro_text, version);
334 info_rdaddsc(&intro_text, "\n\n");
335
336 for (p = sourceform; p; p = p->next)
337 if (p->type == para_Config &&
338 !ustricmp(p->keyword, L"info-dir-entry")) {
339 wchar_t *section, *shortname, *longname, *kw;
340 char *s;
341
342 section = uadv(p->keyword);
343 shortname = *section ? uadv(section) : L"";
344 longname = *shortname ? uadv(shortname) : L"";
345 kw = *longname ? uadv(longname) : L"";
346
347 if (!*longname) {
348 error(err_cfginsufarg, &p->fpos, p->origkeyword, 3);
349 continue;
350 }
351
352 info_rdaddsc(&intro_text, "INFO-DIR-SECTION ");
353 info_rdadds(&intro_text, section);
354 info_rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
355 info_rdadds(&intro_text, shortname);
356 info_rdaddsc(&intro_text, ": (");
357 s = dupstr(conf.filename);
358 if (strlen(s) > 5 && !strcmp(s+strlen(s)-5, ".info"))
359 s[strlen(s)-5] = '\0';
360 info_rdaddsc(&intro_text, s);
361 sfree(s);
362 info_rdaddsc(&intro_text, ")");
363 if (*kw) {
364 keyword *kwl = kw_lookup(keywords, kw);
365 if (kwl && kwl->para->private_data) {
366 node *n = (node *)kwl->para->private_data;
367 info_rdaddsc(&intro_text, n->name);
368 }
369 }
370 info_rdaddsc(&intro_text, ". ");
371 info_rdadds(&intro_text, longname);
372 info_rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
373 }
374
375 for (p = sourceform; p; p = p->next)
376 if (p->type == para_Copyright)
377 info_para(&intro_text, NULL, NULL, p->words, keywords,
378 0, 0, conf.width, &conf);
379
380 for (p = sourceform; p; p = p->next)
381 if (p->type == para_VersionID)
382 info_versionid(&intro_text, p->words, &conf);
383
384 if (intro_text.output.text[intro_text.output.pos-1] != '\n')
385 info_rdaddc(&intro_text, '\n');
386
387 /* Do the title */
388 for (p = sourceform; p; p = p->next)
389 if (p->type == para_Title)
390 info_heading(&topnode->text, NULL, p->words, conf.width, &conf);
391
392 nestindent = conf.listindentbefore + conf.listindentafter;
393 nesting = 0;
394
395 currnode = topnode;
396
397 /* Do the main document */
398 for (p = sourceform; p; p = p->next) switch (p->type) {
399
400 case para_QuotePush:
401 nesting += 2;
402 break;
403 case para_QuotePop:
404 nesting -= 2;
405 assert(nesting >= 0);
406 break;
407
408 case para_LcontPush:
409 nesting += nestindent;
410 break;
411 case para_LcontPop:
412 nesting -= nestindent;
413 assert(nesting >= 0);
414 break;
415
416 /*
417 * Things we ignore because we've already processed them or
418 * aren't going to touch them in this pass.
419 */
420 case para_IM:
421 case para_BR:
422 case para_Biblio: /* only touch BiblioCited */
423 case para_VersionID:
424 case para_NoCite:
425 case para_Title:
426 break;
427
428 /*
429 * Chapter titles.
430 */
431 case para_Chapter:
432 case para_Appendix:
433 case para_UnnumberedChapter:
434 case para_Heading:
435 case para_Subsect:
436 currnode = p->private_data;
437 assert(currnode);
438 assert(currnode->up);
439
440 if (!currnode->up->started_menu) {
441 info_rdaddsc(&currnode->up->text, "* Menu:\n\n");
442 currnode->up->started_menu = TRUE;
443 }
444 info_menu_item(&currnode->up->text, currnode, p, &conf);
445
446 has_index |= info_check_index(p->words, currnode, idx);
447 info_heading(&currnode->text, p->kwtext, p->words, conf.width, &conf);
448 nesting = 0;
449 break;
450
451 case para_Rule:
452 info_rule(&currnode->text, nesting, conf.width - nesting, &conf);
453 break;
454
455 case para_Normal:
456 case para_Copyright:
457 case para_DescribedThing:
458 case para_Description:
459 case para_BiblioCited:
460 case para_Bullet:
461 case para_NumberedList:
462 has_index |= info_check_index(p->words, currnode, idx);
463 if (p->type == para_Bullet) {
464 bullet.next = NULL;
465 bullet.alt = NULL;
466 bullet.type = word_Normal;
467 bullet.text = conf.bullet;
468 prefix = &bullet;
469 prefixextra = NULL;
470 indentb = conf.listindentbefore;
471 indenta = conf.listindentafter;
472 } else if (p->type == para_NumberedList) {
473 prefix = p->kwtext;
474 prefixextra = conf.listsuffix;
475 indentb = conf.listindentbefore;
476 indenta = conf.listindentafter;
477 } else if (p->type == para_Description) {
478 prefix = NULL;
479 prefixextra = NULL;
480 indentb = conf.listindentbefore;
481 indenta = conf.listindentafter;
482 } else {
483 prefix = NULL;
484 prefixextra = NULL;
485 indentb = indenta = 0;
486 }
487 if (p->type == para_BiblioCited) {
488 body = dup_word_list(p->kwtext);
489 for (wp = body; wp->next; wp = wp->next);
490 wp->next = &spaceword;
491 spaceword.next = p->words;
492 spaceword.alt = NULL;
493 spaceword.type = word_WhiteSpace;
494 spaceword.text = NULL;
495 } else {
496 wp = NULL;
497 body = p->words;
498 }
499 info_para(&currnode->text, prefix, prefixextra, body, keywords,
500 nesting + indentb, indenta,
501 conf.width - nesting - indentb - indenta, &conf);
502 if (wp) {
503 wp->next = NULL;
504 free_word_list(body);
505 }
506 break;
507
508 case para_Code:
509 info_codepara(&currnode->text, p->words,
510 nesting + conf.indent_code,
511 conf.width - nesting - 2 * conf.indent_code);
512 break;
513 }
514
515 /*
516 * Create an index node if required.
517 */
518 if (has_index) {
519 node *newnode;
520 int i, j, k;
521 indexentry *entry;
522 char *nodename;
523
524 nodename = info_node_name_for_text(conf.index_text, &conf);
525 newnode = info_node_new(nodename, conf.charset);
526 sfree(nodename);
527
528 newnode->up = topnode;
529
530 currnode->next = newnode;
531 newnode->prev = currnode;
532 currnode->listnext = newnode;
533
534 k = info_rdadds(&newnode->text, conf.index_text);
535 info_rdaddsc(&newnode->text, "\n");
536 while (k > 0) {
537 info_rdadds(&newnode->text, conf.underline);
538 k -= ustrwid(conf.underline, conf.charset);
539 }
540 info_rdaddsc(&newnode->text, "\n\n");
541
542 info_menu_item(&topnode->text, newnode, NULL, &conf);
543
544 for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
545 info_idx *ii = (info_idx *)entry->backend_data;
546
547 for (j = 0; j < ii->nnodes; j++) {
548 /*
549 * When we have multiple references for a single
550 * index term, we only display the actual term on
551 * the first line, to make it clear that the terms
552 * really are the same.
553 */
554 if (j == 0)
555 info_rdaddsc(&newnode->text, ii->text);
556 for (k = (j ? 0 : ii->length); k < conf.index_width-2; k++)
557 info_rdaddc(&newnode->text, ' ');
558 info_rdaddsc(&newnode->text, " *Note ");
559 info_rdaddsc(&newnode->text, ii->nodes[j]->name);
560 info_rdaddsc(&newnode->text, "::\n");
561 }
562 }
563 }
564
565 /*
566 * Finalise the text of each node, by adding the ^_ delimiter
567 * and the node line at the top.
568 */
569 for (currnode = topnode; currnode; currnode = currnode->listnext) {
570 char *origtext = currnode->text.output.text;
571 currnode->text = empty_info_data;
572 currnode->text.charset = conf.charset;
573 info_rdaddsc(&currnode->text, "\037\nFile: ");
574 info_rdaddsc(&currnode->text, conf.filename);
575 info_rdaddsc(&currnode->text, ", Node: ");
576 info_rdaddsc(&currnode->text, currnode->name);
577 if (currnode->prev) {
578 info_rdaddsc(&currnode->text, ", Prev: ");
579 info_rdaddsc(&currnode->text, currnode->prev->name);
580 }
581 info_rdaddsc(&currnode->text, ", Up: ");
582 info_rdaddsc(&currnode->text, (currnode->up ?
583 currnode->up->name : "(dir)"));
584 if (currnode->next) {
585 info_rdaddsc(&currnode->text, ", Next: ");
586 info_rdaddsc(&currnode->text, currnode->next->name);
587 }
588 info_rdaddsc(&currnode->text, "\n\n");
589 info_rdaddsc(&currnode->text, origtext);
590 /*
591 * Just make _absolutely_ sure we end with a newline.
592 */
593 if (currnode->text.output.text[currnode->text.output.pos-1] != '\n')
594 info_rdaddc(&currnode->text, '\n');
595
596 sfree(origtext);
597 }
598
599 /*
600 * Compute the offsets for the tag table.
601 */
602 filepos = intro_text.output.pos;
603 for (currnode = topnode; currnode; currnode = currnode->listnext) {
604 currnode->pos = filepos;
605 filepos += currnode->text.output.pos;
606 }
607
608 /*
609 * Split into sub-files.
610 */
611 if (conf.maxfilesize > 0) {
612 int currfilesize = intro_text.output.pos, currfilenum = 1;
613 for (currnode = topnode; currnode; currnode = currnode->listnext) {
614 if (currfilesize > intro_text.output.pos &&
615 currfilesize + currnode->text.output.pos > conf.maxfilesize) {
616 currfilenum++;
617 currfilesize = intro_text.output.pos;
618 }
619 currnode->filenum = currfilenum;
620 currfilesize += currnode->text.output.pos;
621 }
622 }
623
624 /*
625 * Write the primary output file.
626 */
627 fp = fopen(conf.filename, "w");
628 if (!fp) {
629 error(err_cantopenw, conf.filename);
630 return;
631 }
632 fputs(intro_text.output.text, fp);
633 if (conf.maxfilesize == 0) {
634 for (currnode = topnode; currnode; currnode = currnode->listnext)
635 fputs(currnode->text.output.text, fp);
636 } else {
637 int filenum = 0;
638 fprintf(fp, "\037\nIndirect:\n");
639 for (currnode = topnode; currnode; currnode = currnode->listnext)
640 if (filenum != currnode->filenum) {
641 filenum = currnode->filenum;
642 fprintf(fp, "%s-%d: %d\n", conf.filename, filenum,
643 currnode->pos);
644 }
645 }
646 fprintf(fp, "\037\nTag Table:\n");
647 if (conf.maxfilesize > 0)
648 fprintf(fp, "(Indirect)\n");
649 for (currnode = topnode; currnode; currnode = currnode->listnext)
650 fprintf(fp, "Node: %s\177%d\n", currnode->name, currnode->pos);
651 fprintf(fp, "\037\nEnd Tag Table\n");
652 fclose(fp);
653
654 /*
655 * Write the subfiles.
656 */
657 if (conf.maxfilesize > 0) {
658 int filenum = 0;
659 fp = NULL;
660
661 for (currnode = topnode; currnode; currnode = currnode->listnext) {
662 if (filenum != currnode->filenum) {
663 char *fname;
664
665 filenum = currnode->filenum;
666
667 if (fp)
668 fclose(fp);
669 fname = snewn(strlen(conf.filename) + 40, char);
670 sprintf(fname, "%s-%d", conf.filename, filenum);
671 fp = fopen(fname, "w");
672 if (!fp) {
673 error(err_cantopenw, fname);
674 return;
675 }
676 sfree(fname);
677 fputs(intro_text.output.text, fp);
678 }
679 fputs(currnode->text.output.text, fp);
680 }
681
682 if (fp)
683 fclose(fp);
684 }
685 }
686
687 static int info_check_index(word *w, node *n, indexdata *idx)
688 {
689 int ret = 0;
690
691 for (; w; w = w->next) {
692 if (w->type == word_IndexRef) {
693 indextag *tag;
694 int i;
695
696 tag = index_findtag(idx, w->text);
697 if (!tag)
698 break;
699
700 for (i = 0; i < tag->nrefs; i++) {
701 indexentry *entry = tag->refs[i];
702 info_idx *ii = (info_idx *)entry->backend_data;
703
704 if (ii->nnodes > 0 && ii->nodes[ii->nnodes-1] == n) {
705 /*
706 * If the same index term is indexed twice
707 * within the same section, we only want to
708 * mention it once in the index. So do nothing
709 * here.
710 */
711 continue;
712 }
713
714 if (ii->nnodes >= ii->nodesize) {
715 ii->nodesize += 32;
716 ii->nodes = sresize(ii->nodes, ii->nodesize, node *);
717 }
718
719 ii->nodes[ii->nnodes++] = n;
720
721 ret = 1;
722 }
723 }
724 }
725
726 return ret;
727 }
728
729 static word *info_transform_wordlist(word *words, keywordlist *keywords)
730 {
731 word *ret = dup_word_list(words);
732 word *w;
733 keyword *kwl;
734
735 for (w = ret; w; w = w->next) {
736 w->private_data = NULL;
737 if (w->type == word_UpperXref || w->type == word_LowerXref) {
738 kwl = kw_lookup(keywords, w->text);
739 if (kwl) {
740 if (kwl->para->type == para_NumberedList ||
741 kwl->para->type == para_BiblioCited) {
742 /*
743 * In Info, we do nothing special for xrefs to
744 * numbered list items or bibliography entries.
745 */
746 continue;
747 } else {
748 /*
749 * An xref to a different section has its text
750 * completely replaced.
751 */
752 word *w2, *w3, *w4;
753 w2 = w3 = w->next;
754 w4 = NULL;
755 while (w2) {
756 if (w2->type == word_XrefEnd) {
757 w4 = w2->next;
758 w2->next = NULL;
759 break;
760 }
761 w2 = w2->next;
762 }
763 free_word_list(w3);
764
765 /*
766 * Now w is the UpperXref / LowerXref we
767 * started with, and w4 is the next word after
768 * the corresponding XrefEnd (if any). The
769 * simplest thing is just to stick a pointer to
770 * the target node structure in the private
771 * data field of the xref word, and let
772 * info_rdaddwc and friends read the node name
773 * out from there.
774 */
775 w->next = w4;
776 w->private_data = kwl->para->private_data;
777 assert(w->private_data);
778 }
779 }
780 }
781 }
782
783 return ret;
784 }
785
786 static int info_rdaddwc(info_data *id, word *words, word *end, int xrefs,
787 infoconfig *cfg) {
788 int ret = 0;
789
790 for (; words && words != end; words = words->next) switch (words->type) {
791 case word_HyperLink:
792 case word_HyperEnd:
793 case word_XrefEnd:
794 case word_IndexRef:
795 break;
796
797 case word_Normal:
798 case word_Emph:
799 case word_Code:
800 case word_WeakCode:
801 case word_WhiteSpace:
802 case word_EmphSpace:
803 case word_CodeSpace:
804 case word_WkCodeSpace:
805 case word_Quote:
806 case word_EmphQuote:
807 case word_CodeQuote:
808 case word_WkCodeQuote:
809 assert(words->type != word_CodeQuote &&
810 words->type != word_WkCodeQuote);
811 if (towordstyle(words->type) == word_Emph &&
812 (attraux(words->aux) == attr_First ||
813 attraux(words->aux) == attr_Only))
814 ret += info_rdadds(id, cfg->startemph);
815 else if (towordstyle(words->type) == word_Code &&
816 (attraux(words->aux) == attr_First ||
817 attraux(words->aux) == attr_Only))
818 ret += info_rdadds(id, cfg->lquote);
819 if (removeattr(words->type) == word_Normal) {
820 if (cvt_ok(id->charset, words->text) || !words->alt)
821 ret += info_rdadds(id, words->text);
822 else
823 ret += info_rdaddwc(id, words->alt, NULL, FALSE, cfg);
824 } else if (removeattr(words->type) == word_WhiteSpace) {
825 ret += info_rdadd(id, L' ');
826 } else if (removeattr(words->type) == word_Quote) {
827 ret += info_rdadds(id, quoteaux(words->aux) == quote_Open ?
828 cfg->lquote : cfg->rquote);
829 }
830 if (towordstyle(words->type) == word_Emph &&
831 (attraux(words->aux) == attr_Last ||
832 attraux(words->aux) == attr_Only))
833 ret += info_rdadds(id, cfg->endemph);
834 else if (towordstyle(words->type) == word_Code &&
835 (attraux(words->aux) == attr_Last ||
836 attraux(words->aux) == attr_Only))
837 ret += info_rdadds(id, cfg->rquote);
838 break;
839
840 case word_UpperXref:
841 case word_LowerXref:
842 if (xrefs && words->private_data) {
843 /*
844 * This bit is structural and so must be done in char
845 * rather than wchar_t.
846 */
847 ret += info_rdaddsc(id, "*Note ");
848 ret += info_rdaddsc(id, ((node *)words->private_data)->name);
849 ret += info_rdaddsc(id, "::");
850 }
851 break;
852 }
853
854 return ret;
855 }
856
857 static int info_width_internal(word *words, int xrefs, infoconfig *cfg);
858
859 static int info_width_internal_list(word *words, int xrefs, infoconfig *cfg) {
860 int w = 0;
861 while (words) {
862 w += info_width_internal(words, xrefs, cfg);
863 words = words->next;
864 }
865 return w;
866 }
867
868 static int info_width_internal(word *words, int xrefs, infoconfig *cfg) {
869 int wid;
870 int attr;
871
872 switch (words->type) {
873 case word_HyperLink:
874 case word_HyperEnd:
875 case word_XrefEnd:
876 case word_IndexRef:
877 return 0;
878
879 case word_UpperXref:
880 case word_LowerXref:
881 if (xrefs && words->private_data) {
882 /* "*Note " plus "::" comes to 8 characters */
883 return 8 + strwid(((node *)words->private_data)->name,
884 cfg->charset);
885 } else
886 return 0;
887 }
888
889 assert(words->type < word_internal_endattrs);
890
891 wid = 0;
892 attr = towordstyle(words->type);
893
894 if (attr == word_Emph || attr == word_Code) {
895 if (attraux(words->aux) == attr_Only ||
896 attraux(words->aux) == attr_First)
897 wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
898 cfg->charset);
899 }
900 if (attr == word_Emph || attr == word_Code) {
901 if (attraux(words->aux) == attr_Only ||
902 attraux(words->aux) == attr_Last)
903 wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
904 cfg->charset);
905 }
906
907 switch (words->type) {
908 case word_Normal:
909 case word_Emph:
910 case word_Code:
911 case word_WeakCode:
912 if (cvt_ok(cfg->charset, words->text) || !words->alt)
913 wid += ustrwid(words->text, cfg->charset);
914 else
915 wid += info_width_internal_list(words->alt, xrefs, cfg);
916 return wid;
917
918 case word_WhiteSpace:
919 case word_EmphSpace:
920 case word_CodeSpace:
921 case word_WkCodeSpace:
922 case word_Quote:
923 case word_EmphQuote:
924 case word_CodeQuote:
925 case word_WkCodeQuote:
926 assert(words->type != word_CodeQuote &&
927 words->type != word_WkCodeQuote);
928 if (removeattr(words->type) == word_Quote) {
929 if (quoteaux(words->aux) == quote_Open)
930 wid += ustrwid(cfg->lquote, cfg->charset);
931 else
932 wid += ustrwid(cfg->rquote, cfg->charset);
933 } else
934 wid++; /* space */
935 }
936 return wid;
937 }
938
939 static int info_width_noxrefs(void *ctx, word *words)
940 {
941 return info_width_internal(words, FALSE, (infoconfig *)ctx);
942 }
943 static int info_width_xrefs(void *ctx, word *words)
944 {
945 return info_width_internal(words, TRUE, (infoconfig *)ctx);
946 }
947
948 static void info_heading(info_data *text, word *tprefix,
949 word *words, int width, infoconfig *cfg) {
950 int length;
951 int firstlinewidth, wrapwidth;
952 wrappedline *wrapping, *p;
953
954 length = 0;
955 if (tprefix) {
956 length += info_rdaddwc(text, tprefix, NULL, FALSE, cfg);
957 length += info_rdadds(text, cfg->sectsuffix);
958 }
959
960 wrapwidth = width;
961 firstlinewidth = width - length;
962
963 wrapping = wrap_para(words, firstlinewidth, wrapwidth,
964 info_width_noxrefs, cfg, 0);
965 for (p = wrapping; p; p = p->next) {
966 length += info_rdaddwc(text, p->begin, p->end, FALSE, cfg);
967 info_rdadd(text, L'\n');
968 while (length > 0) {
969 info_rdadds(text, cfg->underline);
970 length -= ustrwid(cfg->underline, cfg->charset);
971 }
972 info_rdadd(text, L'\n');
973 length = 0;
974 }
975 wrap_free(wrapping);
976 info_rdadd(text, L'\n');
977 }
978
979 static void info_rule(info_data *text, int indent, int width, infoconfig *cfg)
980 {
981 while (indent--) info_rdadd(text, L' ');
982 while (width > 0) {
983 info_rdadds(text, cfg->rule);
984 width -= ustrwid(cfg->rule, cfg->charset);
985 }
986 info_rdadd(text, L'\n');
987 info_rdadd(text, L'\n');
988 }
989
990 static void info_para(info_data *text, word *prefix, wchar_t *prefixextra,
991 word *input, keywordlist *keywords, int indent,
992 int extraindent, int width, infoconfig *cfg) {
993 wrappedline *wrapping, *p;
994 word *words;
995 int e;
996 int i;
997 int firstlinewidth = width;
998
999 words = info_transform_wordlist(input, keywords);
1000
1001 if (prefix) {
1002 for (i = 0; i < indent; i++)
1003 info_rdadd(text, L' ');
1004 e = info_rdaddwc(text, prefix, NULL, FALSE, cfg);
1005 if (prefixextra)
1006 e += info_rdadds(text, prefixextra);
1007 /* If the prefix is too long, shorten the first line to fit. */
1008 e = extraindent - e;
1009 if (e < 0) {
1010 firstlinewidth += e; /* this decreases it, since e < 0 */
1011 if (firstlinewidth < 0) {
1012 e = indent + extraindent;
1013 firstlinewidth = width;
1014 info_rdadd(text, L'\n');
1015 } else
1016 e = 0;
1017 }
1018 } else
1019 e = indent + extraindent;
1020
1021 wrapping = wrap_para(words, firstlinewidth, width, info_width_xrefs,
1022 cfg, 0);
1023 for (p = wrapping; p; p = p->next) {
1024 for (i = 0; i < e; i++)
1025 info_rdadd(text, L' ');
1026 info_rdaddwc(text, p->begin, p->end, TRUE, cfg);
1027 info_rdadd(text, L'\n');
1028 e = indent + extraindent;
1029 }
1030 wrap_free(wrapping);
1031 info_rdadd(text, L'\n');
1032
1033 free_word_list(words);
1034 }
1035
1036 static void info_codepara(info_data *text, word *words,
1037 int indent, int width) {
1038 int i;
1039
1040 for (; words; words = words->next) if (words->type == word_WeakCode) {
1041 for (i = 0; i < indent; i++)
1042 info_rdadd(text, L' ');
1043 if (info_rdadds(text, words->text) > width) {
1044 /* FIXME: warn */
1045 }
1046 info_rdadd(text, L'\n');
1047 }
1048
1049 info_rdadd(text, L'\n');
1050 }
1051
1052 static void info_versionid(info_data *text, word *words, infoconfig *cfg) {
1053 info_rdadd(text, L'[');
1054 info_rdaddwc(text, words, NULL, FALSE, cfg);
1055 info_rdadds(text, L"]\n");
1056 }
1057
1058 static node *info_node_new(char *name, int charset)
1059 {
1060 node *n;
1061
1062 n = snew(node);
1063 n->text = empty_info_data;
1064 n->text.charset = charset;
1065 n->up = n->next = n->prev = n->lastchild = n->listnext = NULL;
1066 n->name = dupstr(name);
1067 n->started_menu = FALSE;
1068
1069 return n;
1070 }
1071
1072 static char *info_node_name_core(info_data *id, filepos *fpos)
1073 {
1074 char *p, *q;
1075
1076 /*
1077 * We cannot have commas, colons or parentheses in a node name.
1078 * Remove any that we find, with a warning.
1079 */
1080 p = q = id->output.text;
1081 while (*p) {
1082 if (*p == ':' || *p == ',' || *p == '(' || *p == ')') {
1083 error(err_infonodechar, fpos, *p);
1084 } else {
1085 *q++ = *p;
1086 }
1087 p++;
1088 }
1089 *q = '\0';
1090
1091 return id->output.text;
1092 }
1093
1094 static char *info_node_name_for_para(paragraph *par, infoconfig *cfg)
1095 {
1096 info_data id = EMPTY_INFO_DATA;
1097
1098 id.charset = cfg->charset;
1099 info_rdaddwc(&id, par->kwtext ? par->kwtext : par->words,
1100 NULL, FALSE, cfg);
1101 info_rdaddsc(&id, NULL);
1102
1103 return info_node_name_core(&id, &par->fpos);
1104 }
1105
1106 static char *info_node_name_for_text(wchar_t *text, infoconfig *cfg)
1107 {
1108 info_data id = EMPTY_INFO_DATA;
1109
1110 id.charset = cfg->charset;
1111 info_rdadds(&id, text);
1112 info_rdaddsc(&id, NULL);
1113
1114 return info_node_name_core(&id, NULL);
1115 }
1116
1117 static void info_menu_item(info_data *text, node *n, paragraph *p,
1118 infoconfig *cfg)
1119 {
1120 /*
1121 * FIXME: Depending on how we're doing node names in this info
1122 * file, we might want to do
1123 *
1124 * * Node name:: Chapter title
1125 *
1126 * _or_
1127 *
1128 * * Chapter number: Node name.
1129 *
1130 * This function mostly works in char rather than wchar_t,
1131 * because a menu item is a structural component.
1132 */
1133 info_rdaddsc(text, "* ");
1134 info_rdaddsc(text, n->name);
1135 info_rdaddsc(text, "::");
1136 if (p) {
1137 info_rdaddc(text, ' ');
1138 info_rdaddwc(text, p->words, NULL, FALSE, cfg);
1139 }
1140 info_rdaddc(text, '\n');
1141 }
1142
1143 /*
1144 * These functions implement my wrapper on the rdadd* calls which
1145 * allows me to switch arbitrarily between literal octet-string
1146 * text and charset-translated Unicode. (Because no matter what
1147 * character set I write the actual text in, I expect info readers
1148 * to treat node names and file names literally and to expect
1149 * keywords like `*Note' in their canonical form, so I have to take
1150 * steps to ensure that those structural elements of the file
1151 * aren't messed with.)
1152 */
1153 static int info_rdadds(info_data *d, wchar_t const *wcs)
1154 {
1155 if (!d->wcmode) {
1156 d->state = charset_init_state;
1157 d->wcmode = TRUE;
1158 }
1159
1160 if (wcs) {
1161 char buf[256];
1162 int len, width, ret;
1163
1164 width = ustrwid(wcs, d->charset);
1165
1166 len = ustrlen(wcs);
1167 while (len > 0) {
1168 int prevlen = len;
1169
1170 ret = charset_from_unicode(&wcs, &len, buf, lenof(buf),
1171 d->charset, &d->state, NULL);
1172
1173 assert(len < prevlen);
1174
1175 if (ret > 0) {
1176 buf[ret] = '\0';
1177 rdaddsc(&d->output, buf);
1178 }
1179 }
1180
1181 return width;
1182 } else
1183 return 0;
1184 }
1185
1186 static int info_rdaddsc(info_data *d, char const *cs)
1187 {
1188 if (d->wcmode) {
1189 char buf[256];
1190 int ret;
1191
1192 ret = charset_from_unicode(NULL, 0, buf, lenof(buf),
1193 d->charset, &d->state, NULL);
1194 if (ret > 0) {
1195 buf[ret] = '\0';
1196 rdaddsc(&d->output, buf);
1197 }
1198
1199 d->wcmode = FALSE;
1200 }
1201
1202 if (cs) {
1203 rdaddsc(&d->output, cs);
1204 return strwid(cs, d->charset);
1205 } else
1206 return 0;
1207 }
1208
1209 static int info_rdadd(info_data *d, wchar_t wc)
1210 {
1211 wchar_t wcs[2];
1212 wcs[0] = wc;
1213 wcs[1] = L'\0';
1214 return info_rdadds(d, wcs);
1215 }
1216
1217 static int info_rdaddc(info_data *d, char c)
1218 {
1219 char cs[2];
1220 cs[0] = c;
1221 cs[1] = '\0';
1222 return info_rdaddsc(d, cs);
1223 }