Another observation about indices.
[sgt/halibut] / bk_info.c
1 /*
2 * info backend for Halibut
3 *
4 * Possible future work:
5 *
6 * - configurable choice of how to allocate node names?
7 * + possibly a template-like approach, choosing node names to
8 * be the full section title or perhaps the internal keyword?
9 * + neither of those seems quite right. Perhaps instead a
10 * Windows Help-like mechanism, where a magic config
11 * directive allows user choice of name for every node.
12 * + Only trouble with that is, now what happens to the section
13 * numbers? Do they become completely vestigial and just sit
14 * in the title text of each node? Or do we keep them in the
15 * menus somehow? I think people might occasionally want to
16 * go to a section by number, if only because all the _other_
17 * formats of the same document will reference the numbers
18 * all the time. So our menu lines could look like one of
19 * these:
20 * * Nodename: Section 1.2. Title of section.
21 * * Section 1.2: Nodename. Title of section.
22 *
23 * - might be helpful to diagnose duplicate node names!
24 *
25 * - more flexibility in heading underlines, like text backend.
26 * + Given info.el's fontifier, we'd want the following defaults:
27 * \cfg{info-title-underline}{*}
28 * \cfg{info-chapter-underline}{=}
29 * \cfg{info-section-underline}{0}{-}
30 * \cfg{info-section-underline}{1}{.}
31 *
32 * - Indices generated by makeinfo use a menu rather than a bunch of
33 * cross-references, which reduces visual clutter rather. For
34 * singly-referenced items, it looks like:
35 * * toner cartridge, replacing: Toner.
36 * It does a horrid job on multiply-referenced entries, though,
37 * perhaps because the name before the colon is meant to be unique.
38 * Info's 'i' command requires the use of a menu -- it fails to
39 * find any index entries at all with Halibut's current index format.
40 *
41 * - The string "*note" is matched case-insensitively, so we could
42 * make things slightly less ugly by using the lower-case version
43 * when the user asks for \k. Unfortunately, standalone Info seems
44 * to match node names case-sensitively, so we can't downcase that.
45 */
46
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <assert.h>
50 #include "halibut.h"
51
/*
 * Settings for the Info backend. Filled in by info_configure() from
 * built-in defaults and then from configuration paragraphs.
 *
 * The wchar_t members either point at string literals or into the
 * keyword data of a configuration paragraph.
 */
typedef struct {
    char *filename;             /* name of the primary output file */
    int maxfilesize;            /* size threshold used when splitting into subfiles */
    int charset;                /* output character set identifier */
    int listindentbefore;       /* indent placed before a list item's prefix */
    int listindentafter;        /* space allotted to the list item's prefix */
    int indent_code;            /* indent applied to code paragraphs */
    int width;                  /* overall text width for wrapping */
    int index_width;            /* column at which index references start */
    wchar_t *bullet;            /* text used as the bullet of a bulleted item */
    wchar_t *listsuffix;        /* text appended to a numbered item's number */
    wchar_t *startemph;         /* marker emitted before emphasised text */
    wchar_t *endemph;           /* marker emitted after emphasised text */
    wchar_t *lquote;            /* opening quote */
    wchar_t *rquote;            /* closing quote */
    wchar_t *sectsuffix;        /* separator between section number and title */
    wchar_t *underline;         /* text repeated to underline headings */
    wchar_t *rule;              /* text repeated to draw a horizontal rule */
    wchar_t *index_text;        /* title (and node name source) of the index */
} infoconfig;
65
66 typedef struct {
67 rdstringc output;
68 int charset;
69 charset_state state;
70 int wcmode;
71 } info_data;
72 #define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE }
73 static const info_data empty_info_data = EMPTY_INFO_DATA;
74
75 typedef struct node_tag node;
76 struct node_tag {
77 node *listnext;
78 node *up, *prev, *next, *lastchild;
79 int pos, started_menu, filenum;
80 char *name;
81 info_data text;
82 };
83
84 typedef struct {
85 char *text;
86 int length;
87 int nnodes, nodesize;
88 node **nodes;
89 } info_idx;
90
91 static int info_rdadd(info_data *, wchar_t);
92 static int info_rdadds(info_data *, wchar_t const *);
93 static int info_rdaddc(info_data *, char);
94 static int info_rdaddsc(info_data *, char const *);
95
96 static void info_heading(info_data *, word *, word *, int, infoconfig *);
97 static void info_rule(info_data *, int, int, infoconfig *);
98 static void info_para(info_data *, word *, wchar_t *, word *, keywordlist *,
99 int, int, int, infoconfig *);
100 static void info_codepara(info_data *, word *, int, int);
101 static void info_versionid(info_data *, word *, infoconfig *);
102 static void info_menu_item(info_data *, node *, paragraph *, infoconfig *);
103 static word *info_transform_wordlist(word *, keywordlist *);
104 static int info_check_index(word *, node *, indexdata *);
105
106 static int info_rdaddwc(info_data *, word *, word *, int, infoconfig *);
107
108 static node *info_node_new(char *name, int charset);
109 static char *info_node_name_for_para(paragraph *p, infoconfig *);
110 static char *info_node_name_for_text(wchar_t *text, infoconfig *);
111
112 static infoconfig info_configure(paragraph *source) {
113 infoconfig ret;
114 paragraph *p;
115
116 /*
117 * Defaults.
118 */
119 ret.filename = dupstr("output.info");
120 ret.maxfilesize = 64 << 10;
121 ret.charset = CS_ASCII;
122 ret.width = 70;
123 ret.listindentbefore = 1;
124 ret.listindentafter = 3;
125 ret.indent_code = 2;
126 ret.index_width = 40;
127 ret.listsuffix = L".";
128 ret.bullet = L"\x2022\0-\0\0";
129 ret.rule = L"\x2500\0-\0\0";
130 ret.startemph = L"_\0_\0\0";
131 ret.endemph = uadv(ret.startemph);
132 ret.lquote = L"\x2018\0\x2019\0`\0'\0\0";
133 ret.rquote = uadv(ret.lquote);
134 ret.sectsuffix = L": ";
135 ret.underline = L"\x203E\0-\0\0";
136 ret.index_text = L"Index";
137
138 /*
139 * Two-pass configuration so that we can pick up global config
140 * (e.g. `quotes') before having it overridden by specific
141 * config (`info-quotes'), irrespective of the order in which
142 * they occur.
143 */
144 for (p = source; p; p = p->next) {
145 if (p->type == para_Config) {
146 if (!ustricmp(p->keyword, L"quotes")) {
147 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
148 ret.lquote = uadv(p->keyword);
149 ret.rquote = uadv(ret.lquote);
150 }
151 } else if (!ustricmp(p->keyword, L"index")) {
152 ret.index_text = uadv(p->keyword);
153 }
154 }
155 }
156
157 for (p = source; p; p = p->next) {
158 if (p->type == para_Config) {
159 if (!ustricmp(p->keyword, L"info-filename")) {
160 sfree(ret.filename);
161 ret.filename = dupstr(adv(p->origkeyword));
162 } else if (!ustricmp(p->keyword, L"info-charset")) {
163 ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
164 } else if (!ustricmp(p->keyword, L"info-max-file-size")) {
165 ret.maxfilesize = utoi(uadv(p->keyword));
166 } else if (!ustricmp(p->keyword, L"info-width")) {
167 ret.width = utoi(uadv(p->keyword));
168 } else if (!ustricmp(p->keyword, L"info-indent-code")) {
169 ret.indent_code = utoi(uadv(p->keyword));
170 } else if (!ustricmp(p->keyword, L"info-index-width")) {
171 ret.index_width = utoi(uadv(p->keyword));
172 } else if (!ustricmp(p->keyword, L"info-list-indent")) {
173 ret.listindentbefore = utoi(uadv(p->keyword));
174 } else if (!ustricmp(p->keyword, L"info-listitem-indent")) {
175 ret.listindentafter = utoi(uadv(p->keyword));
176 } else if (!ustricmp(p->keyword, L"info-section-suffix")) {
177 ret.sectsuffix = uadv(p->keyword);
178 } else if (!ustricmp(p->keyword, L"info-underline")) {
179 ret.underline = uadv(p->keyword);
180 } else if (!ustricmp(p->keyword, L"info-bullet")) {
181 ret.bullet = uadv(p->keyword);
182 } else if (!ustricmp(p->keyword, L"info-rule")) {
183 ret.rule = uadv(p->keyword);
184 } else if (!ustricmp(p->keyword, L"info-list-suffix")) {
185 ret.listsuffix = uadv(p->keyword);
186 } else if (!ustricmp(p->keyword, L"info-emphasis")) {
187 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
188 ret.startemph = uadv(p->keyword);
189 ret.endemph = uadv(ret.startemph);
190 }
191 } else if (!ustricmp(p->keyword, L"info-quotes")) {
192 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
193 ret.lquote = uadv(p->keyword);
194 ret.rquote = uadv(ret.lquote);
195 }
196 }
197 }
198 }
199
200 /*
201 * Now process fallbacks on quote characters, underlines, the
202 * rule character, the emphasis characters, and bullets.
203 */
204 while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) &&
205 (!cvt_ok(ret.charset, ret.lquote) ||
206 !cvt_ok(ret.charset, ret.rquote))) {
207 ret.lquote = uadv(ret.rquote);
208 ret.rquote = uadv(ret.lquote);
209 }
210
211 while (*uadv(ret.endemph) && *uadv(uadv(ret.endemph)) &&
212 (!cvt_ok(ret.charset, ret.startemph) ||
213 !cvt_ok(ret.charset, ret.endemph))) {
214 ret.startemph = uadv(ret.endemph);
215 ret.endemph = uadv(ret.startemph);
216 }
217
218 while (*ret.underline && *uadv(ret.underline) &&
219 !cvt_ok(ret.charset, ret.underline))
220 ret.underline = uadv(ret.underline);
221
222 while (*ret.bullet && *uadv(ret.bullet) &&
223 !cvt_ok(ret.charset, ret.bullet))
224 ret.bullet = uadv(ret.bullet);
225
226 while (*ret.rule && *uadv(ret.rule) &&
227 !cvt_ok(ret.charset, ret.rule))
228 ret.rule = uadv(ret.rule);
229
230 return ret;
231 }
232
233 paragraph *info_config_filename(char *filename)
234 {
235 return cmdline_cfg_simple("info-filename", filename, NULL);
236 }
237
238 void info_backend(paragraph *sourceform, keywordlist *keywords,
239 indexdata *idx, void *unused) {
240 paragraph *p;
241 infoconfig conf;
242 word *prefix, *body, *wp;
243 word spaceword;
244 wchar_t *prefixextra;
245 int nesting, nestindent;
246 int indentb, indenta;
247 int filepos;
248 int has_index;
249 info_data intro_text = EMPTY_INFO_DATA;
250 node *topnode, *currnode;
251 word bullet;
252 FILE *fp;
253
254 IGNORE(unused);
255
256 conf = info_configure(sourceform);
257
258 /*
259 * Go through and create a node for each section.
260 */
261 topnode = info_node_new("Top", conf.charset);
262 currnode = topnode;
263 for (p = sourceform; p; p = p->next) switch (p->type) {
264 /*
265 * Chapter titles.
266 */
267 case para_Chapter:
268 case para_Appendix:
269 case para_UnnumberedChapter:
270 case para_Heading:
271 case para_Subsect:
272 {
273 node *newnode, *upnode;
274 char *nodename;
275
276 nodename = info_node_name_for_para(p, &conf);
277 newnode = info_node_new(nodename, conf.charset);
278 sfree(nodename);
279
280 p->private_data = newnode;
281
282 if (p->parent)
283 upnode = (node *)p->parent->private_data;
284 else
285 upnode = topnode;
286 assert(upnode);
287 newnode->up = upnode;
288
289 currnode->next = newnode;
290 newnode->prev = currnode;
291
292 currnode->listnext = newnode;
293 currnode = newnode;
294 }
295 break;
296 default:
297 p->private_data = NULL;
298 break;
299 }
300
301 /*
302 * Set up the display form of each index entry.
303 */
304 {
305 int i;
306 indexentry *entry;
307
308 for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
309 info_idx *ii = snew(info_idx);
310 info_data id = EMPTY_INFO_DATA;
311
312 id.charset = conf.charset;
313
314 ii->nnodes = ii->nodesize = 0;
315 ii->nodes = NULL;
316
317 ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE, &conf);
318
319 ii->text = id.output.text;
320
321 entry->backend_data = ii;
322 }
323 }
324
325 /*
326 * An Info file begins with a piece of introductory text which
327 * is apparently never shown anywhere. This seems to me to be a
328 * good place to put the copyright notice and the version IDs.
329 * Also, Info directory entries are expected to go here.
330 */
331 intro_text.charset = conf.charset;
332
333 info_rdaddsc(&intro_text,
334 "This Info file generated by Halibut, ");
335 info_rdaddsc(&intro_text, version);
336 info_rdaddsc(&intro_text, "\n\n");
337
338 for (p = sourceform; p; p = p->next)
339 if (p->type == para_Config &&
340 !ustricmp(p->keyword, L"info-dir-entry")) {
341 wchar_t *section, *shortname, *longname, *kw;
342 char *s;
343
344 section = uadv(p->keyword);
345 shortname = *section ? uadv(section) : L"";
346 longname = *shortname ? uadv(shortname) : L"";
347 kw = *longname ? uadv(longname) : L"";
348
349 if (!*longname) {
350 error(err_cfginsufarg, &p->fpos, p->origkeyword, 3);
351 continue;
352 }
353
354 info_rdaddsc(&intro_text, "INFO-DIR-SECTION ");
355 info_rdadds(&intro_text, section);
356 info_rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
357 info_rdadds(&intro_text, shortname);
358 info_rdaddsc(&intro_text, ": (");
359 s = dupstr(conf.filename);
360 if (strlen(s) > 5 && !strcmp(s+strlen(s)-5, ".info"))
361 s[strlen(s)-5] = '\0';
362 info_rdaddsc(&intro_text, s);
363 sfree(s);
364 info_rdaddsc(&intro_text, ")");
365 if (*kw) {
366 keyword *kwl = kw_lookup(keywords, kw);
367 if (kwl && kwl->para->private_data) {
368 node *n = (node *)kwl->para->private_data;
369 info_rdaddsc(&intro_text, n->name);
370 }
371 }
372 info_rdaddsc(&intro_text, ". ");
373 info_rdadds(&intro_text, longname);
374 info_rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
375 }
376
377 for (p = sourceform; p; p = p->next)
378 if (p->type == para_Copyright)
379 info_para(&intro_text, NULL, NULL, p->words, keywords,
380 0, 0, conf.width, &conf);
381
382 for (p = sourceform; p; p = p->next)
383 if (p->type == para_VersionID)
384 info_versionid(&intro_text, p->words, &conf);
385
386 if (intro_text.output.text[intro_text.output.pos-1] != '\n')
387 info_rdaddc(&intro_text, '\n');
388
389 /* Do the title */
390 for (p = sourceform; p; p = p->next)
391 if (p->type == para_Title)
392 info_heading(&topnode->text, NULL, p->words, conf.width, &conf);
393
394 nestindent = conf.listindentbefore + conf.listindentafter;
395 nesting = 0;
396
397 currnode = topnode;
398
399 /* Do the main document */
400 for (p = sourceform; p; p = p->next) switch (p->type) {
401
402 case para_QuotePush:
403 nesting += 2;
404 break;
405 case para_QuotePop:
406 nesting -= 2;
407 assert(nesting >= 0);
408 break;
409
410 case para_LcontPush:
411 nesting += nestindent;
412 break;
413 case para_LcontPop:
414 nesting -= nestindent;
415 assert(nesting >= 0);
416 break;
417
418 /*
419 * Things we ignore because we've already processed them or
420 * aren't going to touch them in this pass.
421 */
422 case para_IM:
423 case para_BR:
424 case para_Biblio: /* only touch BiblioCited */
425 case para_VersionID:
426 case para_NoCite:
427 case para_Title:
428 break;
429
430 /*
431 * Chapter titles.
432 */
433 case para_Chapter:
434 case para_Appendix:
435 case para_UnnumberedChapter:
436 case para_Heading:
437 case para_Subsect:
438 currnode = p->private_data;
439 assert(currnode);
440 assert(currnode->up);
441
442 if (!currnode->up->started_menu) {
443 info_rdaddsc(&currnode->up->text, "* Menu:\n\n");
444 currnode->up->started_menu = TRUE;
445 }
446 info_menu_item(&currnode->up->text, currnode, p, &conf);
447
448 has_index |= info_check_index(p->words, currnode, idx);
449 info_heading(&currnode->text, p->kwtext, p->words, conf.width, &conf);
450 nesting = 0;
451 break;
452
453 case para_Rule:
454 info_rule(&currnode->text, nesting, conf.width - nesting, &conf);
455 break;
456
457 case para_Normal:
458 case para_Copyright:
459 case para_DescribedThing:
460 case para_Description:
461 case para_BiblioCited:
462 case para_Bullet:
463 case para_NumberedList:
464 has_index |= info_check_index(p->words, currnode, idx);
465 if (p->type == para_Bullet) {
466 bullet.next = NULL;
467 bullet.alt = NULL;
468 bullet.type = word_Normal;
469 bullet.text = conf.bullet;
470 prefix = &bullet;
471 prefixextra = NULL;
472 indentb = conf.listindentbefore;
473 indenta = conf.listindentafter;
474 } else if (p->type == para_NumberedList) {
475 prefix = p->kwtext;
476 prefixextra = conf.listsuffix;
477 indentb = conf.listindentbefore;
478 indenta = conf.listindentafter;
479 } else if (p->type == para_Description) {
480 prefix = NULL;
481 prefixextra = NULL;
482 indentb = conf.listindentbefore;
483 indenta = conf.listindentafter;
484 } else {
485 prefix = NULL;
486 prefixextra = NULL;
487 indentb = indenta = 0;
488 }
489 if (p->type == para_BiblioCited) {
490 body = dup_word_list(p->kwtext);
491 for (wp = body; wp->next; wp = wp->next);
492 wp->next = &spaceword;
493 spaceword.next = p->words;
494 spaceword.alt = NULL;
495 spaceword.type = word_WhiteSpace;
496 spaceword.text = NULL;
497 } else {
498 wp = NULL;
499 body = p->words;
500 }
501 info_para(&currnode->text, prefix, prefixextra, body, keywords,
502 nesting + indentb, indenta,
503 conf.width - nesting - indentb - indenta, &conf);
504 if (wp) {
505 wp->next = NULL;
506 free_word_list(body);
507 }
508 break;
509
510 case para_Code:
511 info_codepara(&currnode->text, p->words,
512 nesting + conf.indent_code,
513 conf.width - nesting - 2 * conf.indent_code);
514 break;
515 }
516
517 /*
518 * Create an index node if required.
519 */
520 if (has_index) {
521 node *newnode;
522 int i, j, k;
523 indexentry *entry;
524 char *nodename;
525
526 nodename = info_node_name_for_text(conf.index_text, &conf);
527 newnode = info_node_new(nodename, conf.charset);
528 sfree(nodename);
529
530 newnode->up = topnode;
531
532 currnode->next = newnode;
533 newnode->prev = currnode;
534 currnode->listnext = newnode;
535
536 k = info_rdadds(&newnode->text, conf.index_text);
537 info_rdaddsc(&newnode->text, "\n");
538 while (k > 0) {
539 info_rdadds(&newnode->text, conf.underline);
540 k -= ustrwid(conf.underline, conf.charset);
541 }
542 info_rdaddsc(&newnode->text, "\n\n");
543
544 info_menu_item(&topnode->text, newnode, NULL, &conf);
545
546 for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
547 info_idx *ii = (info_idx *)entry->backend_data;
548
549 for (j = 0; j < ii->nnodes; j++) {
550 /*
551 * When we have multiple references for a single
552 * index term, we only display the actual term on
553 * the first line, to make it clear that the terms
554 * really are the same.
555 */
556 if (j == 0)
557 info_rdaddsc(&newnode->text, ii->text);
558 for (k = (j ? 0 : ii->length); k < conf.index_width-2; k++)
559 info_rdaddc(&newnode->text, ' ');
560 info_rdaddsc(&newnode->text, " *Note ");
561 info_rdaddsc(&newnode->text, ii->nodes[j]->name);
562 info_rdaddsc(&newnode->text, "::\n");
563 }
564 }
565 }
566
567 /*
568 * Finalise the text of each node, by adding the ^_ delimiter
569 * and the node line at the top.
570 */
571 for (currnode = topnode; currnode; currnode = currnode->listnext) {
572 char *origtext = currnode->text.output.text;
573 currnode->text = empty_info_data;
574 currnode->text.charset = conf.charset;
575 info_rdaddsc(&currnode->text, "\037\nFile: ");
576 info_rdaddsc(&currnode->text, conf.filename);
577 info_rdaddsc(&currnode->text, ", Node: ");
578 info_rdaddsc(&currnode->text, currnode->name);
579 if (currnode->prev) {
580 info_rdaddsc(&currnode->text, ", Prev: ");
581 info_rdaddsc(&currnode->text, currnode->prev->name);
582 }
583 info_rdaddsc(&currnode->text, ", Up: ");
584 info_rdaddsc(&currnode->text, (currnode->up ?
585 currnode->up->name : "(dir)"));
586 if (currnode->next) {
587 info_rdaddsc(&currnode->text, ", Next: ");
588 info_rdaddsc(&currnode->text, currnode->next->name);
589 }
590 info_rdaddsc(&currnode->text, "\n\n");
591 info_rdaddsc(&currnode->text, origtext);
592 /*
593 * Just make _absolutely_ sure we end with a newline.
594 */
595 if (currnode->text.output.text[currnode->text.output.pos-1] != '\n')
596 info_rdaddc(&currnode->text, '\n');
597
598 sfree(origtext);
599 }
600
601 /*
602 * Compute the offsets for the tag table.
603 */
604 filepos = intro_text.output.pos;
605 for (currnode = topnode; currnode; currnode = currnode->listnext) {
606 currnode->pos = filepos;
607 filepos += currnode->text.output.pos;
608 }
609
610 /*
611 * Split into sub-files.
612 */
613 if (conf.maxfilesize > 0) {
614 int currfilesize = intro_text.output.pos, currfilenum = 1;
615 for (currnode = topnode; currnode; currnode = currnode->listnext) {
616 if (currfilesize > intro_text.output.pos &&
617 currfilesize + currnode->text.output.pos > conf.maxfilesize) {
618 currfilenum++;
619 currfilesize = intro_text.output.pos;
620 }
621 currnode->filenum = currfilenum;
622 currfilesize += currnode->text.output.pos;
623 }
624 }
625
626 /*
627 * Write the primary output file.
628 */
629 fp = fopen(conf.filename, "w");
630 if (!fp) {
631 error(err_cantopenw, conf.filename);
632 return;
633 }
634 fputs(intro_text.output.text, fp);
635 if (conf.maxfilesize == 0) {
636 for (currnode = topnode; currnode; currnode = currnode->listnext)
637 fputs(currnode->text.output.text, fp);
638 } else {
639 int filenum = 0;
640 fprintf(fp, "\037\nIndirect:\n");
641 for (currnode = topnode; currnode; currnode = currnode->listnext)
642 if (filenum != currnode->filenum) {
643 filenum = currnode->filenum;
644 fprintf(fp, "%s-%d: %d\n", conf.filename, filenum,
645 currnode->pos);
646 }
647 }
648 fprintf(fp, "\037\nTag Table:\n");
649 if (conf.maxfilesize > 0)
650 fprintf(fp, "(Indirect)\n");
651 for (currnode = topnode; currnode; currnode = currnode->listnext)
652 fprintf(fp, "Node: %s\177%d\n", currnode->name, currnode->pos);
653 fprintf(fp, "\037\nEnd Tag Table\n");
654 fclose(fp);
655
656 /*
657 * Write the subfiles.
658 */
659 if (conf.maxfilesize > 0) {
660 int filenum = 0;
661 fp = NULL;
662
663 for (currnode = topnode; currnode; currnode = currnode->listnext) {
664 if (filenum != currnode->filenum) {
665 char *fname;
666
667 filenum = currnode->filenum;
668
669 if (fp)
670 fclose(fp);
671 fname = snewn(strlen(conf.filename) + 40, char);
672 sprintf(fname, "%s-%d", conf.filename, filenum);
673 fp = fopen(fname, "w");
674 if (!fp) {
675 error(err_cantopenw, fname);
676 return;
677 }
678 sfree(fname);
679 fputs(intro_text.output.text, fp);
680 }
681 fputs(currnode->text.output.text, fp);
682 }
683
684 if (fp)
685 fclose(fp);
686 }
687 }
688
689 static int info_check_index(word *w, node *n, indexdata *idx)
690 {
691 int ret = 0;
692
693 for (; w; w = w->next) {
694 if (w->type == word_IndexRef) {
695 indextag *tag;
696 int i;
697
698 tag = index_findtag(idx, w->text);
699 if (!tag)
700 break;
701
702 for (i = 0; i < tag->nrefs; i++) {
703 indexentry *entry = tag->refs[i];
704 info_idx *ii = (info_idx *)entry->backend_data;
705
706 if (ii->nnodes > 0 && ii->nodes[ii->nnodes-1] == n) {
707 /*
708 * If the same index term is indexed twice
709 * within the same section, we only want to
710 * mention it once in the index. So do nothing
711 * here.
712 */
713 continue;
714 }
715
716 if (ii->nnodes >= ii->nodesize) {
717 ii->nodesize += 32;
718 ii->nodes = sresize(ii->nodes, ii->nodesize, node *);
719 }
720
721 ii->nodes[ii->nnodes++] = n;
722
723 ret = 1;
724 }
725 }
726 }
727
728 return ret;
729 }
730
731 static word *info_transform_wordlist(word *words, keywordlist *keywords)
732 {
733 word *ret = dup_word_list(words);
734 word *w;
735 keyword *kwl;
736
737 for (w = ret; w; w = w->next) {
738 w->private_data = NULL;
739 if (w->type == word_UpperXref || w->type == word_LowerXref) {
740 kwl = kw_lookup(keywords, w->text);
741 if (kwl) {
742 if (kwl->para->type == para_NumberedList ||
743 kwl->para->type == para_BiblioCited) {
744 /*
745 * In Info, we do nothing special for xrefs to
746 * numbered list items or bibliography entries.
747 */
748 continue;
749 } else {
750 /*
751 * An xref to a different section has its text
752 * completely replaced.
753 */
754 word *w2, *w3, *w4;
755 w2 = w3 = w->next;
756 w4 = NULL;
757 while (w2) {
758 if (w2->type == word_XrefEnd) {
759 w4 = w2->next;
760 w2->next = NULL;
761 break;
762 }
763 w2 = w2->next;
764 }
765 free_word_list(w3);
766
767 /*
768 * Now w is the UpperXref / LowerXref we
769 * started with, and w4 is the next word after
770 * the corresponding XrefEnd (if any). The
771 * simplest thing is just to stick a pointer to
772 * the target node structure in the private
773 * data field of the xref word, and let
774 * info_rdaddwc and friends read the node name
775 * out from there.
776 */
777 w->next = w4;
778 w->private_data = kwl->para->private_data;
779 assert(w->private_data);
780 }
781 }
782 }
783 }
784
785 return ret;
786 }
787
788 static int info_rdaddwc(info_data *id, word *words, word *end, int xrefs,
789 infoconfig *cfg) {
790 int ret = 0;
791
792 for (; words && words != end; words = words->next) switch (words->type) {
793 case word_HyperLink:
794 case word_HyperEnd:
795 case word_XrefEnd:
796 case word_IndexRef:
797 break;
798
799 case word_Normal:
800 case word_Emph:
801 case word_Code:
802 case word_WeakCode:
803 case word_WhiteSpace:
804 case word_EmphSpace:
805 case word_CodeSpace:
806 case word_WkCodeSpace:
807 case word_Quote:
808 case word_EmphQuote:
809 case word_CodeQuote:
810 case word_WkCodeQuote:
811 assert(words->type != word_CodeQuote &&
812 words->type != word_WkCodeQuote);
813 if (towordstyle(words->type) == word_Emph &&
814 (attraux(words->aux) == attr_First ||
815 attraux(words->aux) == attr_Only))
816 ret += info_rdadds(id, cfg->startemph);
817 else if (towordstyle(words->type) == word_Code &&
818 (attraux(words->aux) == attr_First ||
819 attraux(words->aux) == attr_Only))
820 ret += info_rdadds(id, cfg->lquote);
821 if (removeattr(words->type) == word_Normal) {
822 if (cvt_ok(id->charset, words->text) || !words->alt)
823 ret += info_rdadds(id, words->text);
824 else
825 ret += info_rdaddwc(id, words->alt, NULL, FALSE, cfg);
826 } else if (removeattr(words->type) == word_WhiteSpace) {
827 ret += info_rdadd(id, L' ');
828 } else if (removeattr(words->type) == word_Quote) {
829 ret += info_rdadds(id, quoteaux(words->aux) == quote_Open ?
830 cfg->lquote : cfg->rquote);
831 }
832 if (towordstyle(words->type) == word_Emph &&
833 (attraux(words->aux) == attr_Last ||
834 attraux(words->aux) == attr_Only))
835 ret += info_rdadds(id, cfg->endemph);
836 else if (towordstyle(words->type) == word_Code &&
837 (attraux(words->aux) == attr_Last ||
838 attraux(words->aux) == attr_Only))
839 ret += info_rdadds(id, cfg->rquote);
840 break;
841
842 case word_UpperXref:
843 case word_LowerXref:
844 if (xrefs && words->private_data) {
845 /*
846 * This bit is structural and so must be done in char
847 * rather than wchar_t.
848 */
849 ret += info_rdaddsc(id, "*Note ");
850 ret += info_rdaddsc(id, ((node *)words->private_data)->name);
851 ret += info_rdaddsc(id, "::");
852 }
853 break;
854 }
855
856 return ret;
857 }
858
859 static int info_width_internal(word *words, int xrefs, infoconfig *cfg);
860
861 static int info_width_internal_list(word *words, int xrefs, infoconfig *cfg) {
862 int w = 0;
863 while (words) {
864 w += info_width_internal(words, xrefs, cfg);
865 words = words->next;
866 }
867 return w;
868 }
869
870 static int info_width_internal(word *words, int xrefs, infoconfig *cfg) {
871 int wid;
872 int attr;
873
874 switch (words->type) {
875 case word_HyperLink:
876 case word_HyperEnd:
877 case word_XrefEnd:
878 case word_IndexRef:
879 return 0;
880
881 case word_UpperXref:
882 case word_LowerXref:
883 if (xrefs && words->private_data) {
884 /* "*Note " plus "::" comes to 8 characters */
885 return 8 + strwid(((node *)words->private_data)->name,
886 cfg->charset);
887 } else
888 return 0;
889 }
890
891 assert(words->type < word_internal_endattrs);
892
893 wid = 0;
894 attr = towordstyle(words->type);
895
896 if (attr == word_Emph || attr == word_Code) {
897 if (attraux(words->aux) == attr_Only ||
898 attraux(words->aux) == attr_First)
899 wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
900 cfg->charset);
901 }
902 if (attr == word_Emph || attr == word_Code) {
903 if (attraux(words->aux) == attr_Only ||
904 attraux(words->aux) == attr_Last)
905 wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
906 cfg->charset);
907 }
908
909 switch (words->type) {
910 case word_Normal:
911 case word_Emph:
912 case word_Code:
913 case word_WeakCode:
914 if (cvt_ok(cfg->charset, words->text) || !words->alt)
915 wid += ustrwid(words->text, cfg->charset);
916 else
917 wid += info_width_internal_list(words->alt, xrefs, cfg);
918 return wid;
919
920 case word_WhiteSpace:
921 case word_EmphSpace:
922 case word_CodeSpace:
923 case word_WkCodeSpace:
924 case word_Quote:
925 case word_EmphQuote:
926 case word_CodeQuote:
927 case word_WkCodeQuote:
928 assert(words->type != word_CodeQuote &&
929 words->type != word_WkCodeQuote);
930 if (removeattr(words->type) == word_Quote) {
931 if (quoteaux(words->aux) == quote_Open)
932 wid += ustrwid(cfg->lquote, cfg->charset);
933 else
934 wid += ustrwid(cfg->rquote, cfg->charset);
935 } else
936 wid++; /* space */
937 }
938 return wid;
939 }
940
941 static int info_width_noxrefs(void *ctx, word *words)
942 {
943 return info_width_internal(words, FALSE, (infoconfig *)ctx);
944 }
945 static int info_width_xrefs(void *ctx, word *words)
946 {
947 return info_width_internal(words, TRUE, (infoconfig *)ctx);
948 }
949
950 static void info_heading(info_data *text, word *tprefix,
951 word *words, int width, infoconfig *cfg) {
952 int length;
953 int firstlinewidth, wrapwidth;
954 wrappedline *wrapping, *p;
955
956 length = 0;
957 if (tprefix) {
958 length += info_rdaddwc(text, tprefix, NULL, FALSE, cfg);
959 length += info_rdadds(text, cfg->sectsuffix);
960 }
961
962 wrapwidth = width;
963 firstlinewidth = width - length;
964
965 wrapping = wrap_para(words, firstlinewidth, wrapwidth,
966 info_width_noxrefs, cfg, 0);
967 for (p = wrapping; p; p = p->next) {
968 length += info_rdaddwc(text, p->begin, p->end, FALSE, cfg);
969 info_rdadd(text, L'\n');
970 while (length > 0) {
971 info_rdadds(text, cfg->underline);
972 length -= ustrwid(cfg->underline, cfg->charset);
973 }
974 info_rdadd(text, L'\n');
975 length = 0;
976 }
977 wrap_free(wrapping);
978 info_rdadd(text, L'\n');
979 }
980
981 static void info_rule(info_data *text, int indent, int width, infoconfig *cfg)
982 {
983 while (indent--) info_rdadd(text, L' ');
984 while (width > 0) {
985 info_rdadds(text, cfg->rule);
986 width -= ustrwid(cfg->rule, cfg->charset);
987 }
988 info_rdadd(text, L'\n');
989 info_rdadd(text, L'\n');
990 }
991
992 static void info_para(info_data *text, word *prefix, wchar_t *prefixextra,
993 word *input, keywordlist *keywords, int indent,
994 int extraindent, int width, infoconfig *cfg) {
995 wrappedline *wrapping, *p;
996 word *words;
997 int e;
998 int i;
999 int firstlinewidth = width;
1000
1001 words = info_transform_wordlist(input, keywords);
1002
1003 if (prefix) {
1004 for (i = 0; i < indent; i++)
1005 info_rdadd(text, L' ');
1006 e = info_rdaddwc(text, prefix, NULL, FALSE, cfg);
1007 if (prefixextra)
1008 e += info_rdadds(text, prefixextra);
1009 /* If the prefix is too long, shorten the first line to fit. */
1010 e = extraindent - e;
1011 if (e < 0) {
1012 firstlinewidth += e; /* this decreases it, since e < 0 */
1013 if (firstlinewidth < 0) {
1014 e = indent + extraindent;
1015 firstlinewidth = width;
1016 info_rdadd(text, L'\n');
1017 } else
1018 e = 0;
1019 }
1020 } else
1021 e = indent + extraindent;
1022
1023 wrapping = wrap_para(words, firstlinewidth, width, info_width_xrefs,
1024 cfg, 0);
1025 for (p = wrapping; p; p = p->next) {
1026 for (i = 0; i < e; i++)
1027 info_rdadd(text, L' ');
1028 info_rdaddwc(text, p->begin, p->end, TRUE, cfg);
1029 info_rdadd(text, L'\n');
1030 e = indent + extraindent;
1031 }
1032 wrap_free(wrapping);
1033 info_rdadd(text, L'\n');
1034
1035 free_word_list(words);
1036 }
1037
1038 static void info_codepara(info_data *text, word *words,
1039 int indent, int width) {
1040 int i;
1041
1042 for (; words; words = words->next) if (words->type == word_WeakCode) {
1043 for (i = 0; i < indent; i++)
1044 info_rdadd(text, L' ');
1045 if (info_rdadds(text, words->text) > width) {
1046 /* FIXME: warn */
1047 }
1048 info_rdadd(text, L'\n');
1049 }
1050
1051 info_rdadd(text, L'\n');
1052 }
1053
1054 static void info_versionid(info_data *text, word *words, infoconfig *cfg) {
1055 info_rdadd(text, L'[');
1056 info_rdaddwc(text, words, NULL, FALSE, cfg);
1057 info_rdadds(text, L"]\n");
1058 }
1059
1060 static node *info_node_new(char *name, int charset)
1061 {
1062 node *n;
1063
1064 n = snew(node);
1065 n->text = empty_info_data;
1066 n->text.charset = charset;
1067 n->up = n->next = n->prev = n->lastchild = n->listnext = NULL;
1068 n->name = dupstr(name);
1069 n->started_menu = FALSE;
1070
1071 return n;
1072 }
1073
1074 static char *info_node_name_core(info_data *id, filepos *fpos)
1075 {
1076 char *p, *q;
1077
1078 /*
1079 * We cannot have commas, colons or parentheses in a node name.
1080 * Remove any that we find, with a warning.
1081 */
1082 p = q = id->output.text;
1083 while (*p) {
1084 if (*p == ':' || *p == ',' || *p == '(' || *p == ')') {
1085 error(err_infonodechar, fpos, *p);
1086 } else {
1087 *q++ = *p;
1088 }
1089 p++;
1090 }
1091 *q = '\0';
1092
1093 return id->output.text;
1094 }
1095
1096 static char *info_node_name_for_para(paragraph *par, infoconfig *cfg)
1097 {
1098 info_data id = EMPTY_INFO_DATA;
1099
1100 id.charset = cfg->charset;
1101 info_rdaddwc(&id, par->kwtext ? par->kwtext : par->words,
1102 NULL, FALSE, cfg);
1103 info_rdaddsc(&id, NULL);
1104
1105 return info_node_name_core(&id, &par->fpos);
1106 }
1107
1108 static char *info_node_name_for_text(wchar_t *text, infoconfig *cfg)
1109 {
1110 info_data id = EMPTY_INFO_DATA;
1111
1112 id.charset = cfg->charset;
1113 info_rdadds(&id, text);
1114 info_rdaddsc(&id, NULL);
1115
1116 return info_node_name_core(&id, NULL);
1117 }
1118
1119 static void info_menu_item(info_data *text, node *n, paragraph *p,
1120 infoconfig *cfg)
1121 {
1122 /*
1123 * FIXME: Depending on how we're doing node names in this info
1124 * file, we might want to do
1125 *
1126 * * Node name:: Chapter title
1127 *
1128 * _or_
1129 *
1130 * * Chapter number: Node name.
1131 *
1132 * This function mostly works in char rather than wchar_t,
1133 * because a menu item is a structural component.
1134 */
1135 info_rdaddsc(text, "* ");
1136 info_rdaddsc(text, n->name);
1137 info_rdaddsc(text, "::");
1138 if (p) {
1139 info_rdaddc(text, ' ');
1140 info_rdaddwc(text, p->words, NULL, FALSE, cfg);
1141 }
1142 info_rdaddc(text, '\n');
1143 }
1144
1145 /*
1146 * These functions implement my wrapper on the rdadd* calls which
1147 * allows me to switch arbitrarily between literal octet-string
1148 * text and charset-translated Unicode. (Because no matter what
1149 * character set I write the actual text in, I expect info readers
1150 * to treat node names and file names literally and to expect
1151 * keywords like `*Note' in their canonical form, so I have to take
1152 * steps to ensure that those structural elements of the file
1153 * aren't messed with.)
1154 */
1155 static int info_rdadds(info_data *d, wchar_t const *wcs)
1156 {
1157 if (!d->wcmode) {
1158 d->state = charset_init_state;
1159 d->wcmode = TRUE;
1160 }
1161
1162 if (wcs) {
1163 char buf[256];
1164 int len, width, ret;
1165
1166 width = ustrwid(wcs, d->charset);
1167
1168 len = ustrlen(wcs);
1169 while (len > 0) {
1170 int prevlen = len;
1171
1172 ret = charset_from_unicode(&wcs, &len, buf, lenof(buf),
1173 d->charset, &d->state, NULL);
1174
1175 assert(len < prevlen);
1176
1177 if (ret > 0) {
1178 buf[ret] = '\0';
1179 rdaddsc(&d->output, buf);
1180 }
1181 }
1182
1183 return width;
1184 } else
1185 return 0;
1186 }
1187
1188 static int info_rdaddsc(info_data *d, char const *cs)
1189 {
1190 if (d->wcmode) {
1191 char buf[256];
1192 int ret;
1193
1194 ret = charset_from_unicode(NULL, 0, buf, lenof(buf),
1195 d->charset, &d->state, NULL);
1196 if (ret > 0) {
1197 buf[ret] = '\0';
1198 rdaddsc(&d->output, buf);
1199 }
1200
1201 d->wcmode = FALSE;
1202 }
1203
1204 if (cs) {
1205 rdaddsc(&d->output, cs);
1206 return strwid(cs, d->charset);
1207 } else
1208 return 0;
1209 }
1210
1211 static int info_rdadd(info_data *d, wchar_t wc)
1212 {
1213 wchar_t wcs[2];
1214 wcs[0] = wc;
1215 wcs[1] = L'\0';
1216 return info_rdadds(d, wcs);
1217 }
1218
1219 static int info_rdaddc(info_data *d, char c)
1220 {
1221 char cs[2];
1222 cs[0] = c;
1223 cs[1] = '\0';
1224 return info_rdaddsc(d, cs);
1225 }