Introduce global (cross-backend) \cfg{contents} and \cfg{index}
[sgt/halibut] / bk_info.c
1 /*
2 * info backend for Halibut
3 *
4 * Possible future work:
5 *
6 * - configurable choice of how to allocate node names?
7 * + possibly a template-like approach, choosing node names to
8 * be the full section title or perhaps the internal keyword?
9 * + neither of those seems quite right. Perhaps instead a
10 * Windows Help-like mechanism, where a magic config
11 * directive allows user choice of name for every node.
12 * + Only trouble with that is, now what happens to the section
13 * numbers? Do they become completely vestigial and just sit
14 * in the title text of each node? Or do we keep them in the
15 * menus somehow? I think people might occasionally want to
16 * go to a section by number, if only because all the _other_
17 * formats of the same document will reference the numbers
18 * all the time. So our menu lines could look like one of
19 * these:
20 * * Nodename: Section 1.2. Title of section.
21 * * Section 1.2: Nodename. Title of section.
22 *
23 * - might be helpful to diagnose duplicate node names!
24 */
25
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <assert.h>
29 #include "halibut.h"
30
/*
 * Configuration for the info backend, filled in by
 * info_configure() from built-in defaults and \cfg directives.
 */
typedef struct {
    char *filename;             /* name of the primary output file */
    int maxfilesize;            /* size limit used when splitting into
                                 * sub-files; 0 writes a single file */
    int charset;                /* output character set */
    int listindentbefore;       /* indent in front of a list marker */
    int listindentafter;        /* indent reserved for the marker itself */
    int indent_code;            /* indent applied to code paragraphs */
    int width;                  /* overall text width */
    int index_width;            /* width of the term column in the index */
    wchar_t *bullet;            /* bullet text for bulleted lists */
    wchar_t *listsuffix;        /* text appended to numbered-list markers */
    wchar_t *startemph;         /* opening emphasis delimiter */
    wchar_t *endemph;           /* closing emphasis delimiter */
    wchar_t *lquote;            /* opening quote */
    wchar_t *rquote;            /* closing quote */
    wchar_t *sectsuffix;        /* text between section number and title */
    wchar_t *underline;         /* text repeated to underline headings */
    wchar_t *rule;              /* text repeated to draw a horizontal rule */
    wchar_t *index_text;        /* title of the generated index node */
} infoconfig;
44
45 typedef struct {
46 rdstringc output;
47 int charset;
48 charset_state state;
49 int wcmode;
50 } info_data;
51 #define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE }
52 static const info_data empty_info_data = EMPTY_INFO_DATA;
53
54 typedef struct node_tag node;
55 struct node_tag {
56 node *listnext;
57 node *up, *prev, *next, *lastchild;
58 int pos, started_menu, filenum;
59 char *name;
60 info_data text;
61 };
62
63 typedef struct {
64 char *text;
65 int length;
66 int nnodes, nodesize;
67 node **nodes;
68 } info_idx;
69
70 static int info_rdadd(info_data *, wchar_t);
71 static int info_rdadds(info_data *, wchar_t const *);
72 static int info_rdaddc(info_data *, char);
73 static int info_rdaddsc(info_data *, char const *);
74
75 static void info_heading(info_data *, word *, word *, int, infoconfig *);
76 static void info_rule(info_data *, int, int, infoconfig *);
77 static void info_para(info_data *, word *, wchar_t *, word *, keywordlist *,
78 int, int, int, infoconfig *);
79 static void info_codepara(info_data *, word *, int, int);
80 static void info_versionid(info_data *, word *, infoconfig *);
81 static void info_menu_item(info_data *, node *, paragraph *, infoconfig *);
82 static word *info_transform_wordlist(word *, keywordlist *);
83 static int info_check_index(word *, node *, indexdata *);
84
85 static int info_rdaddwc(info_data *, word *, word *, int, infoconfig *);
86
87 static node *info_node_new(char *name, int charset);
88 static char *info_node_name_for_para(paragraph *p, infoconfig *);
89 static char *info_node_name_for_text(wchar_t *text, infoconfig *);
90
91 static infoconfig info_configure(paragraph *source) {
92 infoconfig ret;
93 paragraph *p;
94
95 /*
96 * Defaults.
97 */
98 ret.filename = dupstr("output.info");
99 ret.maxfilesize = 64 << 10;
100 ret.charset = CS_ASCII;
101 ret.width = 70;
102 ret.listindentbefore = 1;
103 ret.listindentafter = 3;
104 ret.indent_code = 2;
105 ret.index_width = 40;
106 ret.listsuffix = L".";
107 ret.bullet = L"\x2022\0-\0\0";
108 ret.rule = L"\x2500\0-\0\0";
109 ret.startemph = L"_\0_\0\0";
110 ret.endemph = uadv(ret.startemph);
111 ret.lquote = L"\x2018\0\x2019\0`\0'\0\0";
112 ret.rquote = uadv(ret.lquote);
113 ret.sectsuffix = L": ";
114 ret.underline = L"\x203E\0-\0\0";
115 ret.index_text = L"Index";
116
117 /*
118 * Two-pass configuration so that we can pick up global config
119 * (e.g. `quotes') before having it overridden by specific
120 * config (`info-quotes'), irrespective of the order in which
121 * they occur.
122 */
123 for (p = source; p; p = p->next) {
124 if (p->type == para_Config) {
125 if (!ustricmp(p->keyword, L"quotes")) {
126 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
127 ret.lquote = uadv(p->keyword);
128 ret.rquote = uadv(ret.lquote);
129 }
130 } else if (!ustricmp(p->keyword, L"index")) {
131 ret.index_text = uadv(p->keyword);
132 }
133 }
134 }
135
136 for (p = source; p; p = p->next) {
137 if (p->type == para_Config) {
138 if (!ustricmp(p->keyword, L"info-filename")) {
139 sfree(ret.filename);
140 ret.filename = dupstr(adv(p->origkeyword));
141 } else if (!ustricmp(p->keyword, L"info-charset")) {
142 ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
143 } else if (!ustricmp(p->keyword, L"info-max-file-size")) {
144 ret.maxfilesize = utoi(uadv(p->keyword));
145 } else if (!ustricmp(p->keyword, L"info-width")) {
146 ret.width = utoi(uadv(p->keyword));
147 } else if (!ustricmp(p->keyword, L"info-indent-code")) {
148 ret.indent_code = utoi(uadv(p->keyword));
149 } else if (!ustricmp(p->keyword, L"info-index-width")) {
150 ret.index_width = utoi(uadv(p->keyword));
151 } else if (!ustricmp(p->keyword, L"info-list-indent")) {
152 ret.listindentbefore = utoi(uadv(p->keyword));
153 } else if (!ustricmp(p->keyword, L"info-listitem-indent")) {
154 ret.listindentafter = utoi(uadv(p->keyword));
155 } else if (!ustricmp(p->keyword, L"info-section-suffix")) {
156 ret.sectsuffix = uadv(p->keyword);
157 } else if (!ustricmp(p->keyword, L"info-underline")) {
158 ret.underline = uadv(p->keyword);
159 } else if (!ustricmp(p->keyword, L"info-bullet")) {
160 ret.bullet = uadv(p->keyword);
161 } else if (!ustricmp(p->keyword, L"info-rule")) {
162 ret.rule = uadv(p->keyword);
163 } else if (!ustricmp(p->keyword, L"info-list-suffix")) {
164 ret.listsuffix = uadv(p->keyword);
165 } else if (!ustricmp(p->keyword, L"info-emphasis")) {
166 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
167 ret.startemph = uadv(p->keyword);
168 ret.endemph = uadv(ret.startemph);
169 }
170 } else if (!ustricmp(p->keyword, L"info-quotes")) {
171 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
172 ret.lquote = uadv(p->keyword);
173 ret.rquote = uadv(ret.lquote);
174 }
175 }
176 }
177 }
178
179 /*
180 * Now process fallbacks on quote characters, underlines, the
181 * rule character, the emphasis characters, and bullets.
182 */
183 while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) &&
184 (!cvt_ok(ret.charset, ret.lquote) ||
185 !cvt_ok(ret.charset, ret.rquote))) {
186 ret.lquote = uadv(ret.rquote);
187 ret.rquote = uadv(ret.lquote);
188 }
189
190 while (*uadv(ret.endemph) && *uadv(uadv(ret.endemph)) &&
191 (!cvt_ok(ret.charset, ret.startemph) ||
192 !cvt_ok(ret.charset, ret.endemph))) {
193 ret.startemph = uadv(ret.endemph);
194 ret.endemph = uadv(ret.startemph);
195 }
196
197 while (*ret.underline && *uadv(ret.underline) &&
198 !cvt_ok(ret.charset, ret.underline))
199 ret.underline = uadv(ret.underline);
200
201 while (*ret.bullet && *uadv(ret.bullet) &&
202 !cvt_ok(ret.charset, ret.bullet))
203 ret.bullet = uadv(ret.bullet);
204
205 while (*ret.rule && *uadv(ret.rule) &&
206 !cvt_ok(ret.charset, ret.rule))
207 ret.rule = uadv(ret.rule);
208
209 return ret;
210 }
211
212 paragraph *info_config_filename(char *filename)
213 {
214 return cmdline_cfg_simple("info-filename", filename, NULL);
215 }
216
217 void info_backend(paragraph *sourceform, keywordlist *keywords,
218 indexdata *idx, void *unused) {
219 paragraph *p;
220 infoconfig conf;
221 word *prefix, *body, *wp;
222 word spaceword;
223 wchar_t *prefixextra;
224 int nesting, nestindent;
225 int indentb, indenta;
226 int filepos;
227 int has_index;
228 info_data intro_text = EMPTY_INFO_DATA;
229 node *topnode, *currnode;
230 word bullet;
231 FILE *fp;
232
233 IGNORE(unused);
234
235 conf = info_configure(sourceform);
236
237 /*
238 * Go through and create a node for each section.
239 */
240 topnode = info_node_new("Top", conf.charset);
241 currnode = topnode;
242 for (p = sourceform; p; p = p->next) switch (p->type) {
243 /*
244 * Chapter titles.
245 */
246 case para_Chapter:
247 case para_Appendix:
248 case para_UnnumberedChapter:
249 case para_Heading:
250 case para_Subsect:
251 {
252 node *newnode, *upnode;
253 char *nodename;
254
255 nodename = info_node_name_for_para(p, &conf);
256 newnode = info_node_new(nodename, conf.charset);
257 sfree(nodename);
258
259 p->private_data = newnode;
260
261 if (p->parent)
262 upnode = (node *)p->parent->private_data;
263 else
264 upnode = topnode;
265 assert(upnode);
266 newnode->up = upnode;
267
268 currnode->next = newnode;
269 newnode->prev = currnode;
270
271 currnode->listnext = newnode;
272 currnode = newnode;
273 }
274 break;
275 default:
276 p->private_data = NULL;
277 break;
278 }
279
280 /*
281 * Set up the display form of each index entry.
282 */
283 {
284 int i;
285 indexentry *entry;
286
287 for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
288 info_idx *ii = snew(info_idx);
289 info_data id = EMPTY_INFO_DATA;
290
291 id.charset = conf.charset;
292
293 ii->nnodes = ii->nodesize = 0;
294 ii->nodes = NULL;
295
296 ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE, &conf);
297
298 ii->text = id.output.text;
299
300 entry->backend_data = ii;
301 }
302 }
303
304 /*
305 * An Info file begins with a piece of introductory text which
306 * is apparently never shown anywhere. This seems to me to be a
307 * good place to put the copyright notice and the version IDs.
308 * Also, Info directory entries are expected to go here.
309 */
310 intro_text.charset = conf.charset;
311
312 info_rdaddsc(&intro_text,
313 "This Info file generated by Halibut, ");
314 info_rdaddsc(&intro_text, version);
315 info_rdaddsc(&intro_text, "\n\n");
316
317 for (p = sourceform; p; p = p->next)
318 if (p->type == para_Config &&
319 !ustricmp(p->keyword, L"info-dir-entry")) {
320 wchar_t *section, *shortname, *longname, *kw;
321 char *s;
322
323 section = uadv(p->keyword);
324 shortname = *section ? uadv(section) : L"";
325 longname = *shortname ? uadv(shortname) : L"";
326 kw = *longname ? uadv(longname) : L"";
327
328 if (!*longname) {
329 error(err_cfginsufarg, &p->fpos, p->origkeyword, 3);
330 continue;
331 }
332
333 info_rdaddsc(&intro_text, "INFO-DIR-SECTION ");
334 info_rdadds(&intro_text, section);
335 info_rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
336 info_rdadds(&intro_text, shortname);
337 info_rdaddsc(&intro_text, ": (");
338 s = dupstr(conf.filename);
339 if (strlen(s) > 5 && !strcmp(s+strlen(s)-5, ".info"))
340 s[strlen(s)-5] = '\0';
341 info_rdaddsc(&intro_text, s);
342 sfree(s);
343 info_rdaddsc(&intro_text, ")");
344 if (*kw) {
345 keyword *kwl = kw_lookup(keywords, kw);
346 if (kwl && kwl->para->private_data) {
347 node *n = (node *)kwl->para->private_data;
348 info_rdaddsc(&intro_text, n->name);
349 }
350 }
351 info_rdaddsc(&intro_text, ". ");
352 info_rdadds(&intro_text, longname);
353 info_rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
354 }
355
356 for (p = sourceform; p; p = p->next)
357 if (p->type == para_Copyright)
358 info_para(&intro_text, NULL, NULL, p->words, keywords,
359 0, 0, conf.width, &conf);
360
361 for (p = sourceform; p; p = p->next)
362 if (p->type == para_VersionID)
363 info_versionid(&intro_text, p->words, &conf);
364
365 if (intro_text.output.text[intro_text.output.pos-1] != '\n')
366 info_rdaddc(&intro_text, '\n');
367
368 /* Do the title */
369 for (p = sourceform; p; p = p->next)
370 if (p->type == para_Title)
371 info_heading(&topnode->text, NULL, p->words, conf.width, &conf);
372
373 nestindent = conf.listindentbefore + conf.listindentafter;
374 nesting = 0;
375
376 currnode = topnode;
377
378 /* Do the main document */
379 for (p = sourceform; p; p = p->next) switch (p->type) {
380
381 case para_QuotePush:
382 nesting += 2;
383 break;
384 case para_QuotePop:
385 nesting -= 2;
386 assert(nesting >= 0);
387 break;
388
389 case para_LcontPush:
390 nesting += nestindent;
391 break;
392 case para_LcontPop:
393 nesting -= nestindent;
394 assert(nesting >= 0);
395 break;
396
397 /*
398 * Things we ignore because we've already processed them or
399 * aren't going to touch them in this pass.
400 */
401 case para_IM:
402 case para_BR:
403 case para_Biblio: /* only touch BiblioCited */
404 case para_VersionID:
405 case para_NoCite:
406 case para_Title:
407 break;
408
409 /*
410 * Chapter titles.
411 */
412 case para_Chapter:
413 case para_Appendix:
414 case para_UnnumberedChapter:
415 case para_Heading:
416 case para_Subsect:
417 currnode = p->private_data;
418 assert(currnode);
419 assert(currnode->up);
420
421 if (!currnode->up->started_menu) {
422 info_rdaddsc(&currnode->up->text, "* Menu:\n\n");
423 currnode->up->started_menu = TRUE;
424 }
425 info_menu_item(&currnode->up->text, currnode, p, &conf);
426
427 has_index |= info_check_index(p->words, currnode, idx);
428 info_heading(&currnode->text, p->kwtext, p->words, conf.width, &conf);
429 nesting = 0;
430 break;
431
432 case para_Rule:
433 info_rule(&currnode->text, nesting, conf.width - nesting, &conf);
434 break;
435
436 case para_Normal:
437 case para_Copyright:
438 case para_DescribedThing:
439 case para_Description:
440 case para_BiblioCited:
441 case para_Bullet:
442 case para_NumberedList:
443 has_index |= info_check_index(p->words, currnode, idx);
444 if (p->type == para_Bullet) {
445 bullet.next = NULL;
446 bullet.alt = NULL;
447 bullet.type = word_Normal;
448 bullet.text = conf.bullet;
449 prefix = &bullet;
450 prefixextra = NULL;
451 indentb = conf.listindentbefore;
452 indenta = conf.listindentafter;
453 } else if (p->type == para_NumberedList) {
454 prefix = p->kwtext;
455 prefixextra = conf.listsuffix;
456 indentb = conf.listindentbefore;
457 indenta = conf.listindentafter;
458 } else if (p->type == para_Description) {
459 prefix = NULL;
460 prefixextra = NULL;
461 indentb = conf.listindentbefore;
462 indenta = conf.listindentafter;
463 } else {
464 prefix = NULL;
465 prefixextra = NULL;
466 indentb = indenta = 0;
467 }
468 if (p->type == para_BiblioCited) {
469 body = dup_word_list(p->kwtext);
470 for (wp = body; wp->next; wp = wp->next);
471 wp->next = &spaceword;
472 spaceword.next = p->words;
473 spaceword.alt = NULL;
474 spaceword.type = word_WhiteSpace;
475 spaceword.text = NULL;
476 } else {
477 wp = NULL;
478 body = p->words;
479 }
480 info_para(&currnode->text, prefix, prefixextra, body, keywords,
481 nesting + indentb, indenta,
482 conf.width - nesting - indentb - indenta, &conf);
483 if (wp) {
484 wp->next = NULL;
485 free_word_list(body);
486 }
487 break;
488
489 case para_Code:
490 info_codepara(&currnode->text, p->words,
491 nesting + conf.indent_code,
492 conf.width - nesting - 2 * conf.indent_code);
493 break;
494 }
495
496 /*
497 * Create an index node if required.
498 */
499 if (has_index) {
500 node *newnode;
501 int i, j, k;
502 indexentry *entry;
503 char *nodename;
504
505 nodename = info_node_name_for_text(conf.index_text, &conf);
506 newnode = info_node_new(nodename, conf.charset);
507 sfree(nodename);
508
509 newnode->up = topnode;
510
511 currnode->next = newnode;
512 newnode->prev = currnode;
513 currnode->listnext = newnode;
514
515 k = info_rdadds(&newnode->text, conf.index_text);
516 info_rdaddsc(&newnode->text, "\n");
517 while (k > 0) {
518 info_rdadds(&newnode->text, conf.underline);
519 k -= ustrwid(conf.underline, conf.charset);
520 }
521 info_rdaddsc(&newnode->text, "\n\n");
522
523 info_menu_item(&topnode->text, newnode, NULL, &conf);
524
525 for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
526 info_idx *ii = (info_idx *)entry->backend_data;
527
528 for (j = 0; j < ii->nnodes; j++) {
529 /*
530 * When we have multiple references for a single
531 * index term, we only display the actual term on
532 * the first line, to make it clear that the terms
533 * really are the same.
534 */
535 if (j == 0)
536 info_rdaddsc(&newnode->text, ii->text);
537 for (k = (j ? 0 : ii->length); k < conf.index_width-2; k++)
538 info_rdaddc(&newnode->text, ' ');
539 info_rdaddsc(&newnode->text, " *Note ");
540 info_rdaddsc(&newnode->text, ii->nodes[j]->name);
541 info_rdaddsc(&newnode->text, "::\n");
542 }
543 }
544 }
545
546 /*
547 * Finalise the text of each node, by adding the ^_ delimiter
548 * and the node line at the top.
549 */
550 for (currnode = topnode; currnode; currnode = currnode->listnext) {
551 char *origtext = currnode->text.output.text;
552 currnode->text = empty_info_data;
553 currnode->text.charset = conf.charset;
554 info_rdaddsc(&currnode->text, "\037\nFile: ");
555 info_rdaddsc(&currnode->text, conf.filename);
556 info_rdaddsc(&currnode->text, ", Node: ");
557 info_rdaddsc(&currnode->text, currnode->name);
558 if (currnode->prev) {
559 info_rdaddsc(&currnode->text, ", Prev: ");
560 info_rdaddsc(&currnode->text, currnode->prev->name);
561 }
562 info_rdaddsc(&currnode->text, ", Up: ");
563 info_rdaddsc(&currnode->text, (currnode->up ?
564 currnode->up->name : "(dir)"));
565 if (currnode->next) {
566 info_rdaddsc(&currnode->text, ", Next: ");
567 info_rdaddsc(&currnode->text, currnode->next->name);
568 }
569 info_rdaddsc(&currnode->text, "\n\n");
570 info_rdaddsc(&currnode->text, origtext);
571 /*
572 * Just make _absolutely_ sure we end with a newline.
573 */
574 if (currnode->text.output.text[currnode->text.output.pos-1] != '\n')
575 info_rdaddc(&currnode->text, '\n');
576
577 sfree(origtext);
578 }
579
580 /*
581 * Compute the offsets for the tag table.
582 */
583 filepos = intro_text.output.pos;
584 for (currnode = topnode; currnode; currnode = currnode->listnext) {
585 currnode->pos = filepos;
586 filepos += currnode->text.output.pos;
587 }
588
589 /*
590 * Split into sub-files.
591 */
592 if (conf.maxfilesize > 0) {
593 int currfilesize = intro_text.output.pos, currfilenum = 1;
594 for (currnode = topnode; currnode; currnode = currnode->listnext) {
595 if (currfilesize > intro_text.output.pos &&
596 currfilesize + currnode->text.output.pos > conf.maxfilesize) {
597 currfilenum++;
598 currfilesize = intro_text.output.pos;
599 }
600 currnode->filenum = currfilenum;
601 currfilesize += currnode->text.output.pos;
602 }
603 }
604
605 /*
606 * Write the primary output file.
607 */
608 fp = fopen(conf.filename, "w");
609 if (!fp) {
610 error(err_cantopenw, conf.filename);
611 return;
612 }
613 fputs(intro_text.output.text, fp);
614 if (conf.maxfilesize == 0) {
615 for (currnode = topnode; currnode; currnode = currnode->listnext)
616 fputs(currnode->text.output.text, fp);
617 } else {
618 int filenum = 0;
619 fprintf(fp, "\037\nIndirect:\n");
620 for (currnode = topnode; currnode; currnode = currnode->listnext)
621 if (filenum != currnode->filenum) {
622 filenum = currnode->filenum;
623 fprintf(fp, "%s-%d: %d\n", conf.filename, filenum,
624 currnode->pos);
625 }
626 }
627 fprintf(fp, "\037\nTag Table:\n");
628 if (conf.maxfilesize > 0)
629 fprintf(fp, "(Indirect)\n");
630 for (currnode = topnode; currnode; currnode = currnode->listnext)
631 fprintf(fp, "Node: %s\177%d\n", currnode->name, currnode->pos);
632 fprintf(fp, "\037\nEnd Tag Table\n");
633 fclose(fp);
634
635 /*
636 * Write the subfiles.
637 */
638 if (conf.maxfilesize > 0) {
639 int filenum = 0;
640 fp = NULL;
641
642 for (currnode = topnode; currnode; currnode = currnode->listnext) {
643 if (filenum != currnode->filenum) {
644 char *fname;
645
646 filenum = currnode->filenum;
647
648 if (fp)
649 fclose(fp);
650 fname = snewn(strlen(conf.filename) + 40, char);
651 sprintf(fname, "%s-%d", conf.filename, filenum);
652 fp = fopen(fname, "w");
653 if (!fp) {
654 error(err_cantopenw, fname);
655 return;
656 }
657 sfree(fname);
658 fputs(intro_text.output.text, fp);
659 }
660 fputs(currnode->text.output.text, fp);
661 }
662
663 if (fp)
664 fclose(fp);
665 }
666 }
667
668 static int info_check_index(word *w, node *n, indexdata *idx)
669 {
670 int ret = 0;
671
672 for (; w; w = w->next) {
673 if (w->type == word_IndexRef) {
674 indextag *tag;
675 int i;
676
677 tag = index_findtag(idx, w->text);
678 if (!tag)
679 break;
680
681 for (i = 0; i < tag->nrefs; i++) {
682 indexentry *entry = tag->refs[i];
683 info_idx *ii = (info_idx *)entry->backend_data;
684
685 if (ii->nnodes > 0 && ii->nodes[ii->nnodes-1] == n) {
686 /*
687 * If the same index term is indexed twice
688 * within the same section, we only want to
689 * mention it once in the index. So do nothing
690 * here.
691 */
692 continue;
693 }
694
695 if (ii->nnodes >= ii->nodesize) {
696 ii->nodesize += 32;
697 ii->nodes = sresize(ii->nodes, ii->nodesize, node *);
698 }
699
700 ii->nodes[ii->nnodes++] = n;
701
702 ret = 1;
703 }
704 }
705 }
706
707 return ret;
708 }
709
710 static word *info_transform_wordlist(word *words, keywordlist *keywords)
711 {
712 word *ret = dup_word_list(words);
713 word *w;
714 keyword *kwl;
715
716 for (w = ret; w; w = w->next) {
717 w->private_data = NULL;
718 if (w->type == word_UpperXref || w->type == word_LowerXref) {
719 kwl = kw_lookup(keywords, w->text);
720 if (kwl) {
721 if (kwl->para->type == para_NumberedList ||
722 kwl->para->type == para_BiblioCited) {
723 /*
724 * In Info, we do nothing special for xrefs to
725 * numbered list items or bibliography entries.
726 */
727 continue;
728 } else {
729 /*
730 * An xref to a different section has its text
731 * completely replaced.
732 */
733 word *w2, *w3, *w4;
734 w2 = w3 = w->next;
735 w4 = NULL;
736 while (w2) {
737 if (w2->type == word_XrefEnd) {
738 w4 = w2->next;
739 w2->next = NULL;
740 break;
741 }
742 w2 = w2->next;
743 }
744 free_word_list(w3);
745
746 /*
747 * Now w is the UpperXref / LowerXref we
748 * started with, and w4 is the next word after
749 * the corresponding XrefEnd (if any). The
750 * simplest thing is just to stick a pointer to
751 * the target node structure in the private
752 * data field of the xref word, and let
753 * info_rdaddwc and friends read the node name
754 * out from there.
755 */
756 w->next = w4;
757 w->private_data = kwl->para->private_data;
758 assert(w->private_data);
759 }
760 }
761 }
762 }
763
764 return ret;
765 }
766
767 static int info_rdaddwc(info_data *id, word *words, word *end, int xrefs,
768 infoconfig *cfg) {
769 int ret = 0;
770
771 for (; words && words != end; words = words->next) switch (words->type) {
772 case word_HyperLink:
773 case word_HyperEnd:
774 case word_XrefEnd:
775 case word_IndexRef:
776 break;
777
778 case word_Normal:
779 case word_Emph:
780 case word_Code:
781 case word_WeakCode:
782 case word_WhiteSpace:
783 case word_EmphSpace:
784 case word_CodeSpace:
785 case word_WkCodeSpace:
786 case word_Quote:
787 case word_EmphQuote:
788 case word_CodeQuote:
789 case word_WkCodeQuote:
790 assert(words->type != word_CodeQuote &&
791 words->type != word_WkCodeQuote);
792 if (towordstyle(words->type) == word_Emph &&
793 (attraux(words->aux) == attr_First ||
794 attraux(words->aux) == attr_Only))
795 ret += info_rdadds(id, cfg->startemph);
796 else if (towordstyle(words->type) == word_Code &&
797 (attraux(words->aux) == attr_First ||
798 attraux(words->aux) == attr_Only))
799 ret += info_rdadds(id, cfg->lquote);
800 if (removeattr(words->type) == word_Normal) {
801 if (cvt_ok(id->charset, words->text) || !words->alt)
802 ret += info_rdadds(id, words->text);
803 else
804 ret += info_rdaddwc(id, words->alt, NULL, FALSE, cfg);
805 } else if (removeattr(words->type) == word_WhiteSpace) {
806 ret += info_rdadd(id, L' ');
807 } else if (removeattr(words->type) == word_Quote) {
808 ret += info_rdadds(id, quoteaux(words->aux) == quote_Open ?
809 cfg->lquote : cfg->rquote);
810 }
811 if (towordstyle(words->type) == word_Emph &&
812 (attraux(words->aux) == attr_Last ||
813 attraux(words->aux) == attr_Only))
814 ret += info_rdadds(id, cfg->endemph);
815 else if (towordstyle(words->type) == word_Code &&
816 (attraux(words->aux) == attr_Last ||
817 attraux(words->aux) == attr_Only))
818 ret += info_rdadds(id, cfg->rquote);
819 break;
820
821 case word_UpperXref:
822 case word_LowerXref:
823 if (xrefs && words->private_data) {
824 /*
825 * This bit is structural and so must be done in char
826 * rather than wchar_t.
827 */
828 ret += info_rdaddsc(id, "*Note ");
829 ret += info_rdaddsc(id, ((node *)words->private_data)->name);
830 ret += info_rdaddsc(id, "::");
831 }
832 break;
833 }
834
835 return ret;
836 }
837
838 static int info_width_internal(word *words, int xrefs, infoconfig *cfg);
839
840 static int info_width_internal_list(word *words, int xrefs, infoconfig *cfg) {
841 int w = 0;
842 while (words) {
843 w += info_width_internal(words, xrefs, cfg);
844 words = words->next;
845 }
846 return w;
847 }
848
849 static int info_width_internal(word *words, int xrefs, infoconfig *cfg) {
850 int wid;
851 int attr;
852
853 switch (words->type) {
854 case word_HyperLink:
855 case word_HyperEnd:
856 case word_XrefEnd:
857 case word_IndexRef:
858 return 0;
859
860 case word_UpperXref:
861 case word_LowerXref:
862 if (xrefs && words->private_data) {
863 /* "*Note " plus "::" comes to 8 characters */
864 return 8 + strwid(((node *)words->private_data)->name,
865 cfg->charset);
866 } else
867 return 0;
868 }
869
870 assert(words->type < word_internal_endattrs);
871
872 wid = 0;
873 attr = towordstyle(words->type);
874
875 if (attr == word_Emph || attr == word_Code) {
876 if (attraux(words->aux) == attr_Only ||
877 attraux(words->aux) == attr_First)
878 wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
879 cfg->charset);
880 }
881 if (attr == word_Emph || attr == word_Code) {
882 if (attraux(words->aux) == attr_Only ||
883 attraux(words->aux) == attr_Last)
884 wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
885 cfg->charset);
886 }
887
888 switch (words->type) {
889 case word_Normal:
890 case word_Emph:
891 case word_Code:
892 case word_WeakCode:
893 if (cvt_ok(cfg->charset, words->text) || !words->alt)
894 wid += ustrwid(words->text, cfg->charset);
895 else
896 wid += info_width_internal_list(words->alt, xrefs, cfg);
897 return wid;
898
899 case word_WhiteSpace:
900 case word_EmphSpace:
901 case word_CodeSpace:
902 case word_WkCodeSpace:
903 case word_Quote:
904 case word_EmphQuote:
905 case word_CodeQuote:
906 case word_WkCodeQuote:
907 assert(words->type != word_CodeQuote &&
908 words->type != word_WkCodeQuote);
909 if (removeattr(words->type) == word_Quote) {
910 if (quoteaux(words->aux) == quote_Open)
911 wid += ustrwid(cfg->lquote, cfg->charset);
912 else
913 wid += ustrwid(cfg->rquote, cfg->charset);
914 } else
915 wid++; /* space */
916 }
917 return wid;
918 }
919
920 static int info_width_noxrefs(void *ctx, word *words)
921 {
922 return info_width_internal(words, FALSE, (infoconfig *)ctx);
923 }
924 static int info_width_xrefs(void *ctx, word *words)
925 {
926 return info_width_internal(words, TRUE, (infoconfig *)ctx);
927 }
928
929 static void info_heading(info_data *text, word *tprefix,
930 word *words, int width, infoconfig *cfg) {
931 int length;
932 int firstlinewidth, wrapwidth;
933 wrappedline *wrapping, *p;
934
935 length = 0;
936 if (tprefix) {
937 length += info_rdaddwc(text, tprefix, NULL, FALSE, cfg);
938 length += info_rdadds(text, cfg->sectsuffix);
939 }
940
941 wrapwidth = width;
942 firstlinewidth = width - length;
943
944 wrapping = wrap_para(words, firstlinewidth, wrapwidth,
945 info_width_noxrefs, cfg, 0);
946 for (p = wrapping; p; p = p->next) {
947 length += info_rdaddwc(text, p->begin, p->end, FALSE, cfg);
948 info_rdadd(text, L'\n');
949 while (length > 0) {
950 info_rdadds(text, cfg->underline);
951 length -= ustrwid(cfg->underline, cfg->charset);
952 }
953 info_rdadd(text, L'\n');
954 length = 0;
955 }
956 wrap_free(wrapping);
957 info_rdadd(text, L'\n');
958 }
959
960 static void info_rule(info_data *text, int indent, int width, infoconfig *cfg)
961 {
962 while (indent--) info_rdadd(text, L' ');
963 while (width > 0) {
964 info_rdadds(text, cfg->rule);
965 width -= ustrwid(cfg->rule, cfg->charset);
966 }
967 info_rdadd(text, L'\n');
968 info_rdadd(text, L'\n');
969 }
970
971 static void info_para(info_data *text, word *prefix, wchar_t *prefixextra,
972 word *input, keywordlist *keywords, int indent,
973 int extraindent, int width, infoconfig *cfg) {
974 wrappedline *wrapping, *p;
975 word *words;
976 int e;
977 int i;
978 int firstlinewidth = width;
979
980 words = info_transform_wordlist(input, keywords);
981
982 if (prefix) {
983 for (i = 0; i < indent; i++)
984 info_rdadd(text, L' ');
985 e = info_rdaddwc(text, prefix, NULL, FALSE, cfg);
986 if (prefixextra)
987 e += info_rdadds(text, prefixextra);
988 /* If the prefix is too long, shorten the first line to fit. */
989 e = extraindent - e;
990 if (e < 0) {
991 firstlinewidth += e; /* this decreases it, since e < 0 */
992 if (firstlinewidth < 0) {
993 e = indent + extraindent;
994 firstlinewidth = width;
995 info_rdadd(text, L'\n');
996 } else
997 e = 0;
998 }
999 } else
1000 e = indent + extraindent;
1001
1002 wrapping = wrap_para(words, firstlinewidth, width, info_width_xrefs,
1003 cfg, 0);
1004 for (p = wrapping; p; p = p->next) {
1005 for (i = 0; i < e; i++)
1006 info_rdadd(text, L' ');
1007 info_rdaddwc(text, p->begin, p->end, TRUE, cfg);
1008 info_rdadd(text, L'\n');
1009 e = indent + extraindent;
1010 }
1011 wrap_free(wrapping);
1012 info_rdadd(text, L'\n');
1013
1014 free_word_list(words);
1015 }
1016
1017 static void info_codepara(info_data *text, word *words,
1018 int indent, int width) {
1019 int i;
1020
1021 for (; words; words = words->next) if (words->type == word_WeakCode) {
1022 for (i = 0; i < indent; i++)
1023 info_rdadd(text, L' ');
1024 if (info_rdadds(text, words->text) > width) {
1025 /* FIXME: warn */
1026 }
1027 info_rdadd(text, L'\n');
1028 }
1029
1030 info_rdadd(text, L'\n');
1031 }
1032
1033 static void info_versionid(info_data *text, word *words, infoconfig *cfg) {
1034 info_rdadd(text, L'[');
1035 info_rdaddwc(text, words, NULL, FALSE, cfg);
1036 info_rdadds(text, L"]\n");
1037 }
1038
1039 static node *info_node_new(char *name, int charset)
1040 {
1041 node *n;
1042
1043 n = snew(node);
1044 n->text = empty_info_data;
1045 n->text.charset = charset;
1046 n->up = n->next = n->prev = n->lastchild = n->listnext = NULL;
1047 n->name = dupstr(name);
1048 n->started_menu = FALSE;
1049
1050 return n;
1051 }
1052
1053 static char *info_node_name_core(info_data *id, filepos *fpos)
1054 {
1055 char *p, *q;
1056
1057 /*
1058 * We cannot have commas, colons or parentheses in a node name.
1059 * Remove any that we find, with a warning.
1060 */
1061 p = q = id->output.text;
1062 while (*p) {
1063 if (*p == ':' || *p == ',' || *p == '(' || *p == ')') {
1064 error(err_infonodechar, fpos, *p);
1065 } else {
1066 *q++ = *p;
1067 }
1068 p++;
1069 }
1070 *q = '\0';
1071
1072 return id->output.text;
1073 }
1074
1075 static char *info_node_name_for_para(paragraph *par, infoconfig *cfg)
1076 {
1077 info_data id = EMPTY_INFO_DATA;
1078
1079 id.charset = cfg->charset;
1080 info_rdaddwc(&id, par->kwtext ? par->kwtext : par->words,
1081 NULL, FALSE, cfg);
1082 info_rdaddsc(&id, NULL);
1083
1084 return info_node_name_core(&id, &par->fpos);
1085 }
1086
1087 static char *info_node_name_for_text(wchar_t *text, infoconfig *cfg)
1088 {
1089 info_data id = EMPTY_INFO_DATA;
1090
1091 id.charset = cfg->charset;
1092 info_rdadds(&id, text);
1093 info_rdaddsc(&id, NULL);
1094
1095 return info_node_name_core(&id, NULL);
1096 }
1097
1098 static void info_menu_item(info_data *text, node *n, paragraph *p,
1099 infoconfig *cfg)
1100 {
1101 /*
1102 * FIXME: Depending on how we're doing node names in this info
1103 * file, we might want to do
1104 *
1105 * * Node name:: Chapter title
1106 *
1107 * _or_
1108 *
1109 * * Chapter number: Node name.
1110 *
1111 * This function mostly works in char rather than wchar_t,
1112 * because a menu item is a structural component.
1113 */
1114 info_rdaddsc(text, "* ");
1115 info_rdaddsc(text, n->name);
1116 info_rdaddsc(text, "::");
1117 if (p) {
1118 info_rdaddc(text, ' ');
1119 info_rdaddwc(text, p->words, NULL, FALSE, cfg);
1120 }
1121 info_rdaddc(text, '\n');
1122 }
1123
1124 /*
1125 * These functions implement my wrapper on the rdadd* calls which
1126 * allows me to switch arbitrarily between literal octet-string
1127 * text and charset-translated Unicode. (Because no matter what
1128 * character set I write the actual text in, I expect info readers
1129 * to treat node names and file names literally and to expect
1130 * keywords like `*Note' in their canonical form, so I have to take
1131 * steps to ensure that those structural elements of the file
1132 * aren't messed with.)
1133 */
1134 static int info_rdadds(info_data *d, wchar_t const *wcs)
1135 {
1136 if (!d->wcmode) {
1137 d->state = charset_init_state;
1138 d->wcmode = TRUE;
1139 }
1140
1141 if (wcs) {
1142 char buf[256];
1143 int len, width, ret;
1144
1145 width = ustrwid(wcs, d->charset);
1146
1147 len = ustrlen(wcs);
1148 while (len > 0) {
1149 int prevlen = len;
1150
1151 ret = charset_from_unicode(&wcs, &len, buf, lenof(buf),
1152 d->charset, &d->state, NULL);
1153
1154 assert(len < prevlen);
1155
1156 if (ret > 0) {
1157 buf[ret] = '\0';
1158 rdaddsc(&d->output, buf);
1159 }
1160 }
1161
1162 return width;
1163 } else
1164 return 0;
1165 }
1166
1167 static int info_rdaddsc(info_data *d, char const *cs)
1168 {
1169 if (d->wcmode) {
1170 char buf[256];
1171 int ret;
1172
1173 ret = charset_from_unicode(NULL, 0, buf, lenof(buf),
1174 d->charset, &d->state, NULL);
1175 if (ret > 0) {
1176 buf[ret] = '\0';
1177 rdaddsc(&d->output, buf);
1178 }
1179
1180 d->wcmode = FALSE;
1181 }
1182
1183 if (cs) {
1184 rdaddsc(&d->output, cs);
1185 return strwid(cs, d->charset);
1186 } else
1187 return 0;
1188 }
1189
1190 static int info_rdadd(info_data *d, wchar_t wc)
1191 {
1192 wchar_t wcs[2];
1193 wcs[0] = wc;
1194 wcs[1] = L'\0';
1195 return info_rdadds(d, wcs);
1196 }
1197
1198 static int info_rdaddc(info_data *d, char c)
1199 {
1200 char cs[2];
1201 cs[0] = c;
1202 cs[1] = '\0';
1203 return info_rdaddsc(d, cs);
1204 }