General configurability upgrade for the info back end.
[sgt/halibut] / bk_info.c
1 /*
2 * info backend for Halibut
3 *
4 * Possible future work:
5 *
6 * - configurable choice of how to allocate node names?
7 * + possibly a template-like approach, choosing node names to
8 * be the full section title or perhaps the internal keyword?
9 * + neither of those seems quite right. Perhaps instead a
10 * Windows Help-like mechanism, where a magic config
11 * directive allows user choice of name for every node.
12 * + Only trouble with that is, now what happens to the section
13 * numbers? Do they become completely vestigial and just sit
14 * in the title text of each node? Or do we keep them in the
15 * menus somehow? I think people might occasionally want to
16 * go to a section by number, if only because all the _other_
17 * formats of the same document will reference the numbers
18 * all the time. So our menu lines could look like one of
19 * these:
20 * * Nodename: Section 1.2. Title of section.
21 * * Section 1.2: Nodename. Title of section.
22 *
23 * - might be helpful to diagnose duplicate node names!
24 */
25
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <assert.h>
29 #include "halibut.h"
30
/*
 * Settings for the info backend. info_configure() fills one of these
 * in from built-in defaults and the document's config paragraphs.
 */
typedef struct {
    char *filename;           /* primary output file name */
    int maxfilesize;          /* threshold used when splitting into subfiles */
    int charset;              /* output character set */
    int listindentbefore;     /* indent placed before a list item's prefix */
    int listindentafter;      /* room reserved for the list item's prefix */
    int indent_code;          /* indent applied to code paragraphs */
    int width;                /* overall text width */
    int index_width;          /* column at which index references begin */
    wchar_t *bullet;          /* bullet text (list of fallback candidates) */
    wchar_t *listsuffix;      /* text appended to numbered-list prefixes */
    wchar_t *startemph;       /* text emitted before emphasised words */
    wchar_t *endemph;         /* text emitted after emphasised words */
    wchar_t *lquote;          /* opening quote */
    wchar_t *rquote;          /* closing quote */
    wchar_t *sectsuffix;      /* text between a section number and its title */
    wchar_t *underline;       /* text repeated to underline headings */
    wchar_t *rule;            /* text repeated to draw horizontal rules */
} infoconfig;
43
/*
 * Output accumulator used throughout this backend. Text is appended
 * either as literal chars (info_rdaddsc / info_rdaddc) or as wide
 * characters converted into `charset' (info_rdadds / info_rdadd).
 */
typedef struct {
    rdstringc output;                  /* the text accumulated so far */
    int charset;                       /* charset wide text is converted to */
    charset_state state;               /* conversion state between calls */
    int wcmode;                        /* TRUE after wide text was last added,
                                        * FALSE after literal text */
} info_data;
/* Initialiser for an empty accumulator; callers set `charset' afterwards. */
#define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE }
static const info_data empty_info_data = EMPTY_INFO_DATA;
52
53 typedef struct node_tag node;
54 struct node_tag {
55 node *listnext;
56 node *up, *prev, *next, *lastchild;
57 int pos, started_menu, filenum;
58 char *name;
59 info_data text;
60 };
61
62 typedef struct {
63 char *text;
64 int length;
65 int nnodes, nodesize;
66 node **nodes;
67 } info_idx;
68
69 static int info_rdadd(info_data *, wchar_t);
70 static int info_rdadds(info_data *, wchar_t const *);
71 static int info_rdaddc(info_data *, char);
72 static int info_rdaddsc(info_data *, char const *);
73
74 static void info_heading(info_data *, word *, word *, int, infoconfig *);
75 static void info_rule(info_data *, int, int, infoconfig *);
76 static void info_para(info_data *, word *, wchar_t *, word *, keywordlist *,
77 int, int, int, infoconfig *);
78 static void info_codepara(info_data *, word *, int, int);
79 static void info_versionid(info_data *, word *, infoconfig *);
80 static void info_menu_item(info_data *, node *, paragraph *, infoconfig *);
81 static word *info_transform_wordlist(word *, keywordlist *);
82 static int info_check_index(word *, node *, indexdata *);
83
84 static int info_rdaddwc(info_data *, word *, word *, int, infoconfig *);
85
86 static node *info_node_new(char *name, int charset);
87 static char *info_node_name(paragraph *p, infoconfig *);
88
89 static infoconfig info_configure(paragraph *source) {
90 infoconfig ret;
91 paragraph *p;
92
93 /*
94 * Defaults.
95 */
96 ret.filename = dupstr("output.info");
97 ret.maxfilesize = 64 << 10;
98 ret.charset = CS_ASCII;
99 ret.width = 70;
100 ret.listindentbefore = 1;
101 ret.listindentafter = 3;
102 ret.indent_code = 2;
103 ret.index_width = 40;
104 ret.listsuffix = L".";
105 ret.bullet = L"\x2022\0-\0\0";
106 ret.rule = L"\x2500\0-\0\0";
107 ret.startemph = L"_\0_\0\0";
108 ret.endemph = uadv(ret.startemph);
109 ret.lquote = L"\x2018\0\x2019\0`\0'\0\0";
110 ret.rquote = uadv(ret.lquote);
111 ret.sectsuffix = L": ";
112 ret.underline = L"\x203E\0-\0\0";
113
114 /*
115 * Two-pass configuration so that we can pick up global config
116 * (e.g. `quotes') before having it overridden by specific
117 * config (`info-quotes'), irrespective of the order in which
118 * they occur.
119 */
120 for (p = source; p; p = p->next) {
121 if (p->type == para_Config) {
122 if (!ustricmp(p->keyword, L"quotes")) {
123 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
124 ret.lquote = uadv(p->keyword);
125 ret.rquote = uadv(ret.lquote);
126 }
127 }
128 }
129 }
130
131 for (p = source; p; p = p->next) {
132 if (p->type == para_Config) {
133 if (!ustricmp(p->keyword, L"info-filename")) {
134 sfree(ret.filename);
135 ret.filename = dupstr(adv(p->origkeyword));
136 } else if (!ustricmp(p->keyword, L"info-charset")) {
137 char *csname = utoa_dup(uadv(p->keyword), CS_ASCII);
138 ret.charset = charset_from_localenc(csname);
139 sfree(csname);
140 } else if (!ustricmp(p->keyword, L"info-max-file-size")) {
141 ret.maxfilesize = utoi(uadv(p->keyword));
142 } else if (!ustricmp(p->keyword, L"info-width")) {
143 ret.width = utoi(uadv(p->keyword));
144 } else if (!ustricmp(p->keyword, L"info-indent-code")) {
145 ret.indent_code = utoi(uadv(p->keyword));
146 } else if (!ustricmp(p->keyword, L"info-index-width")) {
147 ret.index_width = utoi(uadv(p->keyword));
148 } else if (!ustricmp(p->keyword, L"info-list-indent")) {
149 ret.listindentbefore = utoi(uadv(p->keyword));
150 } else if (!ustricmp(p->keyword, L"info-listitem-indent")) {
151 ret.listindentafter = utoi(uadv(p->keyword));
152 } else if (!ustricmp(p->keyword, L"info-section-suffix")) {
153 ret.sectsuffix = uadv(p->keyword);
154 } else if (!ustricmp(p->keyword, L"info-underline")) {
155 ret.underline = uadv(p->keyword);
156 } else if (!ustricmp(p->keyword, L"info-bullet")) {
157 ret.bullet = uadv(p->keyword);
158 } else if (!ustricmp(p->keyword, L"info-rule")) {
159 ret.rule = uadv(p->keyword);
160 } else if (!ustricmp(p->keyword, L"info-list-suffix")) {
161 ret.listsuffix = uadv(p->keyword);
162 } else if (!ustricmp(p->keyword, L"info-emphasis")) {
163 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
164 ret.startemph = uadv(p->keyword);
165 ret.endemph = uadv(ret.startemph);
166 }
167 } else if (!ustricmp(p->keyword, L"info-quotes")) {
168 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
169 ret.lquote = uadv(p->keyword);
170 ret.rquote = uadv(ret.lquote);
171 }
172 }
173 }
174 }
175
176 /*
177 * Now process fallbacks on quote characters, underlines, the
178 * rule character, the emphasis characters, and bullets.
179 */
180 while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) &&
181 (!cvt_ok(ret.charset, ret.lquote) ||
182 !cvt_ok(ret.charset, ret.rquote))) {
183 ret.lquote = uadv(ret.rquote);
184 ret.rquote = uadv(ret.lquote);
185 }
186
187 while (*uadv(ret.endemph) && *uadv(uadv(ret.endemph)) &&
188 (!cvt_ok(ret.charset, ret.startemph) ||
189 !cvt_ok(ret.charset, ret.endemph))) {
190 ret.startemph = uadv(ret.endemph);
191 ret.endemph = uadv(ret.startemph);
192 }
193
194 while (*ret.underline && *uadv(ret.underline) &&
195 !cvt_ok(ret.charset, ret.underline))
196 ret.underline = uadv(ret.underline);
197
198 while (*ret.bullet && *uadv(ret.bullet) &&
199 !cvt_ok(ret.charset, ret.bullet))
200 ret.bullet = uadv(ret.bullet);
201
202 while (*ret.rule && *uadv(ret.rule) &&
203 !cvt_ok(ret.charset, ret.rule))
204 ret.rule = uadv(ret.rule);
205
206 return ret;
207 }
208
209 paragraph *info_config_filename(char *filename)
210 {
211 return cmdline_cfg_simple("info-filename", filename, NULL);
212 }
213
214 void info_backend(paragraph *sourceform, keywordlist *keywords,
215 indexdata *idx, void *unused) {
216 paragraph *p;
217 infoconfig conf;
218 word *prefix, *body, *wp;
219 word spaceword;
220 wchar_t *prefixextra;
221 int nesting, nestindent;
222 int indentb, indenta;
223 int filepos;
224 int has_index;
225 info_data intro_text = EMPTY_INFO_DATA;
226 node *topnode, *currnode;
227 word bullet;
228 FILE *fp;
229
230 IGNORE(unused);
231
232 conf = info_configure(sourceform);
233
234 /*
235 * Go through and create a node for each section.
236 */
237 topnode = info_node_new("Top", conf.charset);
238 currnode = topnode;
239 for (p = sourceform; p; p = p->next) switch (p->type) {
240 /*
241 * Chapter titles.
242 */
243 case para_Chapter:
244 case para_Appendix:
245 case para_UnnumberedChapter:
246 case para_Heading:
247 case para_Subsect:
248 {
249 node *newnode, *upnode;
250 char *nodename;
251
252 nodename = info_node_name(p, &conf);
253 newnode = info_node_new(nodename, conf.charset);
254 sfree(nodename);
255
256 p->private_data = newnode;
257
258 if (p->parent)
259 upnode = (node *)p->parent->private_data;
260 else
261 upnode = topnode;
262 assert(upnode);
263 newnode->up = upnode;
264
265 currnode->next = newnode;
266 newnode->prev = currnode;
267
268 currnode->listnext = newnode;
269 currnode = newnode;
270 }
271 break;
272 }
273
274 /*
275 * Set up the display form of each index entry.
276 */
277 {
278 int i;
279 indexentry *entry;
280
281 for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
282 info_idx *ii = mknew(info_idx);
283 info_data id = EMPTY_INFO_DATA;
284
285 id.charset = conf.charset;
286
287 ii->nnodes = ii->nodesize = 0;
288 ii->nodes = NULL;
289
290 ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE, &conf);
291
292 ii->text = id.output.text;
293
294 entry->backend_data = ii;
295 }
296 }
297
298 /*
299 * An Info file begins with a piece of introductory text which
300 * is apparently never shown anywhere. This seems to me to be a
301 * good place to put the copyright notice and the version IDs.
302 * Also, Info directory entries are expected to go here.
303 */
304 intro_text.charset = conf.charset;
305
306 info_rdaddsc(&intro_text,
307 "This Info file generated by Halibut, ");
308 info_rdaddsc(&intro_text, version);
309 info_rdaddsc(&intro_text, "\n\n");
310
311 for (p = sourceform; p; p = p->next)
312 if (p->type == para_Config &&
313 !ustricmp(p->keyword, L"info-dir-entry")) {
314 wchar_t *section, *shortname, *longname, *kw;
315 char *s;
316
317 section = uadv(p->keyword);
318 shortname = *section ? uadv(section) : NULL;
319 longname = *shortname ? uadv(shortname) : NULL;
320 kw = *longname ? uadv(longname) : NULL;
321
322 if (!*longname) {
323 error(err_infodirentry, &p->fpos);
324 continue;
325 }
326
327 info_rdaddsc(&intro_text, "INFO-DIR-SECTION ");
328 info_rdadds(&intro_text, section);
329 info_rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
330 info_rdadds(&intro_text, shortname);
331 info_rdaddsc(&intro_text, ": (");
332 s = dupstr(conf.filename);
333 if (strlen(s) > 5 && !strcmp(s+strlen(s)-5, ".info"))
334 s[strlen(s)-5] = '\0';
335 info_rdaddsc(&intro_text, s);
336 sfree(s);
337 info_rdaddsc(&intro_text, ")");
338 if (*kw) {
339 keyword *kwl = kw_lookup(keywords, kw);
340 if (kwl && kwl->para->private_data) {
341 node *n = (node *)kwl->para->private_data;
342 info_rdaddsc(&intro_text, n->name);
343 }
344 }
345 info_rdaddsc(&intro_text, ". ");
346 info_rdadds(&intro_text, longname);
347 info_rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
348 }
349
350 for (p = sourceform; p; p = p->next)
351 if (p->type == para_Copyright)
352 info_para(&intro_text, NULL, NULL, p->words, keywords,
353 0, 0, conf.width, &conf);
354
355 for (p = sourceform; p; p = p->next)
356 if (p->type == para_VersionID)
357 info_versionid(&intro_text, p->words, &conf);
358
359 if (intro_text.output.text[intro_text.output.pos-1] != '\n')
360 info_rdaddc(&intro_text, '\n');
361
362 /* Do the title */
363 for (p = sourceform; p; p = p->next)
364 if (p->type == para_Title)
365 info_heading(&topnode->text, NULL, p->words, conf.width, &conf);
366
367 nestindent = conf.listindentbefore + conf.listindentafter;
368 nesting = 0;
369
370 currnode = topnode;
371
372 /* Do the main document */
373 for (p = sourceform; p; p = p->next) switch (p->type) {
374
375 case para_QuotePush:
376 nesting += 2;
377 break;
378 case para_QuotePop:
379 nesting -= 2;
380 assert(nesting >= 0);
381 break;
382
383 case para_LcontPush:
384 nesting += nestindent;
385 break;
386 case para_LcontPop:
387 nesting -= nestindent;
388 assert(nesting >= 0);
389 break;
390
391 /*
392 * Things we ignore because we've already processed them or
393 * aren't going to touch them in this pass.
394 */
395 case para_IM:
396 case para_BR:
397 case para_Biblio: /* only touch BiblioCited */
398 case para_VersionID:
399 case para_NoCite:
400 case para_Title:
401 break;
402
403 /*
404 * Chapter titles.
405 */
406 case para_Chapter:
407 case para_Appendix:
408 case para_UnnumberedChapter:
409 case para_Heading:
410 case para_Subsect:
411 currnode = p->private_data;
412 assert(currnode);
413 assert(currnode->up);
414
415 if (!currnode->up->started_menu) {
416 info_rdaddsc(&currnode->up->text, "* Menu:\n\n");
417 currnode->up->started_menu = TRUE;
418 }
419 info_menu_item(&currnode->up->text, currnode, p, &conf);
420
421 has_index |= info_check_index(p->words, currnode, idx);
422 info_heading(&currnode->text, p->kwtext, p->words, conf.width, &conf);
423 nesting = 0;
424 break;
425
426 case para_Rule:
427 info_rule(&currnode->text, nesting, conf.width - nesting, &conf);
428 break;
429
430 case para_Normal:
431 case para_Copyright:
432 case para_DescribedThing:
433 case para_Description:
434 case para_BiblioCited:
435 case para_Bullet:
436 case para_NumberedList:
437 has_index |= info_check_index(p->words, currnode, idx);
438 if (p->type == para_Bullet) {
439 bullet.next = NULL;
440 bullet.alt = NULL;
441 bullet.type = word_Normal;
442 bullet.text = conf.bullet;
443 prefix = &bullet;
444 prefixextra = NULL;
445 indentb = conf.listindentbefore;
446 indenta = conf.listindentafter;
447 } else if (p->type == para_NumberedList) {
448 prefix = p->kwtext;
449 prefixextra = conf.listsuffix;
450 indentb = conf.listindentbefore;
451 indenta = conf.listindentafter;
452 } else if (p->type == para_Description) {
453 prefix = NULL;
454 prefixextra = NULL;
455 indentb = conf.listindentbefore;
456 indenta = conf.listindentafter;
457 } else {
458 prefix = NULL;
459 prefixextra = NULL;
460 indentb = indenta = 0;
461 }
462 if (p->type == para_BiblioCited) {
463 body = dup_word_list(p->kwtext);
464 for (wp = body; wp->next; wp = wp->next);
465 wp->next = &spaceword;
466 spaceword.next = p->words;
467 spaceword.alt = NULL;
468 spaceword.type = word_WhiteSpace;
469 spaceword.text = NULL;
470 } else {
471 wp = NULL;
472 body = p->words;
473 }
474 info_para(&currnode->text, prefix, prefixextra, body, keywords,
475 nesting + indentb, indenta,
476 conf.width - nesting - indentb - indenta, &conf);
477 if (wp) {
478 wp->next = NULL;
479 free_word_list(body);
480 }
481 break;
482
483 case para_Code:
484 info_codepara(&currnode->text, p->words,
485 nesting + conf.indent_code,
486 conf.width - nesting - 2 * conf.indent_code);
487 break;
488 }
489
490 /*
491 * Create an index node if required.
492 */
493 if (has_index) {
494 node *newnode;
495 int i, j, k;
496 indexentry *entry;
497
498 newnode = info_node_new("Index", conf.charset);
499 newnode->up = topnode;
500
501 currnode->next = newnode;
502 newnode->prev = currnode;
503 currnode->listnext = newnode;
504
505 info_rdaddsc(&newnode->text, "Index\n-----\n\n");
506
507 info_menu_item(&topnode->text, newnode, NULL, &conf);
508
509 for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
510 info_idx *ii = (info_idx *)entry->backend_data;
511
512 for (j = 0; j < ii->nnodes; j++) {
513 /*
514 * When we have multiple references for a single
515 * index term, we only display the actual term on
516 * the first line, to make it clear that the terms
517 * really are the same.
518 */
519 if (j == 0)
520 info_rdaddsc(&newnode->text, ii->text);
521 for (k = (j ? 0 : ii->length); k < conf.index_width-2; k++)
522 info_rdaddc(&newnode->text, ' ');
523 info_rdaddsc(&newnode->text, " *Note ");
524 info_rdaddsc(&newnode->text, ii->nodes[j]->name);
525 info_rdaddsc(&newnode->text, "::\n");
526 }
527 }
528 }
529
530 /*
531 * Finalise the text of each node, by adding the ^_ delimiter
532 * and the node line at the top.
533 */
534 for (currnode = topnode; currnode; currnode = currnode->listnext) {
535 char *origtext = currnode->text.output.text;
536 currnode->text = empty_info_data;
537 currnode->text.charset = conf.charset;
538 info_rdaddsc(&currnode->text, "\037\nFile: ");
539 info_rdaddsc(&currnode->text, conf.filename);
540 info_rdaddsc(&currnode->text, ", Node: ");
541 info_rdaddsc(&currnode->text, currnode->name);
542 if (currnode->prev) {
543 info_rdaddsc(&currnode->text, ", Prev: ");
544 info_rdaddsc(&currnode->text, currnode->prev->name);
545 }
546 info_rdaddsc(&currnode->text, ", Up: ");
547 info_rdaddsc(&currnode->text, (currnode->up ?
548 currnode->up->name : "(dir)"));
549 if (currnode->next) {
550 info_rdaddsc(&currnode->text, ", Next: ");
551 info_rdaddsc(&currnode->text, currnode->next->name);
552 }
553 info_rdaddsc(&currnode->text, "\n\n");
554 info_rdaddsc(&currnode->text, origtext);
555 /*
556 * Just make _absolutely_ sure we end with a newline.
557 */
558 if (currnode->text.output.text[currnode->text.output.pos-1] != '\n')
559 info_rdaddc(&currnode->text, '\n');
560
561 sfree(origtext);
562 }
563
564 /*
565 * Compute the offsets for the tag table.
566 */
567 filepos = intro_text.output.pos;
568 for (currnode = topnode; currnode; currnode = currnode->listnext) {
569 currnode->pos = filepos;
570 filepos += currnode->text.output.pos;
571 }
572
573 /*
574 * Split into sub-files.
575 */
576 if (conf.maxfilesize > 0) {
577 int currfilesize = intro_text.output.pos, currfilenum = 1;
578 for (currnode = topnode; currnode; currnode = currnode->listnext) {
579 if (currfilesize > intro_text.output.pos &&
580 currfilesize + currnode->text.output.pos > conf.maxfilesize) {
581 currfilenum++;
582 currfilesize = intro_text.output.pos;
583 }
584 currnode->filenum = currfilenum;
585 currfilesize += currnode->text.output.pos;
586 }
587 }
588
589 /*
590 * Write the primary output file.
591 */
592 fp = fopen(conf.filename, "w");
593 if (!fp) {
594 error(err_cantopenw, conf.filename);
595 return;
596 }
597 fputs(intro_text.output.text, fp);
598 if (conf.maxfilesize == 0) {
599 for (currnode = topnode; currnode; currnode = currnode->listnext)
600 fputs(currnode->text.output.text, fp);
601 } else {
602 int filenum = 0;
603 fprintf(fp, "\037\nIndirect:\n");
604 for (currnode = topnode; currnode; currnode = currnode->listnext)
605 if (filenum != currnode->filenum) {
606 filenum = currnode->filenum;
607 fprintf(fp, "%s-%d: %d\n", conf.filename, filenum,
608 currnode->pos);
609 }
610 }
611 fprintf(fp, "\037\nTag Table:\n");
612 if (conf.maxfilesize > 0)
613 fprintf(fp, "(Indirect)\n");
614 for (currnode = topnode; currnode; currnode = currnode->listnext)
615 fprintf(fp, "Node: %s\177%d\n", currnode->name, currnode->pos);
616 fprintf(fp, "\037\nEnd Tag Table\n");
617 fclose(fp);
618
619 /*
620 * Write the subfiles.
621 */
622 if (conf.maxfilesize > 0) {
623 int filenum = 0;
624 fp = NULL;
625
626 for (currnode = topnode; currnode; currnode = currnode->listnext) {
627 if (filenum != currnode->filenum) {
628 char *fname;
629
630 filenum = currnode->filenum;
631
632 if (fp)
633 fclose(fp);
634 fname = mknewa(char, strlen(conf.filename) + 40);
635 sprintf(fname, "%s-%d", conf.filename, filenum);
636 fp = fopen(fname, "w");
637 if (!fp) {
638 error(err_cantopenw, fname);
639 return;
640 }
641 sfree(fname);
642 fputs(intro_text.output.text, fp);
643 }
644 fputs(currnode->text.output.text, fp);
645 }
646
647 if (fp)
648 fclose(fp);
649 }
650 }
651
652 static int info_check_index(word *w, node *n, indexdata *idx)
653 {
654 int ret = 0;
655
656 for (; w; w = w->next) {
657 if (w->type == word_IndexRef) {
658 indextag *tag;
659 int i;
660
661 tag = index_findtag(idx, w->text);
662 if (!tag)
663 break;
664
665 for (i = 0; i < tag->nrefs; i++) {
666 indexentry *entry = tag->refs[i];
667 info_idx *ii = (info_idx *)entry->backend_data;
668
669 if (ii->nnodes > 0 && ii->nodes[ii->nnodes-1] == n) {
670 /*
671 * If the same index term is indexed twice
672 * within the same section, we only want to
673 * mention it once in the index. So do nothing
674 * here.
675 */
676 continue;
677 }
678
679 if (ii->nnodes >= ii->nodesize) {
680 ii->nodesize += 32;
681 ii->nodes = resize(ii->nodes, ii->nodesize);
682 }
683
684 ii->nodes[ii->nnodes++] = n;
685
686 ret = 1;
687 }
688 }
689 }
690
691 return ret;
692 }
693
694 static word *info_transform_wordlist(word *words, keywordlist *keywords)
695 {
696 word *ret = dup_word_list(words);
697 word *w;
698 keyword *kwl;
699
700 for (w = ret; w; w = w->next) {
701 w->private_data = NULL;
702 if (w->type == word_UpperXref || w->type == word_LowerXref) {
703 kwl = kw_lookup(keywords, w->text);
704 if (kwl) {
705 if (kwl->para->type == para_NumberedList ||
706 kwl->para->type == para_BiblioCited) {
707 /*
708 * In Info, we do nothing special for xrefs to
709 * numbered list items or bibliography entries.
710 */
711 break;
712 } else {
713 /*
714 * An xref to a different section has its text
715 * completely replaced.
716 */
717 word *w2, *w3, *w4;
718 w2 = w3 = w->next;
719 w4 = NULL;
720 while (w2) {
721 if (w2->type == word_XrefEnd) {
722 w4 = w2->next;
723 w2->next = NULL;
724 break;
725 }
726 w2 = w2->next;
727 }
728 free_word_list(w3);
729
730 /*
731 * Now w is the UpperXref / LowerXref we
732 * started with, and w4 is the next word after
733 * the corresponding XrefEnd (if any). The
734 * simplest thing is just to stick a pointer to
735 * the target node structure in the private
736 * data field of the xref word, and let
737 * info_rdaddwc and friends read the node name
738 * out from there.
739 */
740 w->next = w4;
741 w->private_data = kwl->para->private_data;
742 assert(w->private_data);
743 }
744 }
745 }
746 }
747
748 return ret;
749 }
750
751 static int info_rdaddwc(info_data *id, word *words, word *end, int xrefs,
752 infoconfig *cfg) {
753 int ret = 0;
754
755 for (; words && words != end; words = words->next) switch (words->type) {
756 case word_HyperLink:
757 case word_HyperEnd:
758 case word_XrefEnd:
759 case word_IndexRef:
760 break;
761
762 case word_Normal:
763 case word_Emph:
764 case word_Code:
765 case word_WeakCode:
766 case word_WhiteSpace:
767 case word_EmphSpace:
768 case word_CodeSpace:
769 case word_WkCodeSpace:
770 case word_Quote:
771 case word_EmphQuote:
772 case word_CodeQuote:
773 case word_WkCodeQuote:
774 assert(words->type != word_CodeQuote &&
775 words->type != word_WkCodeQuote);
776 if (towordstyle(words->type) == word_Emph &&
777 (attraux(words->aux) == attr_First ||
778 attraux(words->aux) == attr_Only))
779 ret += info_rdadds(id, cfg->startemph);
780 else if (towordstyle(words->type) == word_Code &&
781 (attraux(words->aux) == attr_First ||
782 attraux(words->aux) == attr_Only))
783 ret += info_rdadds(id, cfg->lquote);
784 if (removeattr(words->type) == word_Normal) {
785 if (cvt_ok(id->charset, words->text) || !words->alt)
786 ret += info_rdadds(id, words->text);
787 else
788 ret += info_rdaddwc(id, words->alt, NULL, FALSE, cfg);
789 } else if (removeattr(words->type) == word_WhiteSpace) {
790 ret += info_rdadd(id, L' ');
791 } else if (removeattr(words->type) == word_Quote) {
792 ret += info_rdadds(id, quoteaux(words->aux) == quote_Open ?
793 cfg->lquote : cfg->rquote);
794 }
795 if (towordstyle(words->type) == word_Emph &&
796 (attraux(words->aux) == attr_Last ||
797 attraux(words->aux) == attr_Only))
798 ret += info_rdadds(id, cfg->endemph);
799 else if (towordstyle(words->type) == word_Code &&
800 (attraux(words->aux) == attr_Last ||
801 attraux(words->aux) == attr_Only))
802 ret += info_rdadds(id, cfg->rquote);
803 break;
804
805 case word_UpperXref:
806 case word_LowerXref:
807 if (xrefs && words->private_data) {
808 /*
809 * This bit is structural and so must be done in char
810 * rather than wchar_t.
811 */
812 ret += info_rdaddsc(id, "*Note ");
813 ret += info_rdaddsc(id, ((node *)words->private_data)->name);
814 ret += info_rdaddsc(id, "::");
815 }
816 break;
817 }
818
819 return ret;
820 }
821
822 static int info_width_internal(word *words, int xrefs, infoconfig *cfg);
823
824 static int info_width_internal_list(word *words, int xrefs, infoconfig *cfg) {
825 int w = 0;
826 while (words) {
827 w += info_width_internal(words, xrefs, cfg);
828 words = words->next;
829 }
830 return w;
831 }
832
833 static int info_width_internal(word *words, int xrefs, infoconfig *cfg) {
834 int wid;
835 int attr;
836
837 switch (words->type) {
838 case word_HyperLink:
839 case word_HyperEnd:
840 case word_XrefEnd:
841 case word_IndexRef:
842 return 0;
843
844 case word_UpperXref:
845 case word_LowerXref:
846 if (xrefs && words->private_data) {
847 /* "*Note " plus "::" comes to 8 characters */
848 return 8 + strwid(((node *)words->private_data)->name,
849 cfg->charset);
850 } else
851 return 0;
852 }
853
854 assert(words->type < word_internal_endattrs);
855
856 wid = 0;
857 attr = towordstyle(words->type);
858
859 if (attr == word_Emph || attr == word_Code) {
860 if (attraux(words->aux) == attr_Only ||
861 attraux(words->aux) == attr_First)
862 wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
863 cfg->charset);
864 }
865 if (attr == word_Emph || attr == word_Code) {
866 if (attraux(words->aux) == attr_Only ||
867 attraux(words->aux) == attr_Last)
868 wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
869 cfg->charset);
870 }
871
872 switch (words->type) {
873 case word_Normal:
874 case word_Emph:
875 case word_Code:
876 case word_WeakCode:
877 if (cvt_ok(cfg->charset, words->text) || !words->alt)
878 wid += ustrwid(words->text, cfg->charset);
879 else
880 wid += info_width_internal_list(words->alt, xrefs, cfg);
881 return wid;
882
883 case word_WhiteSpace:
884 case word_EmphSpace:
885 case word_CodeSpace:
886 case word_WkCodeSpace:
887 case word_Quote:
888 case word_EmphQuote:
889 case word_CodeQuote:
890 case word_WkCodeQuote:
891 assert(words->type != word_CodeQuote &&
892 words->type != word_WkCodeQuote);
893 if (removeattr(words->type) == word_Quote) {
894 if (quoteaux(words->aux) == quote_Open)
895 wid += ustrwid(cfg->lquote, cfg->charset);
896 else
897 wid += ustrwid(cfg->rquote, cfg->charset);
898 } else
899 wid++; /* space */
900 }
901 return wid;
902 }
903
904 static int info_width_noxrefs(void *ctx, word *words)
905 {
906 return info_width_internal(words, FALSE, (infoconfig *)ctx);
907 }
908 static int info_width_xrefs(void *ctx, word *words)
909 {
910 return info_width_internal(words, TRUE, (infoconfig *)ctx);
911 }
912
913 static void info_heading(info_data *text, word *tprefix,
914 word *words, int width, infoconfig *cfg) {
915 int length;
916 int firstlinewidth, wrapwidth;
917 wrappedline *wrapping, *p;
918
919 length = 0;
920 if (tprefix) {
921 length += info_rdaddwc(text, tprefix, NULL, FALSE, cfg);
922 length += info_rdadds(text, cfg->sectsuffix);
923 }
924
925 wrapwidth = width;
926 firstlinewidth = width - length;
927
928 wrapping = wrap_para(words, firstlinewidth, wrapwidth,
929 info_width_noxrefs, cfg, 0);
930 for (p = wrapping; p; p = p->next) {
931 length += info_rdaddwc(text, p->begin, p->end, FALSE, cfg);
932 info_rdadd(text, L'\n');
933 while (length > 0) {
934 info_rdadds(text, cfg->underline);
935 length -= ustrwid(cfg->underline, cfg->charset);
936 }
937 info_rdadd(text, L'\n');
938 length = 0;
939 }
940 wrap_free(wrapping);
941 info_rdadd(text, L'\n');
942 }
943
944 static void info_rule(info_data *text, int indent, int width, infoconfig *cfg)
945 {
946 while (indent--) info_rdadd(text, L' ');
947 while (width > 0) {
948 info_rdadds(text, cfg->rule);
949 width -= ustrwid(cfg->rule, cfg->charset);
950 }
951 info_rdadd(text, L'\n');
952 info_rdadd(text, L'\n');
953 }
954
955 static void info_para(info_data *text, word *prefix, wchar_t *prefixextra,
956 word *input, keywordlist *keywords, int indent,
957 int extraindent, int width, infoconfig *cfg) {
958 wrappedline *wrapping, *p;
959 word *words;
960 int e;
961 int i;
962 int firstlinewidth = width;
963
964 words = info_transform_wordlist(input, keywords);
965
966 if (prefix) {
967 for (i = 0; i < indent; i++)
968 info_rdadd(text, L' ');
969 e = info_rdaddwc(text, prefix, NULL, FALSE, cfg);
970 if (prefixextra)
971 e += info_rdadds(text, prefixextra);
972 /* If the prefix is too long, shorten the first line to fit. */
973 e = extraindent - e;
974 if (e < 0) {
975 firstlinewidth += e; /* this decreases it, since e < 0 */
976 if (firstlinewidth < 0) {
977 e = indent + extraindent;
978 firstlinewidth = width;
979 info_rdadd(text, L'\n');
980 } else
981 e = 0;
982 }
983 } else
984 e = indent + extraindent;
985
986 wrapping = wrap_para(words, firstlinewidth, width, info_width_xrefs,
987 cfg, 0);
988 for (p = wrapping; p; p = p->next) {
989 for (i = 0; i < e; i++)
990 info_rdadd(text, L' ');
991 info_rdaddwc(text, p->begin, p->end, TRUE, cfg);
992 info_rdadd(text, L'\n');
993 e = indent + extraindent;
994 }
995 wrap_free(wrapping);
996 info_rdadd(text, L'\n');
997
998 free_word_list(words);
999 }
1000
1001 static void info_codepara(info_data *text, word *words,
1002 int indent, int width) {
1003 int i;
1004
1005 for (; words; words = words->next) if (words->type == word_WeakCode) {
1006 for (i = 0; i < indent; i++)
1007 info_rdadd(text, L' ');
1008 if (info_rdadds(text, words->text) > width) {
1009 /* FIXME: warn */
1010 }
1011 info_rdadd(text, L'\n');
1012 }
1013
1014 info_rdadd(text, L'\n');
1015 }
1016
1017 static void info_versionid(info_data *text, word *words, infoconfig *cfg) {
1018 info_rdadd(text, L'[');
1019 info_rdaddwc(text, words, NULL, FALSE, cfg);
1020 info_rdadds(text, L"]\n");
1021 }
1022
1023 static node *info_node_new(char *name, int charset)
1024 {
1025 node *n;
1026
1027 n = mknew(node);
1028 n->text = empty_info_data;
1029 n->text.charset = charset;
1030 n->up = n->next = n->prev = n->lastchild = n->listnext = NULL;
1031 n->name = dupstr(name);
1032 n->started_menu = FALSE;
1033
1034 return n;
1035 }
1036
1037 static char *info_node_name(paragraph *par, infoconfig *cfg)
1038 {
1039 info_data id = EMPTY_INFO_DATA;
1040 char *p, *q;
1041
1042 id.charset = cfg->charset;
1043 info_rdaddwc(&id, par->kwtext ? par->kwtext : par->words,
1044 NULL, FALSE, cfg);
1045 info_rdaddsc(&id, NULL);
1046
1047 /*
1048 * We cannot have commas or colons in a node name. Remove any
1049 * that we find, with a warning.
1050 */
1051 p = q = id.output.text;
1052 while (*p) {
1053 if (*p == ':' || *p == ',') {
1054 error(err_infonodechar, &par->fpos, *p);
1055 } else {
1056 *q++ = *p;
1057 }
1058 p++;
1059 }
1060 *p = '\0';
1061
1062 return id.output.text;
1063 }
1064
1065 static void info_menu_item(info_data *text, node *n, paragraph *p,
1066 infoconfig *cfg)
1067 {
1068 /*
1069 * FIXME: Depending on how we're doing node names in this info
1070 * file, we might want to do
1071 *
1072 * * Node name:: Chapter title
1073 *
1074 * _or_
1075 *
1076 * * Chapter number: Node name.
1077 *
1078 * This function mostly works in char rather than wchar_t,
1079 * because a menu item is a structural component.
1080 */
1081 info_rdaddsc(text, "* ");
1082 info_rdaddsc(text, n->name);
1083 info_rdaddsc(text, "::");
1084 if (p) {
1085 info_rdaddc(text, ' ');
1086 info_rdaddwc(text, p->words, NULL, FALSE, cfg);
1087 }
1088 info_rdaddc(text, '\n');
1089 }
1090
1091 /*
1092 * These functions implement my wrapper on the rdadd* calls which
1093 * allows me to switch arbitrarily between literal octet-string
1094 * text and charset-translated Unicode. (Because no matter what
1095 * character set I write the actual text in, I expect info readers
1096 * to treat node names and file names literally and to expect
1097 * keywords like `*Note' in their canonical form, so I have to take
1098 * steps to ensure that those structural elements of the file
1099 * aren't messed with.)
1100 */
1101 static int info_rdadds(info_data *d, wchar_t const *wcs)
1102 {
1103 if (!d->wcmode) {
1104 d->state = charset_init_state;
1105 d->wcmode = TRUE;
1106 }
1107
1108 if (wcs) {
1109 char buf[256];
1110 int len, width, ret;
1111
1112 width = ustrwid(wcs, d->charset);
1113
1114 len = ustrlen(wcs);
1115 while (len > 0) {
1116 int prevlen = len;
1117
1118 ret = charset_from_unicode(&wcs, &len, buf, lenof(buf),
1119 d->charset, &d->state, NULL);
1120
1121 assert(len < prevlen);
1122
1123 if (ret > 0) {
1124 buf[ret] = '\0';
1125 rdaddsc(&d->output, buf);
1126 }
1127 }
1128
1129 return width;
1130 } else
1131 return 0;
1132 }
1133
1134 static int info_rdaddsc(info_data *d, char const *cs)
1135 {
1136 if (d->wcmode) {
1137 char buf[256];
1138 int ret;
1139
1140 ret = charset_from_unicode(NULL, 0, buf, lenof(buf),
1141 d->charset, &d->state, NULL);
1142 if (ret > 0) {
1143 buf[ret] = '\0';
1144 rdaddsc(&d->output, buf);
1145 }
1146
1147 d->wcmode = FALSE;
1148 }
1149
1150 if (cs) {
1151 rdaddsc(&d->output, cs);
1152 return strwid(cs, d->charset);
1153 } else
1154 return 0;
1155 }
1156
1157 static int info_rdadd(info_data *d, wchar_t wc)
1158 {
1159 wchar_t wcs[2];
1160 wcs[0] = wc;
1161 wcs[1] = L'\0';
1162 return info_rdadds(d, wcs);
1163 }
1164
1165 static int info_rdaddc(info_data *d, char c)
1166 {
1167 char cs[2];
1168 cs[0] = c;
1169 cs[1] = '\0';
1170 return info_rdaddsc(d, cs);
1171 }