Add an error check for correct formatting in Deflate uncompressed
[sgt/halibut] / bk_info.c
1 /*
2 * Info backend for Halibut
3 *
4 * The Info file format isn't well-specified, and what specification
5 * there is is scattered all over the place. Sources include:
6 * (info), from GNU Texinfo.
7 * (texinfo), also from GNU Texinfo.
8 * (Emacs)Misc Help, and (emacs)Info Lookup, from GNU Emacs.
9 * info.el, from GNU Emacs.
10 *
11 * Possible future work:
12 *
13 * - configurable choice of how to allocate node names?
14 * + possibly a template-like approach, choosing node names to
15 * be the full section title or perhaps the internal keyword?
16 * + neither of those seems quite right. Perhaps instead a
17 * Windows Help-like mechanism, where a magic config
18 * directive allows user choice of name for every node.
19 * + Only trouble with that is, now what happens to the section
20 * numbers? Do they become completely vestigial and just sit
21 * in the title text of each node? Or do we keep them in the
22 * menus somehow? I think people might occasionally want to
23 * go to a section by number, if only because all the _other_
24 * formats of the same document will reference the numbers
25 * all the time. So our menu lines could look like one of
26 * these:
27 * * Nodename: Section 1.2. Title of section.
28 * * Section 1.2: Nodename. Title of section.
29 *
30 * - might be helpful to diagnose duplicate node names!
31 *
32 * - Indices generated by makeinfo use a menu rather than a bunch of
33 * cross-references, which reduces visual clutter rather. For
34 * singly-referenced items, it looks like:
35 * * toner cartridge, replacing: Toner.
36 * It does a horrid job on multiply-referenced entries, though,
37 * perhaps because the name before the colon is meant to be unique.
38 * Info's 'i' command requires the use of a menu -- it fails to
39 * find any index entries at all with Halibut's current index format.
40 *
41 * - The string "*note" is matched case-insensitively, so we could
42 * make things slightly less ugly by using the lower-case version
43 * when the user asks for \k. Unfortunately, standalone Info seems
44 * to match node names case-sensitively, so we can't downcase that.
45 *
46 * - The character encoding used in an Info file can be configured using
47 * an Emacs local variables block at the end, like this:
48 * Local Variables:
49 * coding: iso-8859-1
50 * End:
51 */
52
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <assert.h>
56 #include "halibut.h"
57
/*
 * Per-heading-level formatting: the string used to underline a
 * heading. info_configure() treats it as a NUL-separated list of
 * alternatives and steps along it with uadv() until it finds one
 * that converts into the output character set.
 */
typedef struct {
    wchar_t *underline;
} alignstruct;
61
62 typedef struct {
63 char *filename;
64 int maxfilesize;
65 int charset;
66 int listindentbefore, listindentafter;
67 int indent_code, width, index_width;
68 alignstruct atitle, achapter, *asect;
69 int nasect;
70 wchar_t *bullet, *listsuffix;
71 wchar_t *startemph, *endemph;
72 wchar_t *lquote, *rquote;
73 wchar_t *sectsuffix;
74 wchar_t *rule;
75 wchar_t *index_text;
76 } infoconfig;
77
78 typedef struct {
79 rdstringc output;
80 int charset;
81 charset_state state;
82 int wcmode;
83 } info_data;
84 #define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE }
85 static const info_data empty_info_data = EMPTY_INFO_DATA;
86
87 typedef struct node_tag node;
88 struct node_tag {
89 node *listnext;
90 node *up, *prev, *next, *lastchild;
91 int pos, started_menu, filenum;
92 char *name;
93 info_data text;
94 };
95
96 typedef struct {
97 char *text;
98 int length;
99 int nnodes, nodesize;
100 node **nodes;
101 } info_idx;
102
103 static int info_rdadd(info_data *, wchar_t);
104 static int info_rdadds(info_data *, wchar_t const *);
105 static int info_rdaddc(info_data *, char);
106 static int info_rdaddsc(info_data *, char const *);
107
108 static void info_heading(info_data *, word *, word *, alignstruct, int,
109 infoconfig *);
110 static void info_rule(info_data *, int, int, infoconfig *);
111 static void info_para(info_data *, word *, wchar_t *, word *, keywordlist *,
112 int, int, int, infoconfig *);
113 static void info_codepara(info_data *, word *, int, int);
114 static void info_versionid(info_data *, word *, infoconfig *);
115 static void info_menu_item(info_data *, node *, paragraph *, infoconfig *);
116 static word *info_transform_wordlist(word *, keywordlist *);
117 static int info_check_index(word *, node *, indexdata *);
118
119 static int info_rdaddwc(info_data *, word *, word *, int, infoconfig *);
120
121 static node *info_node_new(char *name, int charset);
122 static char *info_node_name_for_para(paragraph *p, infoconfig *);
123 static char *info_node_name_for_text(wchar_t *text, infoconfig *);
124
125 static infoconfig info_configure(paragraph *source) {
126 infoconfig ret;
127 paragraph *p;
128 int n;
129
130 /*
131 * Defaults.
132 */
133 ret.filename = dupstr("output.info");
134 ret.maxfilesize = 64 << 10;
135 ret.charset = CS_ASCII;
136 ret.width = 70;
137 ret.listindentbefore = 1;
138 ret.listindentafter = 3;
139 ret.indent_code = 2;
140 ret.index_width = 40;
141 ret.listsuffix = L".";
142 ret.bullet = L"\x2022\0-\0\0";
143 ret.rule = L"\x2500\0-\0\0";
144 ret.startemph = L"_\0_\0\0";
145 ret.endemph = uadv(ret.startemph);
146 ret.lquote = L"\x2018\0\x2019\0`\0'\0\0";
147 ret.rquote = uadv(ret.lquote);
148 ret.sectsuffix = L": ";
149 /*
150 * Default underline characters are chosen to match those recognised by
151 * Info-fontify-node.
152 */
153 ret.atitle.underline = L"*\0\0";
154 ret.achapter.underline = L"=\0\0";
155 ret.nasect = 2;
156 ret.asect = snewn(ret.nasect, alignstruct);
157 ret.asect[0].underline = L"-\0\0";
158 ret.asect[1].underline = L".\0\0";
159 ret.index_text = L"Index";
160
161 /*
162 * Two-pass configuration so that we can pick up global config
163 * (e.g. `quotes') before having it overridden by specific
164 * config (`info-quotes'), irrespective of the order in which
165 * they occur.
166 */
167 for (p = source; p; p = p->next) {
168 if (p->type == para_Config) {
169 if (!ustricmp(p->keyword, L"quotes")) {
170 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
171 ret.lquote = uadv(p->keyword);
172 ret.rquote = uadv(ret.lquote);
173 }
174 } else if (!ustricmp(p->keyword, L"index")) {
175 ret.index_text = uadv(p->keyword);
176 }
177 }
178 }
179
180 for (p = source; p; p = p->next) {
181 if (p->type == para_Config) {
182 if (!ustricmp(p->keyword, L"info-filename")) {
183 sfree(ret.filename);
184 ret.filename = dupstr(adv(p->origkeyword));
185 } else if (!ustricmp(p->keyword, L"info-charset")) {
186 ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
187 } else if (!ustricmp(p->keyword, L"info-max-file-size")) {
188 ret.maxfilesize = utoi(uadv(p->keyword));
189 } else if (!ustricmp(p->keyword, L"info-width")) {
190 ret.width = utoi(uadv(p->keyword));
191 } else if (!ustricmp(p->keyword, L"info-indent-code")) {
192 ret.indent_code = utoi(uadv(p->keyword));
193 } else if (!ustricmp(p->keyword, L"info-index-width")) {
194 ret.index_width = utoi(uadv(p->keyword));
195 } else if (!ustricmp(p->keyword, L"info-list-indent")) {
196 ret.listindentbefore = utoi(uadv(p->keyword));
197 } else if (!ustricmp(p->keyword, L"info-listitem-indent")) {
198 ret.listindentafter = utoi(uadv(p->keyword));
199 } else if (!ustricmp(p->keyword, L"info-section-suffix")) {
200 ret.sectsuffix = uadv(p->keyword);
201 } else if (!ustricmp(p->keyword, L"info-underline")) {
202 ret.atitle.underline = ret.achapter.underline =
203 uadv(p->keyword);
204 for (n = 0; n < ret.nasect; n++)
205 ret.asect[n].underline = ret.atitle.underline;
206 } else if (!ustricmp(p->keyword, L"info-chapter-underline")) {
207 ret.achapter.underline = uadv(p->keyword);
208 } else if (!ustricmp(p->keyword, L"info-section-underline")) {
209 wchar_t *q = uadv(p->keyword);
210 int n = 0;
211 if (uisdigit(*q)) {
212 n = utoi(q);
213 q = uadv(q);
214 }
215 if (n >= ret.nasect) {
216 int i;
217 ret.asect = sresize(ret.asect, n+1, alignstruct);
218 for (i = ret.nasect; i <= n; i++)
219 ret.asect[i] = ret.asect[ret.nasect-1];
220 ret.nasect = n+1;
221 }
222 ret.asect[n].underline = q;
223 } else if (!ustricmp(p->keyword, L"text-title-underline")) {
224 ret.atitle.underline = uadv(p->keyword);
225 } else if (!ustricmp(p->keyword, L"info-bullet")) {
226 ret.bullet = uadv(p->keyword);
227 } else if (!ustricmp(p->keyword, L"info-rule")) {
228 ret.rule = uadv(p->keyword);
229 } else if (!ustricmp(p->keyword, L"info-list-suffix")) {
230 ret.listsuffix = uadv(p->keyword);
231 } else if (!ustricmp(p->keyword, L"info-emphasis")) {
232 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
233 ret.startemph = uadv(p->keyword);
234 ret.endemph = uadv(ret.startemph);
235 }
236 } else if (!ustricmp(p->keyword, L"info-quotes")) {
237 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
238 ret.lquote = uadv(p->keyword);
239 ret.rquote = uadv(ret.lquote);
240 }
241 }
242 }
243 }
244
245 /*
246 * Now process fallbacks on quote characters, underlines, the
247 * rule character, the emphasis characters, and bullets.
248 */
249 while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) &&
250 (!cvt_ok(ret.charset, ret.lquote) ||
251 !cvt_ok(ret.charset, ret.rquote))) {
252 ret.lquote = uadv(ret.rquote);
253 ret.rquote = uadv(ret.lquote);
254 }
255
256 while (*uadv(ret.endemph) && *uadv(uadv(ret.endemph)) &&
257 (!cvt_ok(ret.charset, ret.startemph) ||
258 !cvt_ok(ret.charset, ret.endemph))) {
259 ret.startemph = uadv(ret.endemph);
260 ret.endemph = uadv(ret.startemph);
261 }
262
263 while (*ret.atitle.underline && *uadv(ret.atitle.underline) &&
264 !cvt_ok(ret.charset, ret.atitle.underline))
265 ret.atitle.underline = uadv(ret.atitle.underline);
266
267 while (*ret.achapter.underline && *uadv(ret.achapter.underline) &&
268 !cvt_ok(ret.charset, ret.achapter.underline))
269 ret.achapter.underline = uadv(ret.achapter.underline);
270
271 for (n = 0; n < ret.nasect; n++) {
272 while (*ret.asect[n].underline && *uadv(ret.asect[n].underline) &&
273 !cvt_ok(ret.charset, ret.asect[n].underline))
274 ret.asect[n].underline = uadv(ret.asect[n].underline);
275 }
276
277 while (*ret.bullet && *uadv(ret.bullet) &&
278 !cvt_ok(ret.charset, ret.bullet))
279 ret.bullet = uadv(ret.bullet);
280
281 while (*ret.rule && *uadv(ret.rule) &&
282 !cvt_ok(ret.charset, ret.rule))
283 ret.rule = uadv(ret.rule);
284
285 return ret;
286 }
287
288 paragraph *info_config_filename(char *filename)
289 {
290 return cmdline_cfg_simple("info-filename", filename, NULL);
291 }
292
293 void info_backend(paragraph *sourceform, keywordlist *keywords,
294 indexdata *idx, void *unused) {
295 paragraph *p;
296 infoconfig conf;
297 word *prefix, *body, *wp;
298 word spaceword;
299 wchar_t *prefixextra;
300 int nesting, nestindent;
301 int indentb, indenta;
302 int filepos;
303 int has_index;
304 info_data intro_text = EMPTY_INFO_DATA;
305 node *topnode, *currnode;
306 word bullet;
307 FILE *fp;
308
309 IGNORE(unused);
310
311 conf = info_configure(sourceform);
312
313 /*
314 * Go through and create a node for each section.
315 */
316 topnode = info_node_new("Top", conf.charset);
317 currnode = topnode;
318 for (p = sourceform; p; p = p->next) switch (p->type) {
319 /*
320 * Chapter titles.
321 */
322 case para_Chapter:
323 case para_Appendix:
324 case para_UnnumberedChapter:
325 case para_Heading:
326 case para_Subsect:
327 {
328 node *newnode, *upnode;
329 char *nodename;
330
331 nodename = info_node_name_for_para(p, &conf);
332 newnode = info_node_new(nodename, conf.charset);
333 sfree(nodename);
334
335 p->private_data = newnode;
336
337 if (p->parent)
338 upnode = (node *)p->parent->private_data;
339 else
340 upnode = topnode;
341 assert(upnode);
342 newnode->up = upnode;
343
344 currnode->next = newnode;
345 newnode->prev = currnode;
346
347 currnode->listnext = newnode;
348 currnode = newnode;
349 }
350 break;
351 default:
352 p->private_data = NULL;
353 break;
354 }
355
356 /*
357 * Set up the display form of each index entry.
358 */
359 {
360 int i;
361 indexentry *entry;
362
363 for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
364 info_idx *ii = snew(info_idx);
365 info_data id = EMPTY_INFO_DATA;
366
367 id.charset = conf.charset;
368
369 ii->nnodes = ii->nodesize = 0;
370 ii->nodes = NULL;
371
372 ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE, &conf);
373
374 ii->text = id.output.text;
375
376 entry->backend_data = ii;
377 }
378 }
379
380 /*
381 * An Info file begins with a piece of introductory text which
382 * is apparently never shown anywhere. This seems to me to be a
383 * good place to put the copyright notice and the version IDs.
384 * Also, Info directory entries are expected to go here.
385 */
386 intro_text.charset = conf.charset;
387
388 info_rdaddsc(&intro_text,
389 "This Info file generated by Halibut, ");
390 info_rdaddsc(&intro_text, version);
391 info_rdaddsc(&intro_text, "\n\n");
392
393 for (p = sourceform; p; p = p->next)
394 if (p->type == para_Config &&
395 !ustricmp(p->keyword, L"info-dir-entry")) {
396 wchar_t *section, *shortname, *longname, *kw;
397 char *s;
398
399 section = uadv(p->keyword);
400 shortname = *section ? uadv(section) : L"";
401 longname = *shortname ? uadv(shortname) : L"";
402 kw = *longname ? uadv(longname) : L"";
403
404 if (!*longname) {
405 error(err_cfginsufarg, &p->fpos, p->origkeyword, 3);
406 continue;
407 }
408
409 info_rdaddsc(&intro_text, "INFO-DIR-SECTION ");
410 info_rdadds(&intro_text, section);
411 info_rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
412 info_rdadds(&intro_text, shortname);
413 info_rdaddsc(&intro_text, ": (");
414 s = dupstr(conf.filename);
415 if (strlen(s) > 5 && !strcmp(s+strlen(s)-5, ".info"))
416 s[strlen(s)-5] = '\0';
417 info_rdaddsc(&intro_text, s);
418 sfree(s);
419 info_rdaddsc(&intro_text, ")");
420 if (*kw) {
421 keyword *kwl = kw_lookup(keywords, kw);
422 if (kwl && kwl->para->private_data) {
423 node *n = (node *)kwl->para->private_data;
424 info_rdaddsc(&intro_text, n->name);
425 }
426 }
427 info_rdaddsc(&intro_text, ". ");
428 info_rdadds(&intro_text, longname);
429 info_rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
430 }
431
432 for (p = sourceform; p; p = p->next)
433 if (p->type == para_Copyright)
434 info_para(&intro_text, NULL, NULL, p->words, keywords,
435 0, 0, conf.width, &conf);
436
437 for (p = sourceform; p; p = p->next)
438 if (p->type == para_VersionID)
439 info_versionid(&intro_text, p->words, &conf);
440
441 if (intro_text.output.text[intro_text.output.pos-1] != '\n')
442 info_rdaddc(&intro_text, '\n');
443
444 /* Do the title */
445 for (p = sourceform; p; p = p->next)
446 if (p->type == para_Title)
447 info_heading(&topnode->text, NULL, p->words,
448 conf.atitle, conf.width, &conf);
449
450 nestindent = conf.listindentbefore + conf.listindentafter;
451 nesting = 0;
452
453 currnode = topnode;
454
455 /* Do the main document */
456 for (p = sourceform; p; p = p->next) switch (p->type) {
457
458 case para_QuotePush:
459 nesting += 2;
460 break;
461 case para_QuotePop:
462 nesting -= 2;
463 assert(nesting >= 0);
464 break;
465
466 case para_LcontPush:
467 nesting += nestindent;
468 break;
469 case para_LcontPop:
470 nesting -= nestindent;
471 assert(nesting >= 0);
472 break;
473
474 /*
475 * Things we ignore because we've already processed them or
476 * aren't going to touch them in this pass.
477 */
478 case para_IM:
479 case para_BR:
480 case para_Biblio: /* only touch BiblioCited */
481 case para_VersionID:
482 case para_NoCite:
483 case para_Title:
484 break;
485
486 /*
487 * Chapter titles.
488 */
489 case para_Chapter:
490 case para_Appendix:
491 case para_UnnumberedChapter:
492 case para_Heading:
493 case para_Subsect:
494 currnode = p->private_data;
495 assert(currnode);
496 assert(currnode->up);
497
498 if (!currnode->up->started_menu) {
499 info_rdaddsc(&currnode->up->text, "* Menu:\n\n");
500 currnode->up->started_menu = TRUE;
501 }
502 info_menu_item(&currnode->up->text, currnode, p, &conf);
503
504 has_index |= info_check_index(p->words, currnode, idx);
505 if (p->type == para_Chapter || p->type == para_Appendix ||
506 p->type == para_UnnumberedChapter)
507 info_heading(&currnode->text, p->kwtext, p->words,
508 conf.achapter, conf.width, &conf);
509 else
510 info_heading(&currnode->text, p->kwtext, p->words,
511 conf.asect[p->aux>=conf.nasect?conf.nasect-1:p->aux],
512 conf.width, &conf);
513 nesting = 0;
514 break;
515
516 case para_Rule:
517 info_rule(&currnode->text, nesting, conf.width - nesting, &conf);
518 break;
519
520 case para_Normal:
521 case para_Copyright:
522 case para_DescribedThing:
523 case para_Description:
524 case para_BiblioCited:
525 case para_Bullet:
526 case para_NumberedList:
527 has_index |= info_check_index(p->words, currnode, idx);
528 if (p->type == para_Bullet) {
529 bullet.next = NULL;
530 bullet.alt = NULL;
531 bullet.type = word_Normal;
532 bullet.text = conf.bullet;
533 prefix = &bullet;
534 prefixextra = NULL;
535 indentb = conf.listindentbefore;
536 indenta = conf.listindentafter;
537 } else if (p->type == para_NumberedList) {
538 prefix = p->kwtext;
539 prefixextra = conf.listsuffix;
540 indentb = conf.listindentbefore;
541 indenta = conf.listindentafter;
542 } else if (p->type == para_Description) {
543 prefix = NULL;
544 prefixextra = NULL;
545 indentb = conf.listindentbefore;
546 indenta = conf.listindentafter;
547 } else {
548 prefix = NULL;
549 prefixextra = NULL;
550 indentb = indenta = 0;
551 }
552 if (p->type == para_BiblioCited) {
553 body = dup_word_list(p->kwtext);
554 for (wp = body; wp->next; wp = wp->next);
555 wp->next = &spaceword;
556 spaceword.next = p->words;
557 spaceword.alt = NULL;
558 spaceword.type = word_WhiteSpace;
559 spaceword.text = NULL;
560 } else {
561 wp = NULL;
562 body = p->words;
563 }
564 info_para(&currnode->text, prefix, prefixextra, body, keywords,
565 nesting + indentb, indenta,
566 conf.width - nesting - indentb - indenta, &conf);
567 if (wp) {
568 wp->next = NULL;
569 free_word_list(body);
570 }
571 break;
572
573 case para_Code:
574 info_codepara(&currnode->text, p->words,
575 nesting + conf.indent_code,
576 conf.width - nesting - 2 * conf.indent_code);
577 break;
578 }
579
580 /*
581 * Create an index node if required.
582 */
583 if (has_index) {
584 node *newnode;
585 int i, j, k;
586 indexentry *entry;
587 char *nodename;
588
589 nodename = info_node_name_for_text(conf.index_text, &conf);
590 newnode = info_node_new(nodename, conf.charset);
591 sfree(nodename);
592
593 newnode->up = topnode;
594
595 currnode->next = newnode;
596 newnode->prev = currnode;
597 currnode->listnext = newnode;
598
599 k = info_rdadds(&newnode->text, conf.index_text);
600 info_rdaddsc(&newnode->text, "\n");
601 while (k > 0) {
602 info_rdadds(&newnode->text, conf.achapter.underline);
603 k -= ustrwid(conf.achapter.underline, conf.charset);
604 }
605 info_rdaddsc(&newnode->text, "\n\n");
606
607 info_menu_item(&topnode->text, newnode, NULL, &conf);
608
609 for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
610 info_idx *ii = (info_idx *)entry->backend_data;
611
612 for (j = 0; j < ii->nnodes; j++) {
613 /*
614 * When we have multiple references for a single
615 * index term, we only display the actual term on
616 * the first line, to make it clear that the terms
617 * really are the same.
618 */
619 if (j == 0)
620 info_rdaddsc(&newnode->text, ii->text);
621 for (k = (j ? 0 : ii->length); k < conf.index_width-2; k++)
622 info_rdaddc(&newnode->text, ' ');
623 info_rdaddsc(&newnode->text, " *Note ");
624 info_rdaddsc(&newnode->text, ii->nodes[j]->name);
625 info_rdaddsc(&newnode->text, "::\n");
626 }
627 }
628 }
629
630 /*
631 * Finalise the text of each node, by adding the ^_ delimiter
632 * and the node line at the top.
633 */
634 for (currnode = topnode; currnode; currnode = currnode->listnext) {
635 char *origtext = currnode->text.output.text;
636 currnode->text = empty_info_data;
637 currnode->text.charset = conf.charset;
638 info_rdaddsc(&currnode->text, "\037\nFile: ");
639 info_rdaddsc(&currnode->text, conf.filename);
640 info_rdaddsc(&currnode->text, ", Node: ");
641 info_rdaddsc(&currnode->text, currnode->name);
642 if (currnode->prev) {
643 info_rdaddsc(&currnode->text, ", Prev: ");
644 info_rdaddsc(&currnode->text, currnode->prev->name);
645 }
646 info_rdaddsc(&currnode->text, ", Up: ");
647 info_rdaddsc(&currnode->text, (currnode->up ?
648 currnode->up->name : "(dir)"));
649 if (currnode->next) {
650 info_rdaddsc(&currnode->text, ", Next: ");
651 info_rdaddsc(&currnode->text, currnode->next->name);
652 }
653 info_rdaddsc(&currnode->text, "\n\n");
654 info_rdaddsc(&currnode->text, origtext);
655 /*
656 * Just make _absolutely_ sure we end with a newline.
657 */
658 if (currnode->text.output.text[currnode->text.output.pos-1] != '\n')
659 info_rdaddc(&currnode->text, '\n');
660
661 sfree(origtext);
662 }
663
664 /*
665 * Compute the offsets for the tag table.
666 */
667 filepos = intro_text.output.pos;
668 for (currnode = topnode; currnode; currnode = currnode->listnext) {
669 currnode->pos = filepos;
670 filepos += currnode->text.output.pos;
671 }
672
673 /*
674 * Split into sub-files.
675 */
676 if (conf.maxfilesize > 0) {
677 int currfilesize = intro_text.output.pos, currfilenum = 1;
678 for (currnode = topnode; currnode; currnode = currnode->listnext) {
679 if (currfilesize > intro_text.output.pos &&
680 currfilesize + currnode->text.output.pos > conf.maxfilesize) {
681 currfilenum++;
682 currfilesize = intro_text.output.pos;
683 }
684 currnode->filenum = currfilenum;
685 currfilesize += currnode->text.output.pos;
686 }
687 }
688
689 /*
690 * Write the primary output file.
691 */
692 fp = fopen(conf.filename, "w");
693 if (!fp) {
694 error(err_cantopenw, conf.filename);
695 return;
696 }
697 fputs(intro_text.output.text, fp);
698 if (conf.maxfilesize == 0) {
699 for (currnode = topnode; currnode; currnode = currnode->listnext)
700 fputs(currnode->text.output.text, fp);
701 } else {
702 int filenum = 0;
703 fprintf(fp, "\037\nIndirect:\n");
704 for (currnode = topnode; currnode; currnode = currnode->listnext)
705 if (filenum != currnode->filenum) {
706 filenum = currnode->filenum;
707 fprintf(fp, "%s-%d: %d\n", conf.filename, filenum,
708 currnode->pos);
709 }
710 }
711 fprintf(fp, "\037\nTag Table:\n");
712 if (conf.maxfilesize > 0)
713 fprintf(fp, "(Indirect)\n");
714 for (currnode = topnode; currnode; currnode = currnode->listnext)
715 fprintf(fp, "Node: %s\177%d\n", currnode->name, currnode->pos);
716 fprintf(fp, "\037\nEnd Tag Table\n");
717 fclose(fp);
718
719 /*
720 * Write the subfiles.
721 */
722 if (conf.maxfilesize > 0) {
723 int filenum = 0;
724 fp = NULL;
725
726 for (currnode = topnode; currnode; currnode = currnode->listnext) {
727 if (filenum != currnode->filenum) {
728 char *fname;
729
730 filenum = currnode->filenum;
731
732 if (fp)
733 fclose(fp);
734 fname = snewn(strlen(conf.filename) + 40, char);
735 sprintf(fname, "%s-%d", conf.filename, filenum);
736 fp = fopen(fname, "w");
737 if (!fp) {
738 error(err_cantopenw, fname);
739 return;
740 }
741 sfree(fname);
742 fputs(intro_text.output.text, fp);
743 }
744 fputs(currnode->text.output.text, fp);
745 }
746
747 if (fp)
748 fclose(fp);
749 }
750 }
751
752 static int info_check_index(word *w, node *n, indexdata *idx)
753 {
754 int ret = 0;
755
756 for (; w; w = w->next) {
757 if (w->type == word_IndexRef) {
758 indextag *tag;
759 int i;
760
761 tag = index_findtag(idx, w->text);
762 if (!tag)
763 break;
764
765 for (i = 0; i < tag->nrefs; i++) {
766 indexentry *entry = tag->refs[i];
767 info_idx *ii = (info_idx *)entry->backend_data;
768
769 if (ii->nnodes > 0 && ii->nodes[ii->nnodes-1] == n) {
770 /*
771 * If the same index term is indexed twice
772 * within the same section, we only want to
773 * mention it once in the index. So do nothing
774 * here.
775 */
776 continue;
777 }
778
779 if (ii->nnodes >= ii->nodesize) {
780 ii->nodesize += 32;
781 ii->nodes = sresize(ii->nodes, ii->nodesize, node *);
782 }
783
784 ii->nodes[ii->nnodes++] = n;
785
786 ret = 1;
787 }
788 }
789 }
790
791 return ret;
792 }
793
794 static word *info_transform_wordlist(word *words, keywordlist *keywords)
795 {
796 word *ret = dup_word_list(words);
797 word *w;
798 keyword *kwl;
799
800 for (w = ret; w; w = w->next) {
801 w->private_data = NULL;
802 if (w->type == word_UpperXref || w->type == word_LowerXref) {
803 kwl = kw_lookup(keywords, w->text);
804 if (kwl) {
805 if (kwl->para->type == para_NumberedList ||
806 kwl->para->type == para_BiblioCited) {
807 /*
808 * In Info, we do nothing special for xrefs to
809 * numbered list items or bibliography entries.
810 */
811 continue;
812 } else {
813 /*
814 * An xref to a different section has its text
815 * completely replaced.
816 */
817 word *w2, *w3, *w4;
818 w2 = w3 = w->next;
819 w4 = NULL;
820 while (w2) {
821 if (w2->type == word_XrefEnd) {
822 w4 = w2->next;
823 w2->next = NULL;
824 break;
825 }
826 w2 = w2->next;
827 }
828 free_word_list(w3);
829
830 /*
831 * Now w is the UpperXref / LowerXref we
832 * started with, and w4 is the next word after
833 * the corresponding XrefEnd (if any). The
834 * simplest thing is just to stick a pointer to
835 * the target node structure in the private
836 * data field of the xref word, and let
837 * info_rdaddwc and friends read the node name
838 * out from there.
839 */
840 w->next = w4;
841 w->private_data = kwl->para->private_data;
842 assert(w->private_data);
843 }
844 }
845 }
846 }
847
848 return ret;
849 }
850
851 static int info_rdaddwc(info_data *id, word *words, word *end, int xrefs,
852 infoconfig *cfg) {
853 int ret = 0;
854
855 for (; words && words != end; words = words->next) switch (words->type) {
856 case word_HyperLink:
857 case word_HyperEnd:
858 case word_XrefEnd:
859 case word_IndexRef:
860 break;
861
862 case word_Normal:
863 case word_Emph:
864 case word_Code:
865 case word_WeakCode:
866 case word_WhiteSpace:
867 case word_EmphSpace:
868 case word_CodeSpace:
869 case word_WkCodeSpace:
870 case word_Quote:
871 case word_EmphQuote:
872 case word_CodeQuote:
873 case word_WkCodeQuote:
874 assert(words->type != word_CodeQuote &&
875 words->type != word_WkCodeQuote);
876 if (towordstyle(words->type) == word_Emph &&
877 (attraux(words->aux) == attr_First ||
878 attraux(words->aux) == attr_Only))
879 ret += info_rdadds(id, cfg->startemph);
880 else if (towordstyle(words->type) == word_Code &&
881 (attraux(words->aux) == attr_First ||
882 attraux(words->aux) == attr_Only))
883 ret += info_rdadds(id, cfg->lquote);
884 if (removeattr(words->type) == word_Normal) {
885 if (cvt_ok(id->charset, words->text) || !words->alt)
886 ret += info_rdadds(id, words->text);
887 else
888 ret += info_rdaddwc(id, words->alt, NULL, FALSE, cfg);
889 } else if (removeattr(words->type) == word_WhiteSpace) {
890 ret += info_rdadd(id, L' ');
891 } else if (removeattr(words->type) == word_Quote) {
892 ret += info_rdadds(id, quoteaux(words->aux) == quote_Open ?
893 cfg->lquote : cfg->rquote);
894 }
895 if (towordstyle(words->type) == word_Emph &&
896 (attraux(words->aux) == attr_Last ||
897 attraux(words->aux) == attr_Only))
898 ret += info_rdadds(id, cfg->endemph);
899 else if (towordstyle(words->type) == word_Code &&
900 (attraux(words->aux) == attr_Last ||
901 attraux(words->aux) == attr_Only))
902 ret += info_rdadds(id, cfg->rquote);
903 break;
904
905 case word_UpperXref:
906 case word_LowerXref:
907 if (xrefs && words->private_data) {
908 /*
909 * This bit is structural and so must be done in char
910 * rather than wchar_t.
911 */
912 ret += info_rdaddsc(id, "*Note ");
913 ret += info_rdaddsc(id, ((node *)words->private_data)->name);
914 ret += info_rdaddsc(id, "::");
915 }
916 break;
917 }
918
919 return ret;
920 }
921
922 static int info_width_internal(word *words, int xrefs, infoconfig *cfg);
923
924 static int info_width_internal_list(word *words, int xrefs, infoconfig *cfg) {
925 int w = 0;
926 while (words) {
927 w += info_width_internal(words, xrefs, cfg);
928 words = words->next;
929 }
930 return w;
931 }
932
933 static int info_width_internal(word *words, int xrefs, infoconfig *cfg) {
934 int wid;
935 int attr;
936
937 switch (words->type) {
938 case word_HyperLink:
939 case word_HyperEnd:
940 case word_XrefEnd:
941 case word_IndexRef:
942 return 0;
943
944 case word_UpperXref:
945 case word_LowerXref:
946 if (xrefs && words->private_data) {
947 /* "*Note " plus "::" comes to 8 characters */
948 return 8 + strwid(((node *)words->private_data)->name,
949 cfg->charset);
950 } else
951 return 0;
952 }
953
954 assert(words->type < word_internal_endattrs);
955
956 wid = 0;
957 attr = towordstyle(words->type);
958
959 if (attr == word_Emph || attr == word_Code) {
960 if (attraux(words->aux) == attr_Only ||
961 attraux(words->aux) == attr_First)
962 wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
963 cfg->charset);
964 }
965 if (attr == word_Emph || attr == word_Code) {
966 if (attraux(words->aux) == attr_Only ||
967 attraux(words->aux) == attr_Last)
968 wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
969 cfg->charset);
970 }
971
972 switch (words->type) {
973 case word_Normal:
974 case word_Emph:
975 case word_Code:
976 case word_WeakCode:
977 if (cvt_ok(cfg->charset, words->text) || !words->alt)
978 wid += ustrwid(words->text, cfg->charset);
979 else
980 wid += info_width_internal_list(words->alt, xrefs, cfg);
981 return wid;
982
983 case word_WhiteSpace:
984 case word_EmphSpace:
985 case word_CodeSpace:
986 case word_WkCodeSpace:
987 case word_Quote:
988 case word_EmphQuote:
989 case word_CodeQuote:
990 case word_WkCodeQuote:
991 assert(words->type != word_CodeQuote &&
992 words->type != word_WkCodeQuote);
993 if (removeattr(words->type) == word_Quote) {
994 if (quoteaux(words->aux) == quote_Open)
995 wid += ustrwid(cfg->lquote, cfg->charset);
996 else
997 wid += ustrwid(cfg->rquote, cfg->charset);
998 } else
999 wid++; /* space */
1000 }
1001 return wid;
1002 }
1003
1004 static int info_width_noxrefs(void *ctx, word *words)
1005 {
1006 return info_width_internal(words, FALSE, (infoconfig *)ctx);
1007 }
1008 static int info_width_xrefs(void *ctx, word *words)
1009 {
1010 return info_width_internal(words, TRUE, (infoconfig *)ctx);
1011 }
1012
1013 static void info_heading(info_data *text, word *tprefix,
1014 word *words, alignstruct align,
1015 int width, infoconfig *cfg) {
1016 int length;
1017 int firstlinewidth, wrapwidth;
1018 wrappedline *wrapping, *p;
1019
1020 length = 0;
1021 if (tprefix) {
1022 length += info_rdaddwc(text, tprefix, NULL, FALSE, cfg);
1023 length += info_rdadds(text, cfg->sectsuffix);
1024 }
1025
1026 wrapwidth = width;
1027 firstlinewidth = width - length;
1028
1029 wrapping = wrap_para(words, firstlinewidth, wrapwidth,
1030 info_width_noxrefs, cfg, 0);
1031 for (p = wrapping; p; p = p->next) {
1032 length += info_rdaddwc(text, p->begin, p->end, FALSE, cfg);
1033 info_rdadd(text, L'\n');
1034 if (*align.underline) {
1035 while (length > 0) {
1036 info_rdadds(text, align.underline);
1037 length -= ustrwid(align.underline, cfg->charset);
1038 }
1039 info_rdadd(text, L'\n');
1040 }
1041 length = 0;
1042 }
1043 wrap_free(wrapping);
1044 info_rdadd(text, L'\n');
1045 }
1046
1047 static void info_rule(info_data *text, int indent, int width, infoconfig *cfg)
1048 {
1049 while (indent--) info_rdadd(text, L' ');
1050 while (width > 0) {
1051 info_rdadds(text, cfg->rule);
1052 width -= ustrwid(cfg->rule, cfg->charset);
1053 }
1054 info_rdadd(text, L'\n');
1055 info_rdadd(text, L'\n');
1056 }
1057
1058 static void info_para(info_data *text, word *prefix, wchar_t *prefixextra,
1059 word *input, keywordlist *keywords, int indent,
1060 int extraindent, int width, infoconfig *cfg) {
1061 wrappedline *wrapping, *p;
1062 word *words;
1063 int e;
1064 int i;
1065 int firstlinewidth = width;
1066
1067 words = info_transform_wordlist(input, keywords);
1068
1069 if (prefix) {
1070 for (i = 0; i < indent; i++)
1071 info_rdadd(text, L' ');
1072 e = info_rdaddwc(text, prefix, NULL, FALSE, cfg);
1073 if (prefixextra)
1074 e += info_rdadds(text, prefixextra);
1075 /* If the prefix is too long, shorten the first line to fit. */
1076 e = extraindent - e;
1077 if (e < 0) {
1078 firstlinewidth += e; /* this decreases it, since e < 0 */
1079 if (firstlinewidth < 0) {
1080 e = indent + extraindent;
1081 firstlinewidth = width;
1082 info_rdadd(text, L'\n');
1083 } else
1084 e = 0;
1085 }
1086 } else
1087 e = indent + extraindent;
1088
1089 wrapping = wrap_para(words, firstlinewidth, width, info_width_xrefs,
1090 cfg, 0);
1091 for (p = wrapping; p; p = p->next) {
1092 for (i = 0; i < e; i++)
1093 info_rdadd(text, L' ');
1094 info_rdaddwc(text, p->begin, p->end, TRUE, cfg);
1095 info_rdadd(text, L'\n');
1096 e = indent + extraindent;
1097 }
1098 wrap_free(wrapping);
1099 info_rdadd(text, L'\n');
1100
1101 free_word_list(words);
1102 }
1103
1104 static void info_codepara(info_data *text, word *words,
1105 int indent, int width) {
1106 int i;
1107
1108 for (; words; words = words->next) if (words->type == word_WeakCode) {
1109 for (i = 0; i < indent; i++)
1110 info_rdadd(text, L' ');
1111 if (info_rdadds(text, words->text) > width) {
1112 /* FIXME: warn */
1113 }
1114 info_rdadd(text, L'\n');
1115 }
1116
1117 info_rdadd(text, L'\n');
1118 }
1119
1120 static void info_versionid(info_data *text, word *words, infoconfig *cfg) {
1121 info_rdadd(text, L'[');
1122 info_rdaddwc(text, words, NULL, FALSE, cfg);
1123 info_rdadds(text, L"]\n");
1124 }
1125
1126 static node *info_node_new(char *name, int charset)
1127 {
1128 node *n;
1129
1130 n = snew(node);
1131 n->text = empty_info_data;
1132 n->text.charset = charset;
1133 n->up = n->next = n->prev = n->lastchild = n->listnext = NULL;
1134 n->name = dupstr(name);
1135 n->started_menu = FALSE;
1136
1137 return n;
1138 }
1139
1140 static char *info_node_name_core(info_data *id, filepos *fpos)
1141 {
1142 char *p, *q;
1143
1144 /*
1145 * We cannot have commas, colons or parentheses in a node name.
1146 * Remove any that we find, with a warning.
1147 */
1148 p = q = id->output.text;
1149 while (*p) {
1150 if (*p == ':' || *p == ',' || *p == '(' || *p == ')') {
1151 error(err_infonodechar, fpos, *p);
1152 } else {
1153 *q++ = *p;
1154 }
1155 p++;
1156 }
1157 *q = '\0';
1158
1159 return id->output.text;
1160 }
1161
1162 static char *info_node_name_for_para(paragraph *par, infoconfig *cfg)
1163 {
1164 info_data id = EMPTY_INFO_DATA;
1165
1166 id.charset = cfg->charset;
1167 info_rdaddwc(&id, par->kwtext ? par->kwtext : par->words,
1168 NULL, FALSE, cfg);
1169 info_rdaddsc(&id, NULL);
1170
1171 return info_node_name_core(&id, &par->fpos);
1172 }
1173
1174 static char *info_node_name_for_text(wchar_t *text, infoconfig *cfg)
1175 {
1176 info_data id = EMPTY_INFO_DATA;
1177
1178 id.charset = cfg->charset;
1179 info_rdadds(&id, text);
1180 info_rdaddsc(&id, NULL);
1181
1182 return info_node_name_core(&id, NULL);
1183 }
1184
1185 static void info_menu_item(info_data *text, node *n, paragraph *p,
1186 infoconfig *cfg)
1187 {
1188 /*
1189 * FIXME: Depending on how we're doing node names in this info
1190 * file, we might want to do
1191 *
1192 * * Node name:: Chapter title
1193 *
1194 * _or_
1195 *
1196 * * Chapter number: Node name.
1197 *
1198 * This function mostly works in char rather than wchar_t,
1199 * because a menu item is a structural component.
1200 */
1201 info_rdaddsc(text, "* ");
1202 info_rdaddsc(text, n->name);
1203 info_rdaddsc(text, "::");
1204 if (p) {
1205 info_rdaddc(text, ' ');
1206 info_rdaddwc(text, p->words, NULL, FALSE, cfg);
1207 }
1208 info_rdaddc(text, '\n');
1209 }
1210
1211 /*
1212 * These functions implement my wrapper on the rdadd* calls which
1213 * allows me to switch arbitrarily between literal octet-string
1214 * text and charset-translated Unicode. (Because no matter what
1215 * character set I write the actual text in, I expect info readers
1216 * to treat node names and file names literally and to expect
1217 * keywords like `*Note' in their canonical form, so I have to take
1218 * steps to ensure that those structural elements of the file
1219 * aren't messed with.)
1220 */
1221 static int info_rdadds(info_data *d, wchar_t const *wcs)
1222 {
1223 if (!d->wcmode) {
1224 d->state = charset_init_state;
1225 d->wcmode = TRUE;
1226 }
1227
1228 if (wcs) {
1229 char buf[256];
1230 int len, width, ret;
1231
1232 width = ustrwid(wcs, d->charset);
1233
1234 len = ustrlen(wcs);
1235 while (len > 0) {
1236 int prevlen = len;
1237
1238 ret = charset_from_unicode(&wcs, &len, buf, lenof(buf),
1239 d->charset, &d->state, NULL);
1240
1241 assert(len < prevlen);
1242
1243 if (ret > 0) {
1244 buf[ret] = '\0';
1245 rdaddsc(&d->output, buf);
1246 }
1247 }
1248
1249 return width;
1250 } else
1251 return 0;
1252 }
1253
1254 static int info_rdaddsc(info_data *d, char const *cs)
1255 {
1256 if (d->wcmode) {
1257 char buf[256];
1258 int ret;
1259
1260 ret = charset_from_unicode(NULL, 0, buf, lenof(buf),
1261 d->charset, &d->state, NULL);
1262 if (ret > 0) {
1263 buf[ret] = '\0';
1264 rdaddsc(&d->output, buf);
1265 }
1266
1267 d->wcmode = FALSE;
1268 }
1269
1270 if (cs) {
1271 rdaddsc(&d->output, cs);
1272 return strwid(cs, d->charset);
1273 } else
1274 return 0;
1275 }
1276
1277 static int info_rdadd(info_data *d, wchar_t wc)
1278 {
1279 wchar_t wcs[2];
1280 wcs[0] = wc;
1281 wcs[1] = L'\0';
1282 return info_rdadds(d, wcs);
1283 }
1284
1285 static int info_rdaddc(info_data *d, char c)
1286 {
1287 char cs[2];
1288 cs[0] = c;
1289 cs[1] = '\0';
1290 return info_rdaddsc(d, cs);
1291 }