Make ps_glyph_to_unicode() take a glyph index instead of a glyph name.
[sgt/halibut] / bk_info.c
CommitLineData
5dd44dce 1/*
2 * info backend for Halibut
3 *
b921687e 4 * Possible future work:
5dd44dce 5 *
b921687e 6 * - configurable choice of how to allocate node names?
7 * + possibly a template-like approach, choosing node names to
8 * be the full section title or perhaps the internal keyword?
9 * + neither of those seems quite right. Perhaps instead a
10 * Windows Help-like mechanism, where a magic config
11 * directive allows user choice of name for every node.
12 * + Only trouble with that is, now what happens to the section
13 * numbers? Do they become completely vestigial and just sit
14 * in the title text of each node? Or do we keep them in the
15 * menus somehow? I think people might occasionally want to
16 * go to a section by number, if only because all the _other_
17 * formats of the same document will reference the numbers
18 * all the time. So our menu lines could look like one of
19 * these:
20 * * Nodename: Section 1.2. Title of section.
21 * * Section 1.2: Nodename. Title of section.
5dd44dce 22 *
b921687e 23 * - might be helpful to diagnose duplicate node names!
5dd44dce 24 */
25
26#include <stdio.h>
27#include <stdlib.h>
28#include <assert.h>
29#include "halibut.h"
30
/*
 * Settings for the Info backend. Filled in by info_configure() from
 * built-in defaults and the document's configuration paragraphs.
 */
typedef struct {
    char *filename;            /* name of the primary output file */
    int maxfilesize;           /* sub-file split threshold; splitting is
                                * only done when this is > 0 */
    int charset;               /* output character set */
    int listindentbefore;      /* indent before a list item's prefix */
    int listindentafter;       /* room allotted to the prefix itself */
    int indent_code;           /* indent applied to code paragraphs */
    int width;                 /* overall text width */
    int index_width;           /* column at which index references start */
    wchar_t *bullet;           /* bullet text (with fallbacks) */
    wchar_t *listsuffix;       /* text appended to numbered-list prefixes */
    wchar_t *startemph;        /* emitted before emphasised text */
    wchar_t *endemph;          /* emitted after emphasised text */
    wchar_t *lquote;           /* opening quote */
    wchar_t *rquote;           /* closing quote */
    wchar_t *sectsuffix;       /* separates a heading's prefix from its title */
    wchar_t *underline;        /* repeated to underline headings */
    wchar_t *rule;             /* repeated to draw horizontal rules */
    wchar_t *index_text;       /* title of the index node */
} infoconfig;
44
91f93b94 45typedef struct {
46 rdstringc output;
47 int charset;
48 charset_state state;
49 int wcmode;
50} info_data;
51#define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE }
52static const info_data empty_info_data = EMPTY_INFO_DATA;
53
5dd44dce 54typedef struct node_tag node;
55struct node_tag {
56 node *listnext;
57 node *up, *prev, *next, *lastchild;
58 int pos, started_menu, filenum;
59 char *name;
91f93b94 60 info_data text;
5dd44dce 61};
62
63typedef struct {
64 char *text;
91f93b94 65 int length;
5dd44dce 66 int nnodes, nodesize;
67 node **nodes;
68} info_idx;
69
91f93b94 70static int info_rdadd(info_data *, wchar_t);
71static int info_rdadds(info_data *, wchar_t const *);
72static int info_rdaddc(info_data *, char);
73static int info_rdaddsc(info_data *, char const *);
5dd44dce 74
5b1d0032 75static void info_heading(info_data *, word *, word *, int, infoconfig *);
76static void info_rule(info_data *, int, int, infoconfig *);
91f93b94 77static void info_para(info_data *, word *, wchar_t *, word *, keywordlist *,
5b1d0032 78 int, int, int, infoconfig *);
91f93b94 79static void info_codepara(info_data *, word *, int, int);
5b1d0032 80static void info_versionid(info_data *, word *, infoconfig *);
81static void info_menu_item(info_data *, node *, paragraph *, infoconfig *);
5dd44dce 82static word *info_transform_wordlist(word *, keywordlist *);
83static int info_check_index(word *, node *, indexdata *);
84
5b1d0032 85static int info_rdaddwc(info_data *, word *, word *, int, infoconfig *);
5dd44dce 86
91f93b94 87static node *info_node_new(char *name, int charset);
f6220253 88static char *info_node_name_for_para(paragraph *p, infoconfig *);
89static char *info_node_name_for_text(wchar_t *text, infoconfig *);
5dd44dce 90
91static infoconfig info_configure(paragraph *source) {
92 infoconfig ret;
5b1d0032 93 paragraph *p;
5dd44dce 94
95 /*
96 * Defaults.
97 */
98 ret.filename = dupstr("output.info");
99 ret.maxfilesize = 64 << 10;
91f93b94 100 ret.charset = CS_ASCII;
5b1d0032 101 ret.width = 70;
102 ret.listindentbefore = 1;
103 ret.listindentafter = 3;
104 ret.indent_code = 2;
105 ret.index_width = 40;
106 ret.listsuffix = L".";
107 ret.bullet = L"\x2022\0-\0\0";
108 ret.rule = L"\x2500\0-\0\0";
109 ret.startemph = L"_\0_\0\0";
110 ret.endemph = uadv(ret.startemph);
111 ret.lquote = L"\x2018\0\x2019\0`\0'\0\0";
112 ret.rquote = uadv(ret.lquote);
113 ret.sectsuffix = L": ";
114 ret.underline = L"\x203E\0-\0\0";
f6220253 115 ret.index_text = L"Index";
5b1d0032 116
117 /*
118 * Two-pass configuration so that we can pick up global config
119 * (e.g. `quotes') before having it overridden by specific
120 * config (`info-quotes'), irrespective of the order in which
121 * they occur.
122 */
123 for (p = source; p; p = p->next) {
124 if (p->type == para_Config) {
125 if (!ustricmp(p->keyword, L"quotes")) {
126 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
127 ret.lquote = uadv(p->keyword);
128 ret.rquote = uadv(ret.lquote);
129 }
f6220253 130 } else if (!ustricmp(p->keyword, L"index")) {
131 ret.index_text = uadv(p->keyword);
5b1d0032 132 }
133 }
134 }
5dd44dce 135
5b1d0032 136 for (p = source; p; p = p->next) {
137 if (p->type == para_Config) {
138 if (!ustricmp(p->keyword, L"info-filename")) {
5dd44dce 139 sfree(ret.filename);
5b1d0032 140 ret.filename = dupstr(adv(p->origkeyword));
141 } else if (!ustricmp(p->keyword, L"info-charset")) {
0960a3d8 142 ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
5b1d0032 143 } else if (!ustricmp(p->keyword, L"info-max-file-size")) {
144 ret.maxfilesize = utoi(uadv(p->keyword));
145 } else if (!ustricmp(p->keyword, L"info-width")) {
146 ret.width = utoi(uadv(p->keyword));
147 } else if (!ustricmp(p->keyword, L"info-indent-code")) {
148 ret.indent_code = utoi(uadv(p->keyword));
149 } else if (!ustricmp(p->keyword, L"info-index-width")) {
150 ret.index_width = utoi(uadv(p->keyword));
151 } else if (!ustricmp(p->keyword, L"info-list-indent")) {
152 ret.listindentbefore = utoi(uadv(p->keyword));
153 } else if (!ustricmp(p->keyword, L"info-listitem-indent")) {
154 ret.listindentafter = utoi(uadv(p->keyword));
155 } else if (!ustricmp(p->keyword, L"info-section-suffix")) {
156 ret.sectsuffix = uadv(p->keyword);
157 } else if (!ustricmp(p->keyword, L"info-underline")) {
158 ret.underline = uadv(p->keyword);
159 } else if (!ustricmp(p->keyword, L"info-bullet")) {
160 ret.bullet = uadv(p->keyword);
161 } else if (!ustricmp(p->keyword, L"info-rule")) {
162 ret.rule = uadv(p->keyword);
163 } else if (!ustricmp(p->keyword, L"info-list-suffix")) {
164 ret.listsuffix = uadv(p->keyword);
165 } else if (!ustricmp(p->keyword, L"info-emphasis")) {
166 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
167 ret.startemph = uadv(p->keyword);
168 ret.endemph = uadv(ret.startemph);
169 }
170 } else if (!ustricmp(p->keyword, L"info-quotes")) {
171 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
172 ret.lquote = uadv(p->keyword);
173 ret.rquote = uadv(ret.lquote);
174 }
5dd44dce 175 }
176 }
177 }
178
5b1d0032 179 /*
180 * Now process fallbacks on quote characters, underlines, the
181 * rule character, the emphasis characters, and bullets.
182 */
183 while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) &&
184 (!cvt_ok(ret.charset, ret.lquote) ||
185 !cvt_ok(ret.charset, ret.rquote))) {
186 ret.lquote = uadv(ret.rquote);
187 ret.rquote = uadv(ret.lquote);
188 }
189
190 while (*uadv(ret.endemph) && *uadv(uadv(ret.endemph)) &&
191 (!cvt_ok(ret.charset, ret.startemph) ||
192 !cvt_ok(ret.charset, ret.endemph))) {
193 ret.startemph = uadv(ret.endemph);
194 ret.endemph = uadv(ret.startemph);
195 }
196
197 while (*ret.underline && *uadv(ret.underline) &&
198 !cvt_ok(ret.charset, ret.underline))
199 ret.underline = uadv(ret.underline);
200
201 while (*ret.bullet && *uadv(ret.bullet) &&
202 !cvt_ok(ret.charset, ret.bullet))
203 ret.bullet = uadv(ret.bullet);
204
205 while (*ret.rule && *uadv(ret.rule) &&
206 !cvt_ok(ret.charset, ret.rule))
207 ret.rule = uadv(ret.rule);
208
5dd44dce 209 return ret;
210}
211
212paragraph *info_config_filename(char *filename)
213{
e4ea58f8 214 return cmdline_cfg_simple("info-filename", filename, NULL);
5dd44dce 215}
216
217void info_backend(paragraph *sourceform, keywordlist *keywords,
43341922 218 indexdata *idx, void *unused) {
5dd44dce 219 paragraph *p;
220 infoconfig conf;
221 word *prefix, *body, *wp;
222 word spaceword;
91f93b94 223 wchar_t *prefixextra;
5dd44dce 224 int nesting, nestindent;
225 int indentb, indenta;
226 int filepos;
227 int has_index;
91f93b94 228 info_data intro_text = EMPTY_INFO_DATA;
5dd44dce 229 node *topnode, *currnode;
230 word bullet;
231 FILE *fp;
232
43341922 233 IGNORE(unused);
5dd44dce 234
235 conf = info_configure(sourceform);
236
237 /*
238 * Go through and create a node for each section.
239 */
91f93b94 240 topnode = info_node_new("Top", conf.charset);
5dd44dce 241 currnode = topnode;
242 for (p = sourceform; p; p = p->next) switch (p->type) {
243 /*
244 * Chapter titles.
245 */
246 case para_Chapter:
247 case para_Appendix:
248 case para_UnnumberedChapter:
249 case para_Heading:
250 case para_Subsect:
251 {
252 node *newnode, *upnode;
253 char *nodename;
254
f6220253 255 nodename = info_node_name_for_para(p, &conf);
91f93b94 256 newnode = info_node_new(nodename, conf.charset);
5dd44dce 257 sfree(nodename);
258
259 p->private_data = newnode;
260
261 if (p->parent)
262 upnode = (node *)p->parent->private_data;
263 else
264 upnode = topnode;
265 assert(upnode);
266 newnode->up = upnode;
267
268 currnode->next = newnode;
269 newnode->prev = currnode;
270
271 currnode->listnext = newnode;
272 currnode = newnode;
273 }
274 break;
e0e55d41 275 default:
276 p->private_data = NULL;
277 break;
5dd44dce 278 }
279
280 /*
281 * Set up the display form of each index entry.
282 */
283 {
284 int i;
285 indexentry *entry;
286
287 for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
f1530049 288 info_idx *ii = snew(info_idx);
91f93b94 289 info_data id = EMPTY_INFO_DATA;
290
291 id.charset = conf.charset;
5dd44dce 292
293 ii->nnodes = ii->nodesize = 0;
294 ii->nodes = NULL;
295
5b1d0032 296 ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE, &conf);
f4551933 297
91f93b94 298 ii->text = id.output.text;
5dd44dce 299
300 entry->backend_data = ii;
301 }
302 }
303
304 /*
305 * An Info file begins with a piece of introductory text which
306 * is apparently never shown anywhere. This seems to me to be a
d4c7e130 307 * good place to put the copyright notice and the version IDs.
308 * Also, Info directory entries are expected to go here.
5dd44dce 309 */
91f93b94 310 intro_text.charset = conf.charset;
5dd44dce 311
91f93b94 312 info_rdaddsc(&intro_text,
5dd44dce 313 "This Info file generated by Halibut, ");
91f93b94 314 info_rdaddsc(&intro_text, version);
315 info_rdaddsc(&intro_text, "\n\n");
5dd44dce 316
317 for (p = sourceform; p; p = p->next)
d4c7e130 318 if (p->type == para_Config &&
319 !ustricmp(p->keyword, L"info-dir-entry")) {
320 wchar_t *section, *shortname, *longname, *kw;
321 char *s;
322
323 section = uadv(p->keyword);
12f0ee84 324 shortname = *section ? uadv(section) : L"";
325 longname = *shortname ? uadv(shortname) : L"";
326 kw = *longname ? uadv(longname) : L"";
d4c7e130 327
328 if (!*longname) {
12f0ee84 329 error(err_cfginsufarg, &p->fpos, p->origkeyword, 3);
d4c7e130 330 continue;
331 }
332
91f93b94 333 info_rdaddsc(&intro_text, "INFO-DIR-SECTION ");
334 info_rdadds(&intro_text, section);
335 info_rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
336 info_rdadds(&intro_text, shortname);
337 info_rdaddsc(&intro_text, ": (");
d4c7e130 338 s = dupstr(conf.filename);
339 if (strlen(s) > 5 && !strcmp(s+strlen(s)-5, ".info"))
340 s[strlen(s)-5] = '\0';
91f93b94 341 info_rdaddsc(&intro_text, s);
d4c7e130 342 sfree(s);
91f93b94 343 info_rdaddsc(&intro_text, ")");
d4c7e130 344 if (*kw) {
345 keyword *kwl = kw_lookup(keywords, kw);
346 if (kwl && kwl->para->private_data) {
347 node *n = (node *)kwl->para->private_data;
91f93b94 348 info_rdaddsc(&intro_text, n->name);
d4c7e130 349 }
350 }
91f93b94 351 info_rdaddsc(&intro_text, ". ");
352 info_rdadds(&intro_text, longname);
353 info_rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
d4c7e130 354 }
355
356 for (p = sourceform; p; p = p->next)
5dd44dce 357 if (p->type == para_Copyright)
358 info_para(&intro_text, NULL, NULL, p->words, keywords,
5b1d0032 359 0, 0, conf.width, &conf);
5dd44dce 360
361 for (p = sourceform; p; p = p->next)
362 if (p->type == para_VersionID)
5b1d0032 363 info_versionid(&intro_text, p->words, &conf);
5dd44dce 364
91f93b94 365 if (intro_text.output.text[intro_text.output.pos-1] != '\n')
366 info_rdaddc(&intro_text, '\n');
5dd44dce 367
368 /* Do the title */
369 for (p = sourceform; p; p = p->next)
370 if (p->type == para_Title)
5b1d0032 371 info_heading(&topnode->text, NULL, p->words, conf.width, &conf);
5dd44dce 372
5b1d0032 373 nestindent = conf.listindentbefore + conf.listindentafter;
5dd44dce 374 nesting = 0;
375
376 currnode = topnode;
377
378 /* Do the main document */
379 for (p = sourceform; p; p = p->next) switch (p->type) {
380
381 case para_QuotePush:
382 nesting += 2;
383 break;
384 case para_QuotePop:
385 nesting -= 2;
386 assert(nesting >= 0);
387 break;
388
389 case para_LcontPush:
390 nesting += nestindent;
391 break;
392 case para_LcontPop:
393 nesting -= nestindent;
394 assert(nesting >= 0);
395 break;
396
397 /*
398 * Things we ignore because we've already processed them or
399 * aren't going to touch them in this pass.
400 */
401 case para_IM:
402 case para_BR:
403 case para_Biblio: /* only touch BiblioCited */
404 case para_VersionID:
405 case para_NoCite:
406 case para_Title:
407 break;
408
409 /*
410 * Chapter titles.
411 */
412 case para_Chapter:
413 case para_Appendix:
414 case para_UnnumberedChapter:
415 case para_Heading:
416 case para_Subsect:
417 currnode = p->private_data;
418 assert(currnode);
419 assert(currnode->up);
420
421 if (!currnode->up->started_menu) {
91f93b94 422 info_rdaddsc(&currnode->up->text, "* Menu:\n\n");
5dd44dce 423 currnode->up->started_menu = TRUE;
424 }
5b1d0032 425 info_menu_item(&currnode->up->text, currnode, p, &conf);
5dd44dce 426
427 has_index |= info_check_index(p->words, currnode, idx);
5b1d0032 428 info_heading(&currnode->text, p->kwtext, p->words, conf.width, &conf);
5dd44dce 429 nesting = 0;
430 break;
431
432 case para_Rule:
5b1d0032 433 info_rule(&currnode->text, nesting, conf.width - nesting, &conf);
5dd44dce 434 break;
435
436 case para_Normal:
437 case para_Copyright:
438 case para_DescribedThing:
439 case para_Description:
440 case para_BiblioCited:
441 case para_Bullet:
442 case para_NumberedList:
443 has_index |= info_check_index(p->words, currnode, idx);
444 if (p->type == para_Bullet) {
445 bullet.next = NULL;
446 bullet.alt = NULL;
447 bullet.type = word_Normal;
5b1d0032 448 bullet.text = conf.bullet;
5dd44dce 449 prefix = &bullet;
450 prefixextra = NULL;
5b1d0032 451 indentb = conf.listindentbefore;
452 indenta = conf.listindentafter;
5dd44dce 453 } else if (p->type == para_NumberedList) {
454 prefix = p->kwtext;
5b1d0032 455 prefixextra = conf.listsuffix;
456 indentb = conf.listindentbefore;
457 indenta = conf.listindentafter;
5dd44dce 458 } else if (p->type == para_Description) {
459 prefix = NULL;
460 prefixextra = NULL;
5b1d0032 461 indentb = conf.listindentbefore;
462 indenta = conf.listindentafter;
5dd44dce 463 } else {
464 prefix = NULL;
465 prefixextra = NULL;
466 indentb = indenta = 0;
467 }
468 if (p->type == para_BiblioCited) {
469 body = dup_word_list(p->kwtext);
470 for (wp = body; wp->next; wp = wp->next);
471 wp->next = &spaceword;
472 spaceword.next = p->words;
473 spaceword.alt = NULL;
474 spaceword.type = word_WhiteSpace;
475 spaceword.text = NULL;
476 } else {
477 wp = NULL;
478 body = p->words;
479 }
480 info_para(&currnode->text, prefix, prefixextra, body, keywords,
481 nesting + indentb, indenta,
5b1d0032 482 conf.width - nesting - indentb - indenta, &conf);
5dd44dce 483 if (wp) {
484 wp->next = NULL;
485 free_word_list(body);
486 }
487 break;
488
489 case para_Code:
490 info_codepara(&currnode->text, p->words,
5b1d0032 491 nesting + conf.indent_code,
492 conf.width - nesting - 2 * conf.indent_code);
5dd44dce 493 break;
494 }
495
496 /*
497 * Create an index node if required.
498 */
499 if (has_index) {
500 node *newnode;
501 int i, j, k;
502 indexentry *entry;
f6220253 503 char *nodename;
504
505 nodename = info_node_name_for_text(conf.index_text, &conf);
506 newnode = info_node_new(nodename, conf.charset);
507 sfree(nodename);
5dd44dce 508
5dd44dce 509 newnode->up = topnode;
510
511 currnode->next = newnode;
512 newnode->prev = currnode;
513 currnode->listnext = newnode;
514
f6220253 515 k = info_rdadds(&newnode->text, conf.index_text);
516 info_rdaddsc(&newnode->text, "\n");
517 while (k > 0) {
518 info_rdadds(&newnode->text, conf.underline);
519 k -= ustrwid(conf.underline, conf.charset);
520 }
521 info_rdaddsc(&newnode->text, "\n\n");
5dd44dce 522
5b1d0032 523 info_menu_item(&topnode->text, newnode, NULL, &conf);
5dd44dce 524
525 for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
526 info_idx *ii = (info_idx *)entry->backend_data;
527
528 for (j = 0; j < ii->nnodes; j++) {
5dd44dce 529 /*
530 * When we have multiple references for a single
531 * index term, we only display the actual term on
532 * the first line, to make it clear that the terms
533 * really are the same.
534 */
535 if (j == 0)
91f93b94 536 info_rdaddsc(&newnode->text, ii->text);
5b1d0032 537 for (k = (j ? 0 : ii->length); k < conf.index_width-2; k++)
91f93b94 538 info_rdaddc(&newnode->text, ' ');
5b1d0032 539 info_rdaddsc(&newnode->text, " *Note ");
91f93b94 540 info_rdaddsc(&newnode->text, ii->nodes[j]->name);
541 info_rdaddsc(&newnode->text, "::\n");
5dd44dce 542 }
543 }
544 }
545
546 /*
547 * Finalise the text of each node, by adding the ^_ delimiter
548 * and the node line at the top.
549 */
550 for (currnode = topnode; currnode; currnode = currnode->listnext) {
91f93b94 551 char *origtext = currnode->text.output.text;
552 currnode->text = empty_info_data;
553 currnode->text.charset = conf.charset;
554 info_rdaddsc(&currnode->text, "\037\nFile: ");
555 info_rdaddsc(&currnode->text, conf.filename);
556 info_rdaddsc(&currnode->text, ", Node: ");
557 info_rdaddsc(&currnode->text, currnode->name);
5dd44dce 558 if (currnode->prev) {
91f93b94 559 info_rdaddsc(&currnode->text, ", Prev: ");
560 info_rdaddsc(&currnode->text, currnode->prev->name);
5dd44dce 561 }
91f93b94 562 info_rdaddsc(&currnode->text, ", Up: ");
563 info_rdaddsc(&currnode->text, (currnode->up ?
564 currnode->up->name : "(dir)"));
5dd44dce 565 if (currnode->next) {
91f93b94 566 info_rdaddsc(&currnode->text, ", Next: ");
567 info_rdaddsc(&currnode->text, currnode->next->name);
5dd44dce 568 }
91f93b94 569 info_rdaddsc(&currnode->text, "\n\n");
570 info_rdaddsc(&currnode->text, origtext);
5dd44dce 571 /*
572 * Just make _absolutely_ sure we end with a newline.
573 */
91f93b94 574 if (currnode->text.output.text[currnode->text.output.pos-1] != '\n')
575 info_rdaddc(&currnode->text, '\n');
5dd44dce 576
577 sfree(origtext);
578 }
579
580 /*
581 * Compute the offsets for the tag table.
582 */
91f93b94 583 filepos = intro_text.output.pos;
5dd44dce 584 for (currnode = topnode; currnode; currnode = currnode->listnext) {
585 currnode->pos = filepos;
91f93b94 586 filepos += currnode->text.output.pos;
5dd44dce 587 }
588
589 /*
590 * Split into sub-files.
591 */
592 if (conf.maxfilesize > 0) {
91f93b94 593 int currfilesize = intro_text.output.pos, currfilenum = 1;
5dd44dce 594 for (currnode = topnode; currnode; currnode = currnode->listnext) {
91f93b94 595 if (currfilesize > intro_text.output.pos &&
596 currfilesize + currnode->text.output.pos > conf.maxfilesize) {
5dd44dce 597 currfilenum++;
91f93b94 598 currfilesize = intro_text.output.pos;
5dd44dce 599 }
600 currnode->filenum = currfilenum;
91f93b94 601 currfilesize += currnode->text.output.pos;
5dd44dce 602 }
603 }
604
605 /*
606 * Write the primary output file.
607 */
608 fp = fopen(conf.filename, "w");
609 if (!fp) {
610 error(err_cantopenw, conf.filename);
611 return;
612 }
91f93b94 613 fputs(intro_text.output.text, fp);
5dd44dce 614 if (conf.maxfilesize == 0) {
615 for (currnode = topnode; currnode; currnode = currnode->listnext)
91f93b94 616 fputs(currnode->text.output.text, fp);
5dd44dce 617 } else {
618 int filenum = 0;
619 fprintf(fp, "\037\nIndirect:\n");
620 for (currnode = topnode; currnode; currnode = currnode->listnext)
621 if (filenum != currnode->filenum) {
622 filenum = currnode->filenum;
623 fprintf(fp, "%s-%d: %d\n", conf.filename, filenum,
624 currnode->pos);
625 }
626 }
627 fprintf(fp, "\037\nTag Table:\n");
628 if (conf.maxfilesize > 0)
629 fprintf(fp, "(Indirect)\n");
630 for (currnode = topnode; currnode; currnode = currnode->listnext)
631 fprintf(fp, "Node: %s\177%d\n", currnode->name, currnode->pos);
632 fprintf(fp, "\037\nEnd Tag Table\n");
633 fclose(fp);
634
635 /*
636 * Write the subfiles.
637 */
638 if (conf.maxfilesize > 0) {
639 int filenum = 0;
640 fp = NULL;
641
642 for (currnode = topnode; currnode; currnode = currnode->listnext) {
643 if (filenum != currnode->filenum) {
644 char *fname;
645
646 filenum = currnode->filenum;
647
648 if (fp)
649 fclose(fp);
f1530049 650 fname = snewn(strlen(conf.filename) + 40, char);
5dd44dce 651 sprintf(fname, "%s-%d", conf.filename, filenum);
652 fp = fopen(fname, "w");
653 if (!fp) {
654 error(err_cantopenw, fname);
655 return;
656 }
657 sfree(fname);
91f93b94 658 fputs(intro_text.output.text, fp);
5dd44dce 659 }
91f93b94 660 fputs(currnode->text.output.text, fp);
5dd44dce 661 }
662
663 if (fp)
664 fclose(fp);
665 }
666}
667
668static int info_check_index(word *w, node *n, indexdata *idx)
669{
670 int ret = 0;
671
672 for (; w; w = w->next) {
673 if (w->type == word_IndexRef) {
674 indextag *tag;
675 int i;
676
677 tag = index_findtag(idx, w->text);
678 if (!tag)
679 break;
680
681 for (i = 0; i < tag->nrefs; i++) {
682 indexentry *entry = tag->refs[i];
683 info_idx *ii = (info_idx *)entry->backend_data;
684
685 if (ii->nnodes > 0 && ii->nodes[ii->nnodes-1] == n) {
686 /*
687 * If the same index term is indexed twice
688 * within the same section, we only want to
689 * mention it once in the index. So do nothing
690 * here.
691 */
692 continue;
693 }
694
695 if (ii->nnodes >= ii->nodesize) {
696 ii->nodesize += 32;
f1530049 697 ii->nodes = sresize(ii->nodes, ii->nodesize, node *);
5dd44dce 698 }
699
700 ii->nodes[ii->nnodes++] = n;
701
702 ret = 1;
703 }
704 }
705 }
706
707 return ret;
708}
709
5dd44dce 710static word *info_transform_wordlist(word *words, keywordlist *keywords)
711{
712 word *ret = dup_word_list(words);
713 word *w;
714 keyword *kwl;
715
716 for (w = ret; w; w = w->next) {
717 w->private_data = NULL;
718 if (w->type == word_UpperXref || w->type == word_LowerXref) {
719 kwl = kw_lookup(keywords, w->text);
720 if (kwl) {
721 if (kwl->para->type == para_NumberedList ||
722 kwl->para->type == para_BiblioCited) {
723 /*
724 * In Info, we do nothing special for xrefs to
725 * numbered list items or bibliography entries.
726 */
90a0531e 727 continue;
5dd44dce 728 } else {
729 /*
730 * An xref to a different section has its text
731 * completely replaced.
732 */
733 word *w2, *w3, *w4;
734 w2 = w3 = w->next;
735 w4 = NULL;
736 while (w2) {
737 if (w2->type == word_XrefEnd) {
738 w4 = w2->next;
739 w2->next = NULL;
740 break;
741 }
742 w2 = w2->next;
743 }
744 free_word_list(w3);
745
746 /*
747 * Now w is the UpperXref / LowerXref we
748 * started with, and w4 is the next word after
749 * the corresponding XrefEnd (if any). The
750 * simplest thing is just to stick a pointer to
751 * the target node structure in the private
752 * data field of the xref word, and let
753 * info_rdaddwc and friends read the node name
754 * out from there.
755 */
756 w->next = w4;
757 w->private_data = kwl->para->private_data;
758 assert(w->private_data);
759 }
760 }
761 }
762 }
763
764 return ret;
765}
766
5b1d0032 767static int info_rdaddwc(info_data *id, word *words, word *end, int xrefs,
768 infoconfig *cfg) {
91f93b94 769 int ret = 0;
5dd44dce 770
771 for (; words && words != end; words = words->next) switch (words->type) {
772 case word_HyperLink:
773 case word_HyperEnd:
774 case word_XrefEnd:
775 case word_IndexRef:
776 break;
777
778 case word_Normal:
779 case word_Emph:
780 case word_Code:
781 case word_WeakCode:
782 case word_WhiteSpace:
783 case word_EmphSpace:
784 case word_CodeSpace:
785 case word_WkCodeSpace:
786 case word_Quote:
787 case word_EmphQuote:
788 case word_CodeQuote:
789 case word_WkCodeQuote:
790 assert(words->type != word_CodeQuote &&
791 words->type != word_WkCodeQuote);
792 if (towordstyle(words->type) == word_Emph &&
793 (attraux(words->aux) == attr_First ||
794 attraux(words->aux) == attr_Only))
5b1d0032 795 ret += info_rdadds(id, cfg->startemph);
5dd44dce 796 else if (towordstyle(words->type) == word_Code &&
797 (attraux(words->aux) == attr_First ||
798 attraux(words->aux) == attr_Only))
5b1d0032 799 ret += info_rdadds(id, cfg->lquote);
5dd44dce 800 if (removeattr(words->type) == word_Normal) {
91f93b94 801 if (cvt_ok(id->charset, words->text) || !words->alt)
802 ret += info_rdadds(id, words->text);
5dd44dce 803 else
5b1d0032 804 ret += info_rdaddwc(id, words->alt, NULL, FALSE, cfg);
5dd44dce 805 } else if (removeattr(words->type) == word_WhiteSpace) {
91f93b94 806 ret += info_rdadd(id, L' ');
5dd44dce 807 } else if (removeattr(words->type) == word_Quote) {
5b1d0032 808 ret += info_rdadds(id, quoteaux(words->aux) == quote_Open ?
809 cfg->lquote : cfg->rquote);
5dd44dce 810 }
811 if (towordstyle(words->type) == word_Emph &&
812 (attraux(words->aux) == attr_Last ||
813 attraux(words->aux) == attr_Only))
5b1d0032 814 ret += info_rdadds(id, cfg->endemph);
5dd44dce 815 else if (towordstyle(words->type) == word_Code &&
816 (attraux(words->aux) == attr_Last ||
817 attraux(words->aux) == attr_Only))
5b1d0032 818 ret += info_rdadds(id, cfg->rquote);
5dd44dce 819 break;
820
821 case word_UpperXref:
822 case word_LowerXref:
823 if (xrefs && words->private_data) {
91f93b94 824 /*
825 * This bit is structural and so must be done in char
826 * rather than wchar_t.
827 */
828 ret += info_rdaddsc(id, "*Note ");
829 ret += info_rdaddsc(id, ((node *)words->private_data)->name);
830 ret += info_rdaddsc(id, "::");
5dd44dce 831 }
832 break;
833 }
91f93b94 834
835 return ret;
5dd44dce 836}
837
5b1d0032 838static int info_width_internal(word *words, int xrefs, infoconfig *cfg);
5dd44dce 839
5b1d0032 840static int info_width_internal_list(word *words, int xrefs, infoconfig *cfg) {
5dd44dce 841 int w = 0;
842 while (words) {
5b1d0032 843 w += info_width_internal(words, xrefs, cfg);
5dd44dce 844 words = words->next;
845 }
846 return w;
847}
848
5b1d0032 849static int info_width_internal(word *words, int xrefs, infoconfig *cfg) {
850 int wid;
851 int attr;
852
5dd44dce 853 switch (words->type) {
854 case word_HyperLink:
855 case word_HyperEnd:
856 case word_XrefEnd:
857 case word_IndexRef:
858 return 0;
859
5b1d0032 860 case word_UpperXref:
861 case word_LowerXref:
862 if (xrefs && words->private_data) {
863 /* "*Note " plus "::" comes to 8 characters */
864 return 8 + strwid(((node *)words->private_data)->name,
865 cfg->charset);
866 } else
867 return 0;
868 }
869
870 assert(words->type < word_internal_endattrs);
871
872 wid = 0;
873 attr = towordstyle(words->type);
874
875 if (attr == word_Emph || attr == word_Code) {
876 if (attraux(words->aux) == attr_Only ||
877 attraux(words->aux) == attr_First)
878 wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
879 cfg->charset);
880 }
881 if (attr == word_Emph || attr == word_Code) {
882 if (attraux(words->aux) == attr_Only ||
883 attraux(words->aux) == attr_Last)
884 wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
885 cfg->charset);
886 }
887
888 switch (words->type) {
5dd44dce 889 case word_Normal:
890 case word_Emph:
891 case word_Code:
892 case word_WeakCode:
5b1d0032 893 if (cvt_ok(cfg->charset, words->text) || !words->alt)
894 wid += ustrwid(words->text, cfg->charset);
895 else
896 wid += info_width_internal_list(words->alt, xrefs, cfg);
897 return wid;
5dd44dce 898
899 case word_WhiteSpace:
900 case word_EmphSpace:
901 case word_CodeSpace:
902 case word_WkCodeSpace:
903 case word_Quote:
904 case word_EmphQuote:
905 case word_CodeQuote:
906 case word_WkCodeQuote:
907 assert(words->type != word_CodeQuote &&
908 words->type != word_WkCodeQuote);
5b1d0032 909 if (removeattr(words->type) == word_Quote) {
910 if (quoteaux(words->aux) == quote_Open)
911 wid += ustrwid(cfg->lquote, cfg->charset);
912 else
913 wid += ustrwid(cfg->rquote, cfg->charset);
914 } else
915 wid++; /* space */
5dd44dce 916 }
5b1d0032 917 return wid;
5dd44dce 918}
919
43341922 920static int info_width_noxrefs(void *ctx, word *words)
5dd44dce 921{
5b1d0032 922 return info_width_internal(words, FALSE, (infoconfig *)ctx);
5dd44dce 923}
43341922 924static int info_width_xrefs(void *ctx, word *words)
5dd44dce 925{
5b1d0032 926 return info_width_internal(words, TRUE, (infoconfig *)ctx);
5dd44dce 927}
928
91f93b94 929static void info_heading(info_data *text, word *tprefix,
5b1d0032 930 word *words, int width, infoconfig *cfg) {
91f93b94 931 int length;
5dd44dce 932 int firstlinewidth, wrapwidth;
5dd44dce 933 wrappedline *wrapping, *p;
934
91f93b94 935 length = 0;
5dd44dce 936 if (tprefix) {
5b1d0032 937 length += info_rdaddwc(text, tprefix, NULL, FALSE, cfg);
938 length += info_rdadds(text, cfg->sectsuffix);
5dd44dce 939 }
5dd44dce 940
5dd44dce 941 wrapwidth = width;
91f93b94 942 firstlinewidth = width - length;
5dd44dce 943
43341922 944 wrapping = wrap_para(words, firstlinewidth, wrapwidth,
5b1d0032 945 info_width_noxrefs, cfg, 0);
5dd44dce 946 for (p = wrapping; p; p = p->next) {
5b1d0032 947 length += info_rdaddwc(text, p->begin, p->end, FALSE, cfg);
91f93b94 948 info_rdadd(text, L'\n');
5b1d0032 949 while (length > 0) {
950 info_rdadds(text, cfg->underline);
951 length -= ustrwid(cfg->underline, cfg->charset);
952 }
91f93b94 953 info_rdadd(text, L'\n');
954 length = 0;
5dd44dce 955 }
956 wrap_free(wrapping);
91f93b94 957 info_rdadd(text, L'\n');
5dd44dce 958}
959
5b1d0032 960static void info_rule(info_data *text, int indent, int width, infoconfig *cfg)
961{
91f93b94 962 while (indent--) info_rdadd(text, L' ');
5b1d0032 963 while (width > 0) {
964 info_rdadds(text, cfg->rule);
965 width -= ustrwid(cfg->rule, cfg->charset);
966 }
91f93b94 967 info_rdadd(text, L'\n');
968 info_rdadd(text, L'\n');
5dd44dce 969}
970
91f93b94 971static void info_para(info_data *text, word *prefix, wchar_t *prefixextra,
5b1d0032 972 word *input, keywordlist *keywords, int indent,
973 int extraindent, int width, infoconfig *cfg) {
5dd44dce 974 wrappedline *wrapping, *p;
975 word *words;
5dd44dce 976 int e;
977 int i;
978 int firstlinewidth = width;
979
980 words = info_transform_wordlist(input, keywords);
981
982 if (prefix) {
5dd44dce 983 for (i = 0; i < indent; i++)
91f93b94 984 info_rdadd(text, L' ');
5b1d0032 985 e = info_rdaddwc(text, prefix, NULL, FALSE, cfg);
91f93b94 986 if (prefixextra)
987 e += info_rdadds(text, prefixextra);
5dd44dce 988 /* If the prefix is too long, shorten the first line to fit. */
91f93b94 989 e = extraindent - e;
5dd44dce 990 if (e < 0) {
991 firstlinewidth += e; /* this decreases it, since e < 0 */
992 if (firstlinewidth < 0) {
993 e = indent + extraindent;
994 firstlinewidth = width;
91f93b94 995 info_rdadd(text, L'\n');
5dd44dce 996 } else
997 e = 0;
998 }
5dd44dce 999 } else
1000 e = indent + extraindent;
1001
43341922 1002 wrapping = wrap_para(words, firstlinewidth, width, info_width_xrefs,
5b1d0032 1003 cfg, 0);
5dd44dce 1004 for (p = wrapping; p; p = p->next) {
1005 for (i = 0; i < e; i++)
91f93b94 1006 info_rdadd(text, L' ');
5b1d0032 1007 info_rdaddwc(text, p->begin, p->end, TRUE, cfg);
91f93b94 1008 info_rdadd(text, L'\n');
5dd44dce 1009 e = indent + extraindent;
1010 }
1011 wrap_free(wrapping);
91f93b94 1012 info_rdadd(text, L'\n');
5dd44dce 1013
1014 free_word_list(words);
1015}
1016
91f93b94 1017static void info_codepara(info_data *text, word *words,
5dd44dce 1018 int indent, int width) {
1019 int i;
1020
1021 for (; words; words = words->next) if (words->type == word_WeakCode) {
91f93b94 1022 for (i = 0; i < indent; i++)
1023 info_rdadd(text, L' ');
1024 if (info_rdadds(text, words->text) > width) {
5dd44dce 1025 /* FIXME: warn */
1026 }
91f93b94 1027 info_rdadd(text, L'\n');
5dd44dce 1028 }
1029
91f93b94 1030 info_rdadd(text, L'\n');
5dd44dce 1031}
1032
5b1d0032 1033static void info_versionid(info_data *text, word *words, infoconfig *cfg) {
1034 info_rdadd(text, L'[');
1035 info_rdaddwc(text, words, NULL, FALSE, cfg);
91f93b94 1036 info_rdadds(text, L"]\n");
5dd44dce 1037}
1038
91f93b94 1039static node *info_node_new(char *name, int charset)
5dd44dce 1040{
1041 node *n;
1042
f1530049 1043 n = snew(node);
91f93b94 1044 n->text = empty_info_data;
1045 n->text.charset = charset;
5dd44dce 1046 n->up = n->next = n->prev = n->lastchild = n->listnext = NULL;
1047 n->name = dupstr(name);
1048 n->started_menu = FALSE;
1049
1050 return n;
1051}
1052
f6220253 1053static char *info_node_name_core(info_data *id, filepos *fpos)
5dd44dce 1054{
f4551933 1055 char *p, *q;
91f93b94 1056
f4551933 1057 /*
f6220253 1058 * We cannot have commas, colons or parentheses in a node name.
1059 * Remove any that we find, with a warning.
f4551933 1060 */
f6220253 1061 p = q = id->output.text;
f4551933 1062 while (*p) {
f6220253 1063 if (*p == ':' || *p == ',' || *p == '(' || *p == ')') {
1064 error(err_infonodechar, fpos, *p);
f4551933 1065 } else {
1066 *q++ = *p;
1067 }
1068 p++;
1069 }
f6220253 1070 *q = '\0';
1071
1072 return id->output.text;
1073}
1074
1075static char *info_node_name_for_para(paragraph *par, infoconfig *cfg)
1076{
1077 info_data id = EMPTY_INFO_DATA;
1078
1079 id.charset = cfg->charset;
1080 info_rdaddwc(&id, par->kwtext ? par->kwtext : par->words,
1081 NULL, FALSE, cfg);
1082 info_rdaddsc(&id, NULL);
1083
1084 return info_node_name_core(&id, &par->fpos);
1085}
1086
1087static char *info_node_name_for_text(wchar_t *text, infoconfig *cfg)
1088{
1089 info_data id = EMPTY_INFO_DATA;
1090
1091 id.charset = cfg->charset;
1092 info_rdadds(&id, text);
1093 info_rdaddsc(&id, NULL);
f4551933 1094
f6220253 1095 return info_node_name_core(&id, NULL);
5dd44dce 1096}
1097
5b1d0032 1098static void info_menu_item(info_data *text, node *n, paragraph *p,
1099 infoconfig *cfg)
5dd44dce 1100{
1101 /*
1102 * FIXME: Depending on how we're doing node names in this info
1103 * file, we might want to do
1104 *
1105 * * Node name:: Chapter title
1106 *
1107 * _or_
1108 *
1109 * * Chapter number: Node name.
1110 *
91f93b94 1111 * This function mostly works in char rather than wchar_t,
1112 * because a menu item is a structural component.
5dd44dce 1113 */
91f93b94 1114 info_rdaddsc(text, "* ");
1115 info_rdaddsc(text, n->name);
1116 info_rdaddsc(text, "::");
5dd44dce 1117 if (p) {
91f93b94 1118 info_rdaddc(text, ' ');
5b1d0032 1119 info_rdaddwc(text, p->words, NULL, FALSE, cfg);
5dd44dce 1120 }
91f93b94 1121 info_rdaddc(text, '\n');
1122}
1123
1124/*
1125 * These functions implement my wrapper on the rdadd* calls which
1126 * allows me to switch arbitrarily between literal octet-string
1127 * text and charset-translated Unicode. (Because no matter what
1128 * character set I write the actual text in, I expect info readers
1129 * to treat node names and file names literally and to expect
1130 * keywords like `*Note' in their canonical form, so I have to take
1131 * steps to ensure that those structural elements of the file
1132 * aren't messed with.)
1133 */
1134static int info_rdadds(info_data *d, wchar_t const *wcs)
1135{
1136 if (!d->wcmode) {
1137 d->state = charset_init_state;
1138 d->wcmode = TRUE;
1139 }
1140
1141 if (wcs) {
1142 char buf[256];
e5cd393f 1143 int len, width, ret;
1144
1145 width = ustrwid(wcs, d->charset);
91f93b94 1146
e5cd393f 1147 len = ustrlen(wcs);
91f93b94 1148 while (len > 0) {
1149 int prevlen = len;
1150
1151 ret = charset_from_unicode(&wcs, &len, buf, lenof(buf),
1152 d->charset, &d->state, NULL);
1153
1154 assert(len < prevlen);
1155
1156 if (ret > 0) {
1157 buf[ret] = '\0';
1158 rdaddsc(&d->output, buf);
1159 }
1160 }
1161
e5cd393f 1162 return width;
91f93b94 1163 } else
1164 return 0;
1165}
1166
1167static int info_rdaddsc(info_data *d, char const *cs)
1168{
1169 if (d->wcmode) {
1170 char buf[256];
1171 int ret;
1172
1173 ret = charset_from_unicode(NULL, 0, buf, lenof(buf),
1174 d->charset, &d->state, NULL);
1175 if (ret > 0) {
1176 buf[ret] = '\0';
1177 rdaddsc(&d->output, buf);
1178 }
1179
1180 d->wcmode = FALSE;
1181 }
1182
1183 if (cs) {
1184 rdaddsc(&d->output, cs);
5b1d0032 1185 return strwid(cs, d->charset);
91f93b94 1186 } else
1187 return 0;
1188}
1189
1190static int info_rdadd(info_data *d, wchar_t wc)
1191{
1192 wchar_t wcs[2];
1193 wcs[0] = wc;
1194 wcs[1] = L'\0';
1195 return info_rdadds(d, wcs);
1196}
1197
1198static int info_rdaddc(info_data *d, char c)
1199{
1200 char cs[2];
1201 cs[0] = c;
1202 cs[1] = '\0';
1203 return info_rdaddsc(d, cs);
5dd44dce 1204}