The Windows Help backend now uses libcharset to the maximum extent
[sgt/halibut] / bk_whlp.c
1 /*
2 * Windows Help backend for Halibut
3 */
4
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <ctype.h>
8 #include <assert.h>
9
10 #include "halibut.h"
11 #include "winhelp.h"
12
13 struct bk_whlp_state {
14 WHLP h;
15 indexdata *idx;
16 keywordlist *keywords;
17 WHLP_TOPIC curr_topic;
18 FILE *cntfp;
19 int cnt_last_level, cnt_workaround;
20 };
21
/*
 * Indexes of fonts in our standard font descriptor set. The title
 * fonts are laid out so that FONT_TITLE plus FONT_EMPH or FONT_CODE
 * yields the matching title variant.
 */
enum {
    FONT_NORMAL     = 0,
    FONT_EMPH       = 1,
    FONT_CODE       = 2,
    FONT_ITAL_CODE  = 3,
    FONT_BOLD_CODE  = 4,
    FONT_TITLE      = 5,
    FONT_TITLE_EMPH = 6,
    FONT_TITLE_CODE = 7,
    FONT_RULE       = 8
};
36
37 static void whlp_rdaddwc(rdstringc *rs, word *text);
38 static int whlp_convert(wchar_t *s, int maxlen,
39 char **result, int hard_spaces);
40 static void whlp_mkparagraph(struct bk_whlp_state *state,
41 int font, word *text, int subsidiary);
42 static void whlp_navmenu(struct bk_whlp_state *state, paragraph *p);
43 static void whlp_contents_write(struct bk_whlp_state *state,
44 int level, char *text, WHLP_TOPIC topic);
45
46 paragraph *whlp_config_filename(char *filename)
47 {
48 return cmdline_cfg_simple("winhelp-filename", filename, NULL);
49 }
50
51 void whlp_backend(paragraph *sourceform, keywordlist *keywords,
52 indexdata *idx, void *unused) {
53 WHLP h;
54 char *filename, *cntname;
55 paragraph *p, *lastsect;
56 struct bk_whlp_state state;
57 WHLP_TOPIC contents_topic;
58 int i;
59 int nesting;
60 indexentry *ie;
61 int done_contents_topic = FALSE;
62
63 IGNORE(unused);
64
65 h = state.h = whlp_new();
66 state.keywords = keywords;
67 state.idx = idx;
68
69 whlp_start_macro(h, "CB(\"btn_about\",\"&About\",\"About()\")");
70 whlp_start_macro(h, "CB(\"btn_up\",\"&Up\",\"Contents()\")");
71 whlp_start_macro(h, "BrowseButtons()");
72
73 whlp_create_font(h, "Times New Roman", WHLP_FONTFAM_SERIF, 24,
74 0, 0, 0, 0);
75 whlp_create_font(h, "Times New Roman", WHLP_FONTFAM_SERIF, 24,
76 WHLP_FONT_ITALIC, 0, 0, 0);
77 whlp_create_font(h, "Courier New", WHLP_FONTFAM_FIXED, 24,
78 0, 0, 0, 0);
79 whlp_create_font(h, "Courier New", WHLP_FONTFAM_FIXED, 24,
80 WHLP_FONT_ITALIC, 0, 0, 0);
81 whlp_create_font(h, "Courier New", WHLP_FONTFAM_FIXED, 24,
82 WHLP_FONT_BOLD, 0, 0, 0);
83 whlp_create_font(h, "Arial", WHLP_FONTFAM_SERIF, 30,
84 WHLP_FONT_BOLD, 0, 0, 0);
85 whlp_create_font(h, "Arial", WHLP_FONTFAM_SERIF, 30,
86 WHLP_FONT_BOLD|WHLP_FONT_ITALIC, 0, 0, 0);
87 whlp_create_font(h, "Courier New", WHLP_FONTFAM_FIXED, 30,
88 WHLP_FONT_BOLD, 0, 0, 0);
89 whlp_create_font(h, "Courier New", WHLP_FONTFAM_SANS, 18,
90 WHLP_FONT_STRIKEOUT, 0, 0, 0);
91
92 /*
93 * Loop over the source form finding out whether the user has
94 * specified particular help topic names for anything. Also
95 * pick out the output file name at this stage.
96 */
97 filename = dupstr("output.hlp");
98 for (p = sourceform; p; p = p->next) {
99 p->private_data = NULL;
100 if (p->type == para_Config && p->parent) {
101 if (!ustricmp(p->keyword, L"winhelp-topic")) {
102 char *topicname;
103 whlp_convert(uadv(p->keyword), 0, &topicname, 0);
104 /* Store the topic name in the private_data field of the
105 * containing section. */
106 p->parent->private_data = topicname;
107 } else if (!ustricmp(p->keyword, L"winhelp-filename")) {
108 sfree(filename);
109 filename = dupstr(adv(p->origkeyword));
110 }
111 }
112 }
113
114 /*
115 * Ensure the output file name has a .hlp extension. This is
116 * required since we must create the .cnt file in parallel with
117 * it.
118 */
119 {
120 int len = strlen(filename);
121 if (len < 4 || filename[len-4] != '.' ||
122 tolower(filename[len-3] != 'h') ||
123 tolower(filename[len-2] != 'l') ||
124 tolower(filename[len-1] != 'p')) {
125 char *newf;
126 newf = mknewa(char, len + 5);
127 sprintf(newf, "%s.hlp", filename);
128 sfree(filename);
129 filename = newf;
130 len = strlen(newf);
131 }
132 cntname = mknewa(char, len+1);
133 sprintf(cntname, "%.*s.cnt", len-4, filename);
134 }
135
136 state.cntfp = fopen(cntname, "wb");
137 state.cnt_last_level = -1; state.cnt_workaround = 0;
138
139 /*
140 * Loop over the source form registering WHLP_TOPICs for
141 * everything.
142 */
143
144 contents_topic = whlp_register_topic(h, "Top", NULL);
145 whlp_primary_topic(h, contents_topic);
146 for (p = sourceform; p; p = p->next) {
147 if (p->type == para_Chapter ||
148 p->type == para_Appendix ||
149 p->type == para_UnnumberedChapter ||
150 p->type == para_Heading ||
151 p->type == para_Subsect) {
152 char *topicid = p->private_data;
153 char *errstr;
154
155 p->private_data = whlp_register_topic(h, topicid, &errstr);
156 if (!p->private_data) {
157 p->private_data = whlp_register_topic(h, NULL, NULL);
158 error(err_winhelp_ctxclash, &p->fpos, topicid, errstr);
159 }
160 sfree(topicid);
161 }
162 }
163
164 /*
165 * Loop over the index entries, preparing final text forms for
166 * each one.
167 */
168 for (i = 0; (ie = index234(idx->entries, i)) != NULL; i++) {
169 rdstringc rs = {0, 0, NULL};
170 whlp_rdaddwc(&rs, ie->text);
171 ie->backend_data = rs.text;
172 }
173
174 whlp_prepare(h);
175
176 /* ------------------------------------------------------------------
177 * Begin the contents page.
178 */
179
180 whlp_begin_topic(h, contents_topic, "Contents", "DB(\"btn_up\")", NULL);
181
182 /*
183 * The manual title goes in the non-scroll region, and also
184 * goes into the system title slot.
185 */
186 {
187 rdstringc rs = {0, 0, NULL};
188 for (p = sourceform; p; p = p->next) {
189 if (p->type == para_Title) {
190 whlp_begin_para(h, WHLP_PARA_NONSCROLL);
191 whlp_mkparagraph(&state, FONT_TITLE, p->words, FALSE);
192 whlp_rdaddwc(&rs, p->words);
193 whlp_end_para(h);
194 }
195 }
196 if (rs.text) {
197 whlp_title(h, rs.text);
198 fprintf(state.cntfp, ":Title %s\r\n", rs.text);
199 sfree(rs.text);
200 }
201 whlp_contents_write(&state, 1, "Title page", contents_topic);
202 /* FIXME: configurability in that string */
203 }
204
205 /*
206 * Put the copyright into the system section.
207 */
208 {
209 rdstringc rs = {0, 0, NULL};
210 for (p = sourceform; p; p = p->next) {
211 if (p->type == para_Copyright)
212 whlp_rdaddwc(&rs, p->words);
213 }
214 if (rs.text) {
215 whlp_copyright(h, rs.text);
216 sfree(rs.text);
217 }
218 }
219
220 lastsect = NULL;
221
222 /* ------------------------------------------------------------------
223 * Now we've done the contents page, we're ready to go through
224 * and do the main manual text. Ooh.
225 */
226 nesting = 0;
227 for (p = sourceform; p; p = p->next) switch (p->type) {
228 /*
229 * Things we ignore because we've already processed them or
230 * aren't going to touch them in this pass.
231 */
232 case para_IM:
233 case para_BR:
234 case para_Biblio: /* only touch BiblioCited */
235 case para_VersionID:
236 case para_NoCite:
237 case para_Title:
238 break;
239
240 case para_LcontPush:
241 case para_QuotePush:
242 nesting++;
243 break;
244 case para_LcontPop:
245 case para_QuotePop:
246 assert(nesting > 0);
247 nesting--;
248 break;
249
250 /*
251 * Chapter and section titles: start a new Help topic.
252 */
253 case para_Chapter:
254 case para_Appendix:
255 case para_UnnumberedChapter:
256 case para_Heading:
257 case para_Subsect:
258
259 if (!done_contents_topic) {
260 paragraph *p;
261
262 /*
263 * If this is the first section title we've seen, then
264 * we're currently still in the contents topic. We
265 * should therefore finish up the contents page by
266 * writing a nav menu.
267 */
268 for (p = sourceform; p; p = p->next) {
269 if (p->type == para_Chapter ||
270 p->type == para_Appendix ||
271 p->type == para_UnnumberedChapter)
272 whlp_navmenu(&state, p);
273 }
274
275 state.curr_topic = contents_topic;
276
277 done_contents_topic = TRUE;
278 }
279
280 if (lastsect && lastsect->child) {
281 paragraph *q;
282 /*
283 * Do a navigation menu for the previous section we
284 * were in.
285 */
286 for (q = lastsect->child; q; q = q->sibling)
287 whlp_navmenu(&state, q);
288 }
289 {
290 rdstringc rs = {0, 0, NULL};
291 WHLP_TOPIC new_topic, parent_topic;
292 char *macro, *topicid;
293
294 new_topic = p->private_data;
295 whlp_browse_link(h, state.curr_topic, new_topic);
296 state.curr_topic = new_topic;
297
298 if (p->kwtext) {
299 whlp_rdaddwc(&rs, p->kwtext);
300 rdaddsc(&rs, ": "); /* FIXME: configurability */
301 }
302 whlp_rdaddwc(&rs, p->words);
303 if (p->parent == NULL)
304 parent_topic = contents_topic;
305 else
306 parent_topic = (WHLP_TOPIC)p->parent->private_data;
307 topicid = whlp_topic_id(parent_topic);
308 macro = smalloc(100+strlen(topicid));
309 sprintf(macro,
310 "CBB(\"btn_up\",\"JI(`',`%s')\");EB(\"btn_up\")",
311 topicid);
312 whlp_begin_topic(h, new_topic,
313 rs.text ? rs.text : "",
314 macro, NULL);
315 sfree(macro);
316
317 {
318 /*
319 * Output the .cnt entry.
320 *
321 * WinHelp has a bug involving having an internal
322 * node followed by a leaf at the same level: the
323 * leaf is output at the wrong level. We can mostly
324 * work around this by modifying the leaf level
325 * itself (see whlp_contents_write), but this
326 * doesn't work for top-level sections since we
327 * can't turn a level-1 leaf into a level-0 one. So
328 * for top-level leaf sections (Bibliography
329 * springs to mind), we output an internal node
330 * containing only the leaf for that section.
331 */
332 int i;
333 paragraph *q;
334
335 /* Count up the level. */
336 i = 1;
337 for (q = p; q->parent; q = q->parent) i++;
338
339 if (p->child || !p->parent) {
340 /*
341 * If p has children then it needs to be a
342 * folder; if it has no parent then it needs to
343 * be a folder to work around the bug.
344 */
345 whlp_contents_write(&state, i, rs.text, NULL);
346 i++;
347 }
348 whlp_contents_write(&state, i, rs.text, new_topic);
349 }
350
351 sfree(rs.text);
352
353 whlp_begin_para(h, WHLP_PARA_NONSCROLL);
354 if (p->kwtext) {
355 whlp_mkparagraph(&state, FONT_TITLE, p->kwtext, FALSE);
356 whlp_set_font(h, FONT_TITLE);
357 whlp_text(h, ": "); /* FIXME: configurability */
358 }
359 whlp_mkparagraph(&state, FONT_TITLE, p->words, FALSE);
360 whlp_end_para(h);
361
362 lastsect = p;
363 }
364 break;
365
366 case para_Rule:
367 whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12);
368 whlp_para_attr(h, WHLP_PARA_ALIGNMENT, WHLP_ALIGN_CENTRE);
369 whlp_begin_para(h, WHLP_PARA_SCROLL);
370 whlp_set_font(h, FONT_RULE);
371 #define TEN "\xA0\xA0\xA0\xA0\xA0\xA0\xA0\xA0\xA0\xA0"
372 #define TWENTY TEN TEN
373 #define FORTY TWENTY TWENTY
374 #define EIGHTY FORTY FORTY
375 whlp_text(h, EIGHTY);
376 #undef TEN
377 #undef TWENTY
378 #undef FORTY
379 #undef EIGHTY
380 whlp_end_para(h);
381 break;
382
383 case para_Normal:
384 case para_Copyright:
385 case para_DescribedThing:
386 case para_Description:
387 case para_BiblioCited:
388 case para_Bullet:
389 case para_NumberedList:
390 whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12);
391 if (p->type == para_Bullet || p->type == para_NumberedList) {
392 whlp_para_attr(h, WHLP_PARA_LEFTINDENT, 72*nesting + 72);
393 whlp_para_attr(h, WHLP_PARA_FIRSTLINEINDENT, -36);
394 whlp_set_tabstop(h, 72, WHLP_ALIGN_LEFT);
395 whlp_begin_para(h, WHLP_PARA_SCROLL);
396 whlp_set_font(h, FONT_NORMAL);
397 if (p->type == para_Bullet) {
398 whlp_text(h, "\x95");
399 } else {
400 whlp_mkparagraph(&state, FONT_NORMAL, p->kwtext, FALSE);
401 whlp_text(h, ".");
402 }
403 whlp_tab(h);
404 } else {
405 whlp_para_attr(h, WHLP_PARA_LEFTINDENT,
406 72*nesting + (p->type==para_Description ? 72 : 0));
407 whlp_begin_para(h, WHLP_PARA_SCROLL);
408 }
409
410 if (p->type == para_BiblioCited) {
411 whlp_mkparagraph(&state, FONT_NORMAL, p->kwtext, FALSE);
412 whlp_text(h, " ");
413 }
414
415 whlp_mkparagraph(&state, FONT_NORMAL, p->words, FALSE);
416 whlp_end_para(h);
417 break;
418
419 case para_Code:
420 /*
421 * In a code paragraph, each individual word is a line. For
422 * Help files, we will have to output this as a set of
423 * paragraphs, all but the last of which don't set
424 * SPACEBELOW.
425 */
426 {
427 word *w;
428 wchar_t *t, *e;
429 char *c;
430
431 for (w = p->words; w; w = w->next) if (w->type == word_WeakCode) {
432 t = w->text;
433 if (w->next && w->next->type == word_Emph) {
434 w = w->next;
435 e = w->text;
436 } else
437 e = NULL;
438
439 if (!w->next)
440 whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12);
441
442 whlp_para_attr(h, WHLP_PARA_LEFTINDENT, 72*nesting);
443 whlp_begin_para(h, WHLP_PARA_SCROLL);
444 while (e && *e && *t) {
445 int n;
446 int ec = *e;
447
448 for (n = 0; t[n] && e[n] && e[n] == ec; n++);
449 if (ec == 'i')
450 whlp_set_font(h, FONT_ITAL_CODE);
451 else if (ec == 'b')
452 whlp_set_font(h, FONT_BOLD_CODE);
453 else
454 whlp_set_font(h, FONT_CODE);
455 whlp_convert(t, n, &c, FALSE);
456 whlp_text(h, c);
457 sfree(c);
458 t += n;
459 e += n;
460 }
461 whlp_set_font(h, FONT_CODE);
462 whlp_convert(t, 0, &c, FALSE);
463 whlp_text(h, c);
464 sfree(c);
465 whlp_end_para(h);
466 }
467 }
468 break;
469 }
470
471 fclose(state.cntfp);
472 whlp_close(h, filename);
473
474 /*
475 * Loop over the index entries, cleaning up our final text
476 * forms.
477 */
478 for (i = 0; (ie = index234(idx->entries, i)) != NULL; i++) {
479 sfree(ie->backend_data);
480 }
481
482 sfree(filename);
483 sfree(cntname);
484 }
485
486 static void whlp_contents_write(struct bk_whlp_state *state,
487 int level, char *text, WHLP_TOPIC topic) {
488 /*
489 * Horrifying bug in WinHelp. When dropping a section level or
490 * more without using a folder-type entry, WinHelp accidentally
491 * adds one to the section level. So we correct for that here.
492 */
493 if (state->cnt_last_level > level && topic)
494 state->cnt_workaround = -1;
495 else if (!topic)
496 state->cnt_workaround = 0;
497 state->cnt_last_level = level;
498
499 fprintf(state->cntfp, "%d ", level + state->cnt_workaround);
500 while (*text) {
501 if (*text == '=')
502 fputc('\\', state->cntfp);
503 fputc(*text, state->cntfp);
504 text++;
505 }
506 if (topic)
507 fprintf(state->cntfp, "=%s", whlp_topic_id(topic));
508 fputc('\n', state->cntfp);
509 }
510
511 static void whlp_navmenu(struct bk_whlp_state *state, paragraph *p) {
512 whlp_begin_para(state->h, WHLP_PARA_NONSCROLL);
513 whlp_start_hyperlink(state->h, (WHLP_TOPIC)p->private_data);
514 if (p->kwtext) {
515 whlp_mkparagraph(state, FONT_NORMAL, p->kwtext, TRUE);
516 whlp_set_font(state->h, FONT_NORMAL);
517 whlp_text(state->h, ": "); /* FIXME: configurability */
518 }
519 whlp_mkparagraph(state, FONT_NORMAL, p->words, TRUE);
520 whlp_end_hyperlink(state->h);
521 whlp_end_para(state->h);
522
523 }
524
525 static void whlp_mkparagraph(struct bk_whlp_state *state,
526 int font, word *text, int subsidiary) {
527 keyword *kwl;
528 int deffont = font;
529 int currfont = -1;
530 int newfont;
531 char *c;
532 paragraph *xref_target = NULL;
533
534 for (; text; text = text->next) switch (text->type) {
535 case word_HyperLink:
536 case word_HyperEnd:
537 break;
538
539 case word_IndexRef:
540 if (subsidiary) break; /* disabled in subsidiary bits */
541 {
542 indextag *tag = index_findtag(state->idx, text->text);
543 int i;
544 if (!tag)
545 break;
546 for (i = 0; i < tag->nrefs; i++)
547 whlp_index_term(state->h, tag->refs[i]->backend_data,
548 state->curr_topic);
549 }
550 break;
551
552 case word_UpperXref:
553 case word_LowerXref:
554 if (subsidiary) break; /* disabled in subsidiary bits */
555 kwl = kw_lookup(state->keywords, text->text);
556 assert(xref_target == NULL);
557 if (kwl) {
558 if (kwl->para->type == para_NumberedList) {
559 break; /* don't xref to numbered list items */
560 } else if (kwl->para->type == para_BiblioCited) {
561 /*
562 * An xref to a bibliography item jumps to the section
563 * containing it.
564 */
565 if (kwl->para->parent)
566 xref_target = kwl->para->parent;
567 else
568 break;
569 } else {
570 xref_target = kwl->para;
571 }
572 whlp_start_hyperlink(state->h,
573 (WHLP_TOPIC)xref_target->private_data);
574 }
575 break;
576
577 case word_XrefEnd:
578 if (subsidiary) break; /* disabled in subsidiary bits */
579 if (xref_target)
580 whlp_end_hyperlink(state->h);
581 xref_target = NULL;
582 break;
583
584 case word_Normal:
585 case word_Emph:
586 case word_Code:
587 case word_WeakCode:
588 case word_WhiteSpace:
589 case word_EmphSpace:
590 case word_CodeSpace:
591 case word_WkCodeSpace:
592 case word_Quote:
593 case word_EmphQuote:
594 case word_CodeQuote:
595 case word_WkCodeQuote:
596 if (towordstyle(text->type) == word_Emph)
597 newfont = deffont + FONT_EMPH;
598 else if (towordstyle(text->type) == word_Code ||
599 towordstyle(text->type) == word_WeakCode)
600 newfont = deffont + FONT_CODE;
601 else
602 newfont = deffont;
603 if (newfont != currfont) {
604 currfont = newfont;
605 whlp_set_font(state->h, newfont);
606 }
607 if (removeattr(text->type) == word_Normal) {
608 if (whlp_convert(text->text, 0, &c, TRUE) || !text->alt)
609 whlp_text(state->h, c);
610 else
611 whlp_mkparagraph(state, deffont, text->alt, FALSE);
612 sfree(c);
613 } else if (removeattr(text->type) == word_WhiteSpace) {
614 whlp_text(state->h, " ");
615 } else if (removeattr(text->type) == word_Quote) {
616 whlp_text(state->h,
617 quoteaux(text->aux) == quote_Open ? "\x91" : "\x92");
618 /* FIXME: configurability */
619 }
620 break;
621 }
622 }
623
624 static void whlp_rdaddwc(rdstringc *rs, word *text) {
625 char *c;
626
627 for (; text; text = text->next) switch (text->type) {
628 case word_HyperLink:
629 case word_HyperEnd:
630 case word_UpperXref:
631 case word_LowerXref:
632 case word_XrefEnd:
633 case word_IndexRef:
634 break;
635
636 case word_Normal:
637 case word_Emph:
638 case word_Code:
639 case word_WeakCode:
640 case word_WhiteSpace:
641 case word_EmphSpace:
642 case word_CodeSpace:
643 case word_WkCodeSpace:
644 case word_Quote:
645 case word_EmphQuote:
646 case word_CodeQuote:
647 case word_WkCodeQuote:
648 assert(text->type != word_CodeQuote &&
649 text->type != word_WkCodeQuote);
650 if (removeattr(text->type) == word_Normal) {
651 if (whlp_convert(text->text, 0, &c, FALSE) || !text->alt)
652 rdaddsc(rs, c);
653 else
654 whlp_rdaddwc(rs, text->alt);
655 sfree(c);
656 } else if (removeattr(text->type) == word_WhiteSpace) {
657 rdaddc(rs, ' ');
658 } else if (removeattr(text->type) == word_Quote) {
659 rdaddc(rs, quoteaux(text->aux) == quote_Open ? '\x91' : '\x92');
660 /* FIXME: configurability */
661 }
662 break;
663 }
664 }
665
666 /*
667 * Convert a wide string into a string of chars. If `result' is
668 * non-NULL, mallocs the resulting string and stores a pointer to
669 * it in `*result'. If `result' is NULL, merely checks whether all
670 * characters in the string are feasible for the output character
671 * set.
672 *
673 * Return is nonzero if all characters are OK. If not all
674 * characters are OK but `result' is non-NULL, a result _will_
675 * still be generated!
676 */
677 static int whlp_convert(wchar_t *s, int maxlen,
678 char **result, int hard_spaces) {
679 wchar_t *s2;
680 char *ret;
681 int ok;
682
683 /*
684 * Enforce maxlen.
685 */
686 if (maxlen > 0 && ustrlen(s) > maxlen) {
687 s2 = mknewa(wchar_t, maxlen+1);
688 memcpy(s2, s, maxlen * sizeof(wchar_t));
689 s2[maxlen] = L'\0';
690 s = s2;
691 } else
692 s2 = NULL;
693
694 /*
695 * We currently only support Win1252 in Windows Help files,
696 * because I don't know how to fiddle the character set
697 * designation in the |SYSTEM file to indicate anything else.
698 */
699
700 ret = utoa_careful_dup(s, CS_CP1252);
701 if (!ret) {
702 ok = FALSE;
703 ret = utoa_dup(s, CS_CP1252);
704 } else
705 ok = TRUE;
706
707 /*
708 * Enforce hard_spaces.
709 */
710 if (hard_spaces) {
711 char *p;
712
713 for (p = ret; *p; p++)
714 if (*p == ' ')
715 *p = '\240';
716 }
717
718 if (s2)
719 sfree(s2);
720
721 *result = ret;
722
723 return ok;
724 }