Add \cfg / -C directives to allow the user to choose the output file
[sgt/halibut] / bk_xhtml.c
1 /*
2 * xhtml backend for Halibut
3 * (initial implementation by James Aylett)
4 *
5 * Still to do:
6 *
7 * +++ doesn't handle non-breaking hyphens. Not sure how to yet.
8 * +++ entity names (from a file -- ideally supply normal SGML files)
9 * +++ configuration directive to file split where the current layout
10 * code wouldn't. Needs changes to _ponder_layout() and _do_paras(),
11 * perhaps others.
12 *
13 * Limitations:
14 *
15 * +++ biblio/index references target the nearest section marker, rather
16 * than having a dedicated target themselves. In large bibliographies
17 * this will cause problems. (The solution is to fake up a response
18 * from xhtml_find_section(), probably linking it into the sections
19 * chain just in case we need it again, and to make freeing it up
20 * easier.) docsrc.pl used to work as we do, however, and SGT agrees that
21 * this is acceptable for now.
22 * +++ can't cope with leaf-level == 0. It's all to do with the
23 * top-level file not being normal, probably not even having a valid
24 * section level, and stuff like that. I question whether this is an
25 * issue, frankly; small manuals that fit on one page should probably
26 * not be written in halibut at all.
27 */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33 #include "halibut.h"
34
/*
 * Default output file names. Each one is only a default: xhtml_configure()
 * replaces it when the corresponding xhtml-*-filename configuration
 * directive is present.
 *
 * FILENAME_TEMPLATE (overridable in config of course) allows you
 * to choose the general form for your HTML file names. It is
 * slightly printf-styled (% followed by a single character is a
 * formatting directive, %% is a literal %). Formatting directives
 * are:
 *
 *  - %n is the section number, minus whitespace (`Chapter1.2').
 *  - %b is the section number on its own (`1.2').
 *  - %k is the section's _internal_ keyword.
 *  - %N is the section's visible title in the output, again minus
 *    whitespace.
 *
 * %n, %b and %k will all default to %N if the section is
 * unnumbered (`Bibliography' is often a good example).
 */

#define FILENAME_SINGLE "Manual.html"       /* default for xhtml-single-filename */
#define FILENAME_CONTENTS "Contents.html"   /* default for xhtml-contents-filename */
#define FILENAME_INDEX "IndexPage.html"     /* default for xhtml-index-filename */
#define FILENAME_TEMPLATE "%n.html"         /* default for xhtml-template-filename */
56
/*
 * One section (chapter, heading, subsection...) of the document.
 * Sections are linked both into a tree (next/child/parent) and into a
 * single flat chain running through every section created.
 */
struct xhtmlsection_Struct {
    struct xhtmlsection_Struct *next; /* next sibling (NULL if split across files) */
    struct xhtmlsection_Struct *child; /* NULL if split across files */
    struct xhtmlsection_Struct *parent; /* NULL if split across files */
    struct xhtmlsection_Struct *chain; /* single structure independent of weird trees;
                                        * points at the section created just before this one */
    paragraph *para; /* source paragraph that starts this section (NULL for the placeholder) */
    struct xhtmlfile_Struct *file; /* which file is this a part of? */
    char *fragment; /* fragment id within the file */
    int level; /* value from xhtml_para_level(); -1 marks the placeholder at the end of the chain */
};
67
/*
 * One output file. Files form a tree (next/child/parent) that is built
 * by xhtml_ponder_layout() and released by xhtml_free_file().
 */
struct xhtmlfile_Struct {
    struct xhtmlfile_Struct *next; /* next sibling file */
    struct xhtmlfile_Struct *child; /* first file nested below this one */
    struct xhtmlfile_Struct *parent; /* enclosing file (NULL for the top file) */
    char *filename; /* heap-allocated output file name; freed by xhtml_free_file() */
    struct xhtmlsection_Struct *sections; /* sections within this file (only one for non-leaf) */
    int is_leaf; /* is this file a leaf file, ie does it not have any children? */
};
76
typedef struct xhtmlsection_Struct xhtmlsection;
typedef struct xhtmlfile_Struct xhtmlfile;
typedef struct xhtmlindex_Struct xhtmlindex;

/*
 * Per-index-entry backend data (hung off indexentry->backend_data):
 * the list of sections in which the entry is referenced.
 */
struct xhtmlindex_Struct {
    int nsection; /* number of entries of sections[] in use */
    int size; /* NOTE(review): presumably the allocated capacity of sections[] -- confirm */
    xhtmlsection **sections; /* referenced sections; individual entries may be NULL */
};
86
/*
 * Heading number format for one level of heading. Filled in by
 * xhtml_configure() from the xhtml-chapter-* and xhtml-section-*
 * directives.
 */
typedef struct {
    int just_numbers; /* boolean from the ...-numeric directive */
    wchar_t *number_suffix; /* string from the ...-suffix directive (defaults L": " / L" ") */
} xhtmlheadfmt;
91
/*
 * Complete backend configuration, as produced by xhtml_configure().
 * Each field corresponds to an xhtml-* configuration directive.
 */
typedef struct {
    int contents_depth[6]; /* xhtml-contents-depth-0 .. -5 */
    int leaf_contains_contents; /* xhtml-leaf-contains-contents */
    int leaf_level; /* xhtml-leaf-level */
    int leaf_smallest_contents; /* xhtml-leaf-smallest-contents */
    int include_version_id; /* xhtml-versionid */
    wchar_t *author, *description; /* xhtml-author, xhtml-description (NULL if unset) */
    /* xhtml-head-end, xhtml-body-tag, xhtml-body-start, xhtml-body-end,
     * xhtml-address-start, xhtml-address-end, xhtml-navigation-attributes
     * (each NULL if unset) */
    wchar_t *head_end, *body, *body_start, *body_end, *address_start, *address_end, *nav_attrs;
    int suppress_address; /* xhtml-suppress-address */
    xhtmlheadfmt fchapter, *fsect; /* chapter format; heap array of per-level section formats */
    int nfsect; /* number of elements in fsect */
    char *contents_filename, *index_filename; /* heap-allocated output file names */
    char *single_filename, *template_filename; /* heap-allocated; template uses % directives */
} xhtmlconfig;
106
/* Prototypes belonging to the old output system (disabled): */
/*static void xhtml_level(paragraph *, int);
static void xhtml_level_0(paragraph *);
static void xhtml_docontents(FILE *, paragraph *, int);
static void xhtml_dosections(FILE *, paragraph *, int);
static void xhtml_dobody(FILE *, paragraph *, int);*/

/* Forward declarations of static helpers that are used before they are defined. */

/* Page furniture */
static void xhtml_doheader(FILE *, word *);
static void xhtml_dofooter(FILE *);
static void xhtml_versionid(FILE *, word *, int);

/* Small utilities */
static void xhtml_utostr(wchar_t *, char **);
static int xhtml_para_level(paragraph *);
static int xhtml_reservedchar(int);

/* Text conversion and paragraph output */
static int xhtml_convert(wchar_t *, int, char **, int);
static void xhtml_rdaddwc(rdstringc *, word *, word *, int);
static void xhtml_para(FILE *, word *, int);
static void xhtml_codepara(FILE *, word *);
static void xhtml_heading(FILE *, paragraph *, int);
126
/* File-global variables are much easier than passing these things
 * all over the place. Evil, but easier. We can replace this with a single
 * structure at some point.
 */
static xhtmlconfig conf;          /* active configuration, set by xhtml_backend() */
static keywordlist *keywords;     /* keyword list passed to xhtml_backend() */
static indexdata *idx;            /* index data passed to xhtml_backend() */
static xhtmlfile *topfile;        /* root of the file tree (built by xhtml_ponder_layout()) */
static xhtmlsection *topsection;  /* most recently created section; start of the `chain' list */
static paragraph *sourceparas;    /* first paragraph of the source document */
static xhtmlfile *lastfile;       /* reset to NULL by xhtml_ponder_layout() */
static xhtmlfile *xhtml_last_file = NULL; /* file most recently written by xhtml_do_file();
                                           * target of the `Previous' navigation link */
static int last_level=-1, start_level;    /* nesting state shared by the contents writers */
static xhtmlsection *currentsection;
141
142 static xhtmlconfig xhtml_configure(paragraph *source)
143 {
144 xhtmlconfig ret;
145
146 /*
147 * Defaults.
148 */
149 ret.contents_depth[0] = 2;
150 ret.contents_depth[1] = 3;
151 ret.contents_depth[2] = 4;
152 ret.contents_depth[3] = 5;
153 ret.contents_depth[4] = 6;
154 ret.contents_depth[5] = 7;
155 ret.leaf_level = 2;
156 ret.leaf_smallest_contents = 4;
157 ret.leaf_contains_contents = FALSE;
158 ret.include_version_id = TRUE;
159 ret.author = NULL;
160 ret.description = NULL;
161 ret.head_end = NULL;
162 ret.body = NULL;
163 ret.body_start = NULL;
164 ret.body_end = NULL;
165 ret.address_start = NULL;
166 ret.address_end = NULL;
167 ret.nav_attrs = NULL;
168 ret.suppress_address = FALSE;
169
170 ret.fchapter.just_numbers = FALSE;
171 ret.fchapter.number_suffix = L": ";
172 ret.nfsect = 2;
173 ret.fsect = mknewa(xhtmlheadfmt, ret.nfsect);
174 ret.fsect[0].just_numbers = FALSE;
175 ret.fsect[0].number_suffix = L": ";
176 ret.fsect[1].just_numbers = TRUE;
177 ret.fsect[1].number_suffix = L" ";
178 ret.contents_filename = strdup(FILENAME_CONTENTS);
179 ret.single_filename = strdup(FILENAME_SINGLE);
180 ret.index_filename = strdup(FILENAME_INDEX);
181 ret.template_filename = strdup(FILENAME_TEMPLATE);
182
183 for (; source; source = source->next)
184 {
185 if (source->type == para_Config)
186 {
187 if (!ustricmp(source->keyword, L"xhtml-contents-filename")) {
188 sfree(ret.contents_filename);
189 ret.contents_filename = utoa_dup(uadv(source->keyword));
190 } else if (!ustricmp(source->keyword, L"xhtml-single-filename")) {
191 sfree(ret.single_filename);
192 ret.single_filename = utoa_dup(uadv(source->keyword));
193 } else if (!ustricmp(source->keyword, L"xhtml-index-filename")) {
194 sfree(ret.index_filename);
195 ret.index_filename = utoa_dup(uadv(source->keyword));
196 } else if (!ustricmp(source->keyword, L"xhtml-template-filename")) {
197 sfree(ret.template_filename);
198 ret.template_filename = utoa_dup(uadv(source->keyword));
199 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-0")) {
200 ret.contents_depth[0] = utoi(uadv(source->keyword));
201 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-1")) {
202 ret.contents_depth[1] = utoi(uadv(source->keyword));
203 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-2")) {
204 ret.contents_depth[2] = utoi(uadv(source->keyword));
205 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-3")) {
206 ret.contents_depth[3] = utoi(uadv(source->keyword));
207 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-4")) {
208 ret.contents_depth[4] = utoi(uadv(source->keyword));
209 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-5")) {
210 ret.contents_depth[5] = utoi(uadv(source->keyword));
211 } else if (!ustricmp(source->keyword, L"xhtml-leaf-level")) {
212 ret.leaf_level = utoi(uadv(source->keyword));
213 } else if (!ustricmp(source->keyword, L"xhtml-leaf-smallest-contents")) {
214 ret.leaf_smallest_contents = utoi(uadv(source->keyword));
215 } else if (!ustricmp(source->keyword, L"xhtml-versionid")) {
216 ret.include_version_id = utob(uadv(source->keyword));
217 } else if (!ustricmp(source->keyword, L"xhtml-leaf-contains-contents")) {
218 ret.leaf_contains_contents = utob(uadv(source->keyword));
219 } else if (!ustricmp(source->keyword, L"xhtml-suppress-address")) {
220 ret.suppress_address = utob(uadv(source->keyword));
221 } else if (!ustricmp(source->keyword, L"xhtml-author")) {
222 ret.author = uadv(source->keyword);
223 } else if (!ustricmp(source->keyword, L"xhtml-description")) {
224 ret.description = uadv(source->keyword);
225 } else if (!ustricmp(source->keyword, L"xhtml-head-end")) {
226 ret.head_end = uadv(source->keyword);
227 } else if (!ustricmp(source->keyword, L"xhtml-body-start")) {
228 ret.body_start = uadv(source->keyword);
229 } else if (!ustricmp(source->keyword, L"xhtml-body-tag")) {
230 ret.body = uadv(source->keyword);
231 } else if (!ustricmp(source->keyword, L"xhtml-body-end")) {
232 ret.body_end = uadv(source->keyword);
233 } else if (!ustricmp(source->keyword, L"xhtml-address-start")) {
234 ret.address_start = uadv(source->keyword);
235 } else if (!ustricmp(source->keyword, L"xhtml-address-end")) {
236 ret.address_end = uadv(source->keyword);
237 } else if (!ustricmp(source->keyword, L"xhtml-navigation-attributes")) {
238 ret.nav_attrs = uadv(source->keyword);
239 } else if (!ustricmp(source->keyword, L"xhtml-chapter-numeric")) {
240 ret.fchapter.just_numbers = utob(uadv(source->keyword));
241 } else if (!ustricmp(source->keyword, L"xhtml-chapter-suffix")) {
242 ret.fchapter.number_suffix = uadv(source->keyword);
243 } else if (!ustricmp(source->keyword, L"xhtml-section-numeric")) {
244 wchar_t *p = uadv(source->keyword);
245 int n = 0;
246 if (uisdigit(*p)) {
247 n = utoi(p);
248 p = uadv(p);
249 }
250 if (n >= ret.nfsect) {
251 int i;
252 ret.fsect = resize(ret.fsect, n+1);
253 for (i = ret.nfsect; i <= n; i++)
254 ret.fsect[i] = ret.fsect[ret.nfsect-1];
255 ret.nfsect = n+1;
256 }
257 ret.fsect[n].just_numbers = utob(p);
258 } else if (!ustricmp(source->keyword, L"xhtml-section-suffix")) {
259 wchar_t *p = uadv(source->keyword);
260 int n = 0;
261 if (uisdigit(*p)) {
262 n = utoi(p);
263 p = uadv(p);
264 }
265 if (n >= ret.nfsect) {
266 int i;
267 ret.fsect = resize(ret.fsect, n+1);
268 for (i = ret.nfsect; i <= n; i++)
269 ret.fsect[i] = ret.fsect[ret.nfsect-1];
270 ret.nfsect = n+1;
271 }
272 ret.fsect[n].number_suffix = p;
273 }
274 }
275 }
276
277 /* printf(" !!! leaf_level = %i\n", ret.leaf_level);
278 printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]);
279 printf(" !!! contentdepth-1 = %i\n", ret.contents_depth[1]);
280 printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]);
281 printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]);
282 printf(" !!! contentdepth-4 = %i\n", ret.contents_depth[4]);
283 printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]);
284 printf(" !!! leaf_contains_contents = %i\n", ret.leaf_contains_contents);*/
285 return ret;
286 }
287
288 static xhtmlsection *xhtml_new_section(xhtmlsection *last)
289 {
290 xhtmlsection *ret = mknew(xhtmlsection);
291 ret->next=NULL;
292 ret->child=NULL;
293 ret->parent=NULL;
294 ret->chain=last;
295 ret->para=NULL;
296 ret->file=NULL;
297 ret->fragment=NULL;
298 ret->level=-1; /* marker: end of chain */
299 return ret;
300 }
301
302 /* Returns NULL or the section that marks that paragraph */
303 static xhtmlsection *xhtml_find_section(paragraph *p)
304 {
305 xhtmlsection *ret = topsection;
306 if (xhtml_para_level(p)==-1) { /* first, we back-track to a section paragraph */
307 paragraph *p2 = sourceparas;
308 paragraph *p3 = NULL;
309 while (p2 && p2!=p) {
310 if (xhtml_para_level(p2)!=-1) {
311 p3 = p2;
312 }
313 p2=p2->next;
314 }
315 if (p3==NULL) { /* for some reason, we couldn't find a section before this paragraph ... ? */
316 /* Note that this can happen, if you have a cross-reference to before the first chapter starts.
317 * So don't do that, then.
318 */
319 return NULL;
320 }
321 p=p3;
322 }
323 while (ret && ret->para != p) {
324 /* printf(" xhtml_find_section(): checking %s for para @ %p\n", ret->fragment, p);*/
325 ret=ret->chain;
326 }
327 return ret;
328 }
329
330 static xhtmlfile *xhtml_new_file(xhtmlsection *sect)
331 {
332 xhtmlfile *ret = mknew(xhtmlfile);
333
334 ret->next=NULL;
335 ret->child=NULL;
336 ret->parent=NULL;
337 ret->filename=NULL;
338 ret->sections=sect;
339 ret->is_leaf=(sect!=NULL && sect->level==conf.leaf_level);
340 if (sect==NULL) {
341 if (conf.leaf_level==0) { /* currently unused */
342 ret->filename = smalloc(strlen(conf.single_filename)+1);
343 sprintf(ret->filename, conf.single_filename);
344 } else {
345 ret->filename = smalloc(strlen(conf.contents_filename)+1);
346 sprintf(ret->filename, conf.contents_filename);
347 }
348 } else {
349 paragraph *p = sect->para;
350 rdstringc fname_c = { 0, 0, NULL };
351 char *c, *t;
352 word *w;
353 wchar_t *ws;
354
355 t = conf.template_filename;
356 while (*t) {
357 if (*t == '%' && t[1]) {
358 int fmt;
359
360 t++;
361 fmt = *t++;
362
363 if (fmt == '%') {
364 rdaddc(&fname_c, fmt);
365 continue;
366 }
367
368 w = NULL;
369 ws = NULL;
370
371 if (p->kwtext && fmt == 'n')
372 w = p->kwtext;
373 else if (p->kwtext2 && fmt == 'b')
374 w = p->kwtext2;
375 else if (p->keyword && *p->keyword && fmt == 'k')
376 ws = p->keyword;
377 else
378 w = p->words;
379
380 while (w) {
381 switch (removeattr(w->type))
382 {
383 case word_Normal:
384 /*case word_Emph:
385 case word_Code:
386 case word_WeakCode:*/
387 xhtml_utostr(w->text, &c);
388 rdaddsc(&fname_c,c);
389 sfree(c);
390 break;
391 }
392 w = w->next;
393 }
394 if (ws) {
395 xhtml_utostr(ws, &c);
396 rdaddsc(&fname_c,c);
397 sfree(c);
398 }
399 } else {
400 rdaddc(&fname_c, *t++);
401 }
402 }
403
404 ret->filename = rdtrimc(&fname_c);
405 }
406 /* printf(" ! new file '%s', is_leaf == %s\n", ret->filename, (ret->is_leaf)?("true"):("false"));*/
407 return ret;
408 }
409
410 /*
411 * Walk the tree fixing up files which are actually leaf (ie
412 * have no children) but aren't at leaf level, so they have the
413 * leaf flag set.
414 */
415 void xhtml_fixup_layout(xhtmlfile* file)
416 {
417 if (file->child==NULL) {
418 file->is_leaf = TRUE;
419 } else {
420 xhtml_fixup_layout(file->child);
421 }
422 if (file->next)
423 xhtml_fixup_layout(file->next);
424 }
425
/*
 * Create the tree structure so we know where everything goes.
 * Method:
 *
 * Ignoring file splitting, we have three choices with each new section:
 *
 *  +-----------------+-----------------+
 *  |                 |                 |
 *  X            +----X----+           (1)
 *               |         |
 *               Y        (2)
 *               |
 *              (3)
 *
 * Y is the last section we added (currentsect).
 * If sect is the section we want to add, then:
 *
 * (1) if sect->level < currentsect->level
 * (2) if sect->level == currentsect->level
 * (3) if sect->level > currentsect->level
 *
 * This requires the constraint that you never skip section numbers
 * (so you can't have a.b.c.d without all of a, a.b and a.b.c existing).
 *
 * Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change
 * more than one level at a time. Lots of asserts, and probably part of
 * the algorithm here, rely on this being true. (It currently isn't
 * enforced by halibut, however.)
 *
 * File splitting makes this harder. For instance, say we added at (3)
 * above and now need to add another section. We are splitting at level
 * 2, ie the level of Y. Z is the last section we added:
 *
 *  +-----------------+-----------------+
 *  |                 |                 |
 *  X            +----X----+           (1)
 *               |         |
 *          +----Y----+   (1)
 *          |         |
 *          Z        (2)
 *          |
 *         (3)
 *
 * The (1) case is now split; we need to search upwards to find where
 * to actually link in. The other two cases remain the same (and will
 * always be like this).
 *
 * The decision of whether to split to a new file is always made on
 * the same condition, though: is the level of this section higher
 * than the leaf_level configuration value or not.
 *
 * Treating the cases backwards:
 *
 * (3) same file if sect->level > conf.leaf_level, otherwise new file
 *
 *     if in the same file, currentsect->child points to sect
 *     otherwise the linking is done through the file tree (which works
 *     in more or less the same way, ie currentfile->child points to
 *     the new file)
 *
 * (2) same file if sect->level > conf.leaf_level, otherwise new file
 *
 *     if in the same file, currentsect->next points to sect
 *     otherwise file linking and currentfile->next points to the new
 *     file (we know that Z must have caused a new file to be created)
 *
 * (1) same file if sect->level > conf.leaf_level, otherwise new file
 *
 *     this is actually effectively the same case as (2) here,
 *     except that we first have to travel up the sections to figure
 *     out which section this new one will be a sibling of. In doing
 *     so, we may disappear off the top of a file and have to go up
 *     to its parent in the file tree.
 *
 * On return, the file-scope variables topfile (root of the file tree)
 * and topsection (most recently created section, i.e. the start of
 * the `chain' list) describe the layout.
 */
static void xhtml_ponder_layout(paragraph *p)
{
    xhtmlsection *lastsection; /* most recently created section (for chaining) */
    xhtmlsection *currentsect; /* `Y'/`Z' above: where we are in the section tree */
    xhtmlfile *currentfile;    /* file that currentsect lives in / was created for */

    lastfile = NULL;
    /* Placeholder section (level -1) and the top-level file. */
    topsection = xhtml_new_section(NULL);
    topfile = xhtml_new_file(NULL);
    lastsection = topsection;
    currentfile = topfile;
    currentsect = topsection;

    /* Single-file mode: everything hangs off the top file. */
    if (conf.leaf_level == 0) {
        topfile->is_leaf = 1;
        topfile->sections = topsection;
        topsection->file = topfile;
    }

    for (; p; p=p->next)
    {
        int level = xhtml_para_level(p);
        if (level>0) /* actually a section */
        {
            xhtmlsection *sect;
            word *w;
            char *c;
            rdstringc fname_c = { 0, 0, NULL };

            sect = xhtml_new_section(lastsection);
            lastsection = sect;
            sect->para = p;
            /* Build the fragment id from the section's number text. */
            for (w=(p->kwtext2)?(p->kwtext2):(p->words); w; w=w->next) /* kwtext2 because we want numbers only! */
            {
                switch (removeattr(w->type))
                {
                  case word_Normal:
                    /*case word_Emph:
                    case word_Code:
                    case word_WeakCode:*/
                    xhtml_utostr(w->text, &c);
                    rdaddsc(&fname_c,c);
                    sfree(c);
                    break;
                }
            }
            /* rdaddsc(&fname_c, ".html");*/
            sect->fragment = rdtrimc(&fname_c);
            sect->level = level;
            /* printf(" ! adding para @ %p as sect %s, level %i\n", sect->para, sect->fragment, level);*/

            if (level>currentsect->level) { /* case (3) */
                if (level>conf.leaf_level) { /* same file */
                    assert(currentfile->is_leaf);
                    currentsect->child = sect;
                    sect->parent=currentsect;
                    sect->file=currentfile;
                    /* printf("connected '%s' to existing file '%s' [I]\n", sect->fragment, currentfile->filename);*/
                    currentsect=sect;
                } else { /* new file */
                    xhtmlfile *file = xhtml_new_file(sect);
                    assert(!currentfile->is_leaf);
                    currentfile->child=file;
                    sect->file=file;
                    file->parent=currentfile;
                    /* printf("connected '%s' to new file '%s' [I]\n", sect->fragment, file->filename);*/
                    currentfile=file;
                    currentsect=sect;
                }
            } else if (level >= currentsect->file->sections->level) {
                /* Case (1) or (2) *AND* still under the section that starts
                 * the current file.
                 *
                 * I'm not convinced that this couldn't be rolled in with the
                 * final else {} leg further down. It seems a lot of effort
                 * this way.
                 */
                if (level>conf.leaf_level) { /* stick within the same file */
                    assert(currentfile->is_leaf);
                    sect->file = currentfile;
                    /* Climb to the section at our level that we become a sibling of. */
                    while (currentsect && currentsect->level > level &&
                           currentsect->file==currentsect->parent->file) {
                        currentsect = currentsect->parent;
                    }
                    assert(currentsect);
                    currentsect->next = sect;
                    assert(currentsect->level == sect->level);
                    sect->parent = currentsect->parent;
                    currentsect = sect;
                    /* printf("connected '%s' to existing file '%s' [II]\n", sect->fragment, currentfile->filename);*/
                } else { /* new file */
                    xhtmlfile *file = xhtml_new_file(sect);
                    sect->file=file;
                    currentfile->next=file;
                    file->parent=currentfile->parent;
                    file->is_leaf=(level==conf.leaf_level);
                    file->sections=sect;
                    /* printf("connected '%s' to new file '%s' [II]\n", sect->fragment, file->filename);*/
                    currentfile=file;
                    currentsect=sect;
                }
            } else { /* Case (1) or (2) and we must move up the file tree first */
                /* this loop is now probably irrelevant - we know we can't connect
                 * to anything in the current file */
                while (currentsect && level<currentsect->level) {
                    currentsect=currentsect->parent;
                    if (currentsect) {
                        /* printf(" * up one level to '%s'\n", currentsect->fragment);*/
                    } else {
                        /* printf(" * up one level (off top of current file)\n");*/
                    }
                }
                if (currentsect) {
                    /* I'm pretty sure this can now never fire */
                    assert(currentfile->is_leaf);
                    /* printf("connected '%s' to existing file '%s' [III]\n", sect->fragment, currentfile->filename);*/
                    sect->file = currentfile;
                    currentsect->next=sect;
                    currentsect=sect;
                } else { /* find a file we can attach to */
                    while (currentfile && currentfile->sections && level<currentfile->sections->level) {
                        currentfile=currentfile->parent;
                        if (currentfile) {
                            /* printf(" * up one file level to '%s'\n", currentfile->filename);*/
                        } else {
                            /* printf(" * up one file level (off top of tree)\n");*/
                        }
                    }
                    if (currentfile) { /* new file (we had to skip up a file to
                                          get here, so we must be dealing with a
                                          level no lower than the configured
                                          leaf_level */
                        xhtmlfile *file = xhtml_new_file(sect);
                        currentfile->next=file;
                        sect->file=file;
                        file->parent=currentfile->parent;
                        file->is_leaf=(level==conf.leaf_level);
                        file->sections=sect;
                        /* printf("connected '%s' to new file '%s' [III]\n", sect->fragment, file->filename);*/
                        currentfile=file;
                        currentsect=sect;
                    } else {
                        fatal(err_whatever, "Ran off the top trying to connect sibling: strange document.");
                    }
                }
            }
        }
    }
    topsection = lastsection; /* get correct end of the chain */
    xhtml_fixup_layout(topfile); /* leaf files not at leaf level marked as such */
}
653
/* Forward declarations for the file, contents and section writers. */
static void xhtml_do_index();
static void xhtml_do_file(xhtmlfile *file);
static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform);
static void xhtml_do_paras(FILE *fp, paragraph *p, paragraph *end, int indexable);
/* The contents writers return a count of entries; see their definitions. */
static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit);
static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit);
static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit);
static int xhtml_do_contents(FILE *fp, xhtmlfile *file);
static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file);
static void xhtml_do_sections(FILE *fp, xhtmlsection *sections);
664
665 /*
666 * Do all the files in this structure.
667 */
668 static void xhtml_do_files(xhtmlfile *file)
669 {
670 xhtml_do_file(file);
671 if (file->child)
672 xhtml_do_files(file->child);
673 if (file->next)
674 xhtml_do_files(file->next);
675 }
676
677 /*
678 * Free up all memory used by the file tree from 'xfile' downwards
679 */
680 static void xhtml_free_file(xhtmlfile* xfile)
681 {
682 if (xfile==NULL) {
683 return;
684 }
685
686 if (xfile->filename) {
687 sfree(xfile->filename);
688 }
689 xhtml_free_file(xfile->child);
690 xhtml_free_file(xfile->next);
691 sfree(xfile);
692 }
693
694 /*
695 * Main function.
696 */
697 void xhtml_backend(paragraph *sourceform, keywordlist *in_keywords,
698 indexdata *in_idx)
699 {
700 /* int i;*/
701 indexentry *ientry;
702 int ti;
703 xhtmlsection *xsect;
704
705 sourceparas = sourceform;
706 conf = xhtml_configure(sourceform);
707 keywords = in_keywords;
708 idx = in_idx;
709
710 /* Clear up the index entries backend data pointers */
711 for (ti=0; (ientry = (indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
712 ientry->backend_data=NULL;
713 }
714
715 xhtml_ponder_layout(sourceform);
716
717 /* old system ... (writes to *.alt, but gets some stuff wrong and is ugly) */
718 /* xhtml_level_0(sourceform);
719 for (i=1; i<=conf.leaf_level; i++)
720 {
721 xhtml_level(sourceform, i);
722 }*/
723
724 /* new system ... (writes to *.html, but isn't fully trusted) */
725 xhtml_do_top_file(topfile, sourceform);
726 assert(!topfile->next); /* shouldn't have a sibling at all */
727 if (topfile->child) {
728 xhtml_do_files(topfile->child);
729 xhtml_do_index();
730 }
731
732 /* release file, section, index data structures */
733 xsect = topsection;
734 while (xsect) {
735 xhtmlsection *tmp = xsect->chain;
736 if (xsect->fragment) {
737 sfree(xsect->fragment);
738 }
739 sfree(xsect);
740 xsect = tmp;
741 }
742 xhtml_free_file(topfile);
743 for (ti = 0; (ientry=(indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
744 if (ientry->backend_data!=NULL) {
745 xhtmlindex *xi = (xhtmlindex*) ientry->backend_data;
746 if (xi->sections!=NULL) {
747 sfree(xi->sections);
748 }
749 sfree(xi);
750 }
751 ientry->backend_data = NULL;
752 }
753 sfree(conf.fsect);
754 }
755
756 static int xhtml_para_level(paragraph *p)
757 {
758 switch (p->type)
759 {
760 case para_Title:
761 return 0;
762 break;
763 case para_UnnumberedChapter:
764 case para_Chapter:
765 case para_Appendix:
766 return 1;
767 break;
768 /* case para_BiblioCited:
769 return 2;
770 break;*/
771 case para_Heading:
772 case para_Subsect:
773 return p->aux+2;
774 break;
775 default:
776 return -1;
777 break;
778 }
779 }
780
781 /* Output the nav links for the current file.
782 * file == NULL means we're doing the index
783 */
784 static void xhtml_donavlinks(FILE *fp, xhtmlfile *file)
785 {
786 xhtmlfile *xhtml_next_file = NULL;
787 fprintf(fp, "<p");
788 if (conf.nav_attrs!=NULL) {
789 fprintf(fp, " %ls>", conf.nav_attrs);
790 } else {
791 fprintf(fp, ">");
792 }
793 if (xhtml_last_file==NULL) {
794 fprintf(fp, "Previous | ");
795 } else {
796 fprintf(fp, "<a href='%s'>Previous</a> | ", xhtml_last_file->filename);
797 }
798 fprintf(fp, "<a href='%s'>Contents</a> | ", conf.contents_filename);
799 if (file == NULL) {
800 fprintf(fp, "Index | ");
801 } else {
802 fprintf(fp, "<a href='%s'>Index</a> | ", conf.index_filename);
803 }
804 if (file != NULL) { /* otherwise we're doing nav links for the index */
805 if (xhtml_next_file==NULL)
806 xhtml_next_file = file->child;
807 if (xhtml_next_file==NULL)
808 xhtml_next_file = file->next;
809 if (xhtml_next_file==NULL)
810 xhtml_next_file = file->parent->next;
811 }
812 if (xhtml_next_file==NULL) {
813 if (file==NULL) { /* index, so no next file */
814 fprintf(fp, "Next ");
815 } else {
816 fprintf(fp, "<a href='%s'>Next</a>", conf.index_filename);
817 }
818 } else {
819 fprintf(fp, "<a href='%s'>Next</a>", xhtml_next_file->filename);
820 }
821 fprintf(fp, "</p>\n");
822 }
823
824 /* Write out the index file */
825 static void xhtml_do_index_body(FILE *fp)
826 {
827 indexentry *y;
828 int ti;
829
830 if (count234(idx->entries) == 0)
831 return; /* don't write anything at all */
832
833 fprintf(fp, "<dl>\n");
834 /* iterate over idx->entries using the tree functions and display everything */
835 for (ti = 0; (y = (indexentry *)index234(idx->entries, ti)) != NULL; ti++) {
836 if (y->backend_data) {
837 int i;
838 xhtmlindex *xi;
839
840 fprintf(fp, "<dt>");
841 xhtml_para(fp, y->text, FALSE);
842 fprintf(fp, "</dt>\n<dd>");
843
844 xi = (xhtmlindex*) y->backend_data;
845 for (i=0; i<xi->nsection; i++) {
846 xhtmlsection *sect = xi->sections[i];
847 if (sect) {
848 fprintf(fp, "<a href='%s#%s'>", sect->file->filename, sect->fragment);
849 if (sect->para->kwtext) {
850 xhtml_para(fp, sect->para->kwtext, FALSE);
851 } else if (sect->para->words) {
852 xhtml_para(fp, sect->para->words, FALSE);
853 }
854 fprintf(fp, "</a>");
855 if (i+1<xi->nsection) {
856 fprintf(fp, ", ");
857 }
858 }
859 }
860 fprintf(fp, "</dd>\n");
861 }
862 }
863 fprintf(fp, "</dl>\n");
864 }
865 static void xhtml_do_index()
866 {
867 word temp_word = { NULL, NULL, word_Normal, 0, 0, L"Index", { NULL, 0, 0} };
868 FILE *fp = fopen(conf.index_filename, "w");
869
870 if (fp==NULL)
871 fatal(err_cantopenw, conf.index_filename);
872 xhtml_doheader(fp, &temp_word);
873 xhtml_donavlinks(fp, NULL);
874
875 xhtml_do_index_body(fp);
876
877 xhtml_donavlinks(fp, NULL);
878 xhtml_dofooter(fp);
879 fclose(fp);
880 }
881
882 /* Output the given file. This includes whatever contents at beginning and end, etc. etc. */
883 static void xhtml_do_file(xhtmlfile *file)
884 {
885 FILE *fp = fopen(file->filename, "w");
886 if (fp==NULL)
887 fatal(err_cantopenw, file->filename);
888
889 if (file->sections->para->words) {
890 xhtml_doheader(fp, file->sections->para->words);
891 } else if (file->sections->para->kwtext) {
892 xhtml_doheader(fp, file->sections->para->kwtext);
893 } else {
894 xhtml_doheader(fp, NULL);
895 }
896
897 xhtml_donavlinks(fp, file);
898
899 if (file->is_leaf && conf.leaf_contains_contents &&
900 xhtml_do_contents(NULL, file)>=conf.leaf_smallest_contents)
901 xhtml_do_contents(fp, file);
902 xhtml_do_sections(fp, file->sections);
903 if (!file->is_leaf)
904 xhtml_do_naked_contents(fp, file);
905
906 xhtml_donavlinks(fp, file);
907
908 xhtml_dofooter(fp);
909 fclose(fp);
910
911 xhtml_last_file = file;
912 }
913
914 /* Output the top-level file. */
915 static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform)
916 {
917 paragraph *p;
918 int done=FALSE;
919 FILE *fp = fopen(file->filename, "w");
920 if (fp==NULL)
921 fatal(err_cantopenw, file->filename);
922
923 /* Do the title -- only one allowed */
924 for (p = sourceform; p && !done; p = p->next)
925 {
926 if (p->type == para_Title)
927 {
928 xhtml_doheader(fp, p->words);
929 done=TRUE;
930 }
931 }
932 if (!done)
933 xhtml_doheader(fp, NULL /* Eek! */);
934
935 /*
936 * Display the title.
937 */
938 for (p = sourceform; p; p = p->next)
939 {
940 if (p->type == para_Title) {
941 xhtml_heading(fp, p, FALSE);
942 break;
943 }
944 }
945
946 /* Do the preamble */
947 for (p = sourceform; p; p = p->next)
948 {
949 if (p->type == para_Chapter || p->type == para_Heading ||
950 p->type == para_Subsect || p->type == para_Appendix ||
951 p->type == para_UnnumberedChapter) {
952 /*
953 * We've found the end of the preamble. Do every normal
954 * paragraph up to there.
955 */
956 xhtml_do_paras(fp, sourceform, p, FALSE);
957 break;
958 }
959 }
960
961 xhtml_do_contents(fp, file);
962 xhtml_do_sections(fp, file->sections);
963
964 /*
965 * Put the index in the top file if we're in single-file mode
966 * (leaf-level 0).
967 */
968 if (conf.leaf_level == 0 && count234(idx->entries) > 0) {
969 fprintf(fp, "<a name=\"index\"></a><h1>Index</h1>\n");
970 xhtml_do_index_body(fp);
971 }
972
973 xhtml_dofooter(fp);
974 fclose(fp);
975 }
976
/*
 * Convert a Unicode string to an ASCII one. Characters in the range
 * 32..126 are copied; '?' is used for everything else.
 *
 * The result is a freshly allocated, NUL-terminated string stored in
 * *out; the caller frees it.
 */
static void xhtml_utostr(wchar_t *in, char **out)
{
    int len = ustrlen(in);
    char *buf = smalloc(len+1);
    char *dst = buf;
    wchar_t *src;

    for (src = in; src < in + len; src++)
        *dst++ = (*src >= 32 && *src <= 126) ? (char)*src : '?';
    *dst = 0;

    *out = buf;
}
994
995 /*
996 * Write contents for the given file, and subfiles, down to
997 * the appropriate contents depth. Returns the number of
998 * entries written.
999 */
1000 static int xhtml_do_contents(FILE *fp, xhtmlfile *file)
1001 {
1002 int level, limit, count = 0;
1003 if (!file)
1004 return 0;
1005
1006 level = (file->sections)?(file->sections->level):(0);
1007 limit = conf.contents_depth[(level>5)?(5):(level)];
1008 start_level = (file->is_leaf) ? (level-1) : (level);
1009 last_level = start_level;
1010
1011 count += xhtml_do_contents_section_limit(fp, file->sections, limit);
1012 count += xhtml_do_contents_limit(fp, file->child, limit);
1013 if (fp!=NULL) {
1014 while (last_level > start_level) {
1015 last_level--;
1016 fprintf(fp, "</li></ul>\n");
1017 }
1018 }
1019 return count;
1020 }
1021
1022 /* As above, but doesn't do anything in the current file */
1023 static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file)
1024 {
1025 int level, limit, start_level, count = 0;
1026 if (!file)
1027 return 0;
1028
1029 level = (file->sections)?(file->sections->level):(0);
1030 limit = conf.contents_depth[(level>5)?(5):(level)];
1031 start_level = (file->is_leaf) ? (level-1) : (level);
1032 last_level = start_level;
1033
1034 count = xhtml_do_contents_limit(fp, file->child, limit);
1035 if (fp!=NULL) {
1036 while (last_level > start_level) {
1037 last_level--;
1038 fprintf(fp, "</li></ul>\n");
1039 }
1040 }
1041 return count;
1042 }
1043
1044 /*
1045 * Write contents for the given file, children, and siblings, down to
1046 * given limit contents depth.
1047 */
1048 static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit)
1049 {
1050 int count = 0;
1051 while (file) {
1052 count += xhtml_do_contents_section_limit(fp, file->sections, limit);
1053 count += xhtml_do_contents_limit(fp, file->child, limit);
1054 file = file->next;
1055 }
1056 return count;
1057 }
1058
1059 /*
1060 * Write contents entries for the given section tree, down to the
1061 * limit contents depth.
1062 */
1063 static int xhtml_do_contents_section_deep_limit(FILE *fp, xhtmlsection *section, int limit)
1064 {
1065 int count = 0;
1066 while (section) {
1067 if (!xhtml_add_contents_entry(fp, section, limit))
1068 return 0;
1069 else
1070 count++;
1071 count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
1072 section = section->next;
1073 }
1074 return count;
1075 }
1076
1077 /*
1078 * Write contents entries for the given section tree, down to the
1079 * limit contents depth.
1080 */
1081 static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit)
1082 {
1083 int count = 0;
1084 if (!section)
1085 return 0;
1086 xhtml_add_contents_entry(fp, section, limit);
1087 count=1;
1088 count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
1089 /* section=section->child;
1090 while (section && xhtml_add_contents_entry(fp, section, limit)) {
1091 section = section->next;
1092 }*/
1093 return count;
1094 }
1095
1096 /*
1097 * Add a section entry, unless we're exceeding the limit, in which
1098 * case return FALSE (otherwise return TRUE).
1099 */
1100 static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit)
1101 {
1102 if (!section || section->level > limit)
1103 return FALSE;
1104 if (fp==NULL || section->level < 0)
1105 return TRUE;
1106 if (last_level > section->level) {
1107 while (last_level > section->level) {
1108 last_level--;
1109 fprintf(fp, "</li></ul>\n");
1110 }
1111 fprintf(fp, "</li>\n");
1112 } else if (last_level < section->level) {
1113 assert(last_level == section->level - 1);
1114 last_level++;
1115 fprintf(fp, "<ul>\n");
1116 } else {
1117 fprintf(fp, "</li>\n");
1118 }
1119 fprintf(fp, "<li><a href=\"%s#%s\">", section->file->filename, section->fragment);
1120 if (section->para->kwtext) {
1121 xhtml_para(fp, section->para->kwtext, FALSE);
1122 if (section->para->words) {
1123 fprintf(fp, ": ");
1124 }
1125 }
1126 if (section->para->words) {
1127 xhtml_para(fp, section->para->words, FALSE);
1128 }
1129 fprintf(fp, "</a>\n");
1130 return TRUE;
1131 }
1132
1133 /*
1134 * Write all the sections in this file. Do all paragraphs in this section, then all
1135 * children (recursively), then go on to the next one (tail recursively).
1136 */
1137 static void xhtml_do_sections(FILE *fp, xhtmlsection *sections)
1138 {
1139 while (sections) {
1140 currentsection = sections;
1141 xhtml_do_paras(fp, sections->para, NULL, TRUE);
1142 xhtml_do_sections(fp, sections->child);
1143 sections = sections->next;
1144 }
1145 }
1146
1147 /* Write this list of paragraphs. Close off all lists at the end. */
1148 static void xhtml_do_paras(FILE *fp, paragraph *p, paragraph *end,
1149 int indexable)
1150 {
1151 int last_type = -1, ptype, first=TRUE;
1152 stack lcont_stack = stk_new();
1153 if (!p)
1154 return;
1155
1156 /* for (; p && (xhtml_para_level(p)>limit || xhtml_para_level(p)==-1 || first); p=p->next) {*/
1157 for (; p && p != end && (xhtml_para_level(p)==-1 || first); p=p->next) {
1158 first=FALSE;
1159 switch (ptype = p->type)
1160 {
1161 /*
1162 * Things we ignore because we've already processed them or
1163 * aren't going to touch them in this pass.
1164 */
1165 case para_IM:
1166 case para_BR:
1167 case para_Biblio: /* only touch BiblioCited */
1168 case para_VersionID:
1169 case para_NoCite:
1170 case para_Title:
1171 break;
1172
1173 /*
1174 * Chapter titles.
1175 */
1176 case para_Chapter:
1177 case para_Appendix:
1178 case para_UnnumberedChapter:
1179 xhtml_heading(fp, p, indexable);
1180 break;
1181
1182 case para_Heading:
1183 case para_Subsect:
1184 xhtml_heading(fp, p, indexable);
1185 break;
1186
1187 case para_Rule:
1188 fprintf(fp, "\n<hr />\n");
1189 break;
1190
1191 case para_Normal:
1192 case para_Copyright:
1193 fprintf(fp, "\n<p>");
1194 xhtml_para(fp, p->words, indexable);
1195 fprintf(fp, "</p>\n");
1196 break;
1197
1198 case para_LcontPush:
1199 {
1200 int *p;
1201 p = mknew(int);
1202 *p = last_type;
1203 stk_push(lcont_stack, p);
1204 last_type = para_Normal;
1205 }
1206 break;
1207 case para_LcontPop:
1208 {
1209 int *p = stk_pop(lcont_stack);
1210 assert(p);
1211 ptype = last_type = *p;
1212 sfree(p);
1213 goto closeofflist; /* ick */
1214 }
1215 break;
1216 case para_QuotePush:
1217 fprintf(fp, "<blockquote>\n");
1218 break;
1219 case para_QuotePop:
1220 fprintf(fp, "</blockquote>\n");
1221 break;
1222
1223 case para_Bullet:
1224 case para_NumberedList:
1225 case para_Description:
1226 case para_DescribedThing:
1227 case para_BiblioCited:
1228 if (last_type!=p->type &&
1229 !(last_type==para_DescribedThing && p->type==para_Description) &&
1230 !(last_type==para_Description && p->type==para_DescribedThing)) {
1231 /* start up list if necessary */
1232 if (p->type == para_Bullet) {
1233 fprintf(fp, "<ul>\n");
1234 } else if (p->type == para_NumberedList) {
1235 fprintf(fp, "<ol>\n");
1236 } else if (p->type == para_BiblioCited ||
1237 p->type == para_DescribedThing ||
1238 p->type == para_Description) {
1239 fprintf(fp, "<dl>\n");
1240 }
1241 }
1242 if (p->type == para_Bullet || p->type == para_NumberedList) {
1243 fprintf(fp, "<li>");
1244 } else if (p->type == para_DescribedThing) {
1245 fprintf(fp, "<dt>");
1246 } else if (p->type == para_Description) {
1247 fprintf(fp, "<dd>");
1248 } else if (p->type == para_BiblioCited) {
1249 fprintf(fp, "<dt>");
1250 xhtml_para(fp, p->kwtext, indexable);
1251 fprintf(fp, "</dt>\n<dd>");
1252 }
1253 xhtml_para(fp, p->words, indexable);
1254 {
1255 paragraph *p2 = p->next;
1256 if (p2 && xhtml_para_level(p2)==-1 && p2->type == para_LcontPush)
1257 break;
1258 }
1259
1260 closeofflist:
1261 if (ptype == para_BiblioCited) {
1262 fprintf(fp, "</dd>\n");
1263 } else if (ptype == para_DescribedThing) {
1264 fprintf(fp, "</dt>");
1265 } else if (ptype == para_Description) {
1266 fprintf(fp, "</dd>");
1267 } else if (ptype == para_Bullet || ptype == para_NumberedList) {
1268 fprintf(fp, "</li>");
1269 }
1270 if (ptype == para_Bullet || ptype == para_NumberedList ||
1271 ptype == para_BiblioCited || ptype == para_Description ||
1272 ptype == para_DescribedThing)
1273 /* close off list if necessary */
1274 {
1275 paragraph *p2 = p->next;
1276 int close_off=FALSE;
1277 /* if (p2 && (xhtml_para_level(p2)>limit || xhtml_para_level(p2)==-1)) {*/
1278 if (p2 && xhtml_para_level(p2)==-1) {
1279 if (p2->type != ptype &&
1280 !(p2->type==para_DescribedThing && ptype==para_Description) &&
1281 !(p2->type==para_Description && ptype==para_DescribedThing) &&
1282 p2->type != para_LcontPush)
1283 close_off=TRUE;
1284 } else {
1285 close_off=TRUE;
1286 }
1287 if (close_off) {
1288 if (ptype == para_Bullet) {
1289 fprintf(fp, "</ul>\n");
1290 } else if (ptype == para_NumberedList) {
1291 fprintf(fp, "</ol>\n");
1292 } else if (ptype == para_BiblioCited ||
1293 ptype == para_Description ||
1294 ptype == para_DescribedThing) {
1295 fprintf(fp, "</dl>\n");
1296 }
1297 }
1298 }
1299 break;
1300
1301 case para_Code:
1302 xhtml_codepara(fp, p->words);
1303 break;
1304 }
1305 last_type = ptype;
1306 }
1307
1308 stk_free(lcont_stack);
1309 }
1310
1311 /*
1312 * Output a header for this XHTML file.
1313 */
1314 static void xhtml_doheader(FILE *fp, word *title)
1315 {
1316 fprintf(fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n");
1317 fprintf(fp, "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n");
1318 fprintf(fp, "<html xmlns='http://www.w3.org/1999/xhtml'>\n\n<head>\n<title>");
1319 if (title==NULL)
1320 fprintf(fp, "The thing with no name!");
1321 else
1322 xhtml_para(fp, title, FALSE);
1323 fprintf(fp, "</title>\n");
1324 fprintf(fp, "<meta name=\"generator\" content=\"Halibut %s xhtml-backend\" />\n", version);
1325 if (conf.author)
1326 fprintf(fp, "<meta name=\"author\" content=\"%ls\" />\n", conf.author);
1327 if (conf.description)
1328 fprintf(fp, "<meta name=\"description\" content=\"%ls\" />\n", conf.description);
1329 if (conf.head_end)
1330 fprintf(fp, "%ls\n", conf.head_end);
1331 fprintf(fp, "</head>\n\n");
1332 if (conf.body)
1333 fprintf(fp, "%ls\n", conf.body);
1334 else
1335 fprintf(fp, "<body>\n");
1336 if (conf.body_start)
1337 fprintf(fp, "%ls\n", conf.body_start);
1338 }
1339
1340 /*
1341 * Output a footer for this XHTML file.
1342 */
1343 static void xhtml_dofooter(FILE *fp)
1344 {
1345 fprintf(fp, "\n<hr />\n\n");
1346 if (conf.body_end)
1347 fprintf(fp, "%ls\n", conf.body_end);
1348 if (!conf.suppress_address) {
1349 fprintf(fp,"<address>\n");
1350 if (conf.address_start)
1351 fprintf(fp, "%ls\n", conf.address_start);
1352 /* Do the version ID */
1353 if (conf.include_version_id) {
1354 paragraph *p;
1355 int started = 0;
1356 for (p = sourceparas; p; p = p->next)
1357 if (p->type == para_VersionID) {
1358 xhtml_versionid(fp, p->words, started);
1359 started = 1;
1360 }
1361 }
1362 if (conf.address_end)
1363 fprintf(fp, "%ls\n", conf.address_end);
1364 fprintf(fp, "</address>\n");
1365 }
1366 fprintf(fp, "</body>\n\n</html>\n");
1367 }
1368
1369 /*
1370 * Output the versionid paragraph. Typically this is a version control
1371 * ID string (such as $Id...$ in RCS).
1372 */
1373 static void xhtml_versionid(FILE *fp, word *text, int started)
1374 {
1375 rdstringc t = { 0, 0, NULL };
1376
1377 rdaddc(&t, '['); /* FIXME: configurability */
1378 xhtml_rdaddwc(&t, text, NULL, FALSE);
1379 rdaddc(&t, ']'); /* FIXME: configurability */
1380
1381 if (started)
1382 fprintf(fp, "<br />\n");
1383 fprintf(fp, "%s\n", t.text);
1384 sfree(t.text);
1385 }
1386
1387 /* Is this an XHTML reserved character? */
1388 static int xhtml_reservedchar(int c)
1389 {
1390 if (c=='&' || c=='<' || c=='>' || c=='"')
1391 return TRUE;
1392 else
1393 return FALSE;
1394 }
1395
1396 /*
1397 * Convert a wide string into valid XHTML: Anything outside ASCII will
1398 * be fixed up as an entity. Currently we don't worry about constraining the
1399 * encoded character set, which we should probably do at some point (we can
1400 * still fix up and return FALSE - see the last comment here). We also don't
1401 * currently
1402 *
1403 * Because this is only used for words, spaces are HARD spaces (any other
1404 * spaces will be word_Whitespace not word_Normal). So they become &nbsp;
1405 * Unless hard_spaces is FALSE, of course (code paragraphs break the above
1406 * rule).
1407 *
1408 * If `result' is non-NULL, mallocs the resulting string and stores a pointer to
1409 * it in `*result'. If `result' is NULL, merely checks whether all
1410 * characters in the string are feasible.
1411 *
1412 * Return is nonzero if all characters are OK. If not all
1413 * characters are OK but `result' is non-NULL, a result _will_
1414 * still be generated!
1415 */
1416 static int xhtml_convert(wchar_t *s, int maxlen, char **result,
1417 int hard_spaces) {
1418 int doing = (result != 0);
1419 int ok = TRUE;
1420 char *p = NULL;
1421 int plen = 0, psize = 0;
1422
1423 if (maxlen <= 0)
1424 maxlen = -1;
1425
1426 for (; *s && maxlen != 0; s++, maxlen--) {
1427 wchar_t c = *s;
1428
1429 #define ensure_size(i) if (i>=psize) { psize = i+256; p = resize(p, psize); }
1430
1431 if (((c == 32 && !hard_spaces) || (c > 32 && c <= 126 && !xhtml_reservedchar(c)))) {
1432 /* Char is OK. */
1433 if (doing)
1434 {
1435 ensure_size(plen);
1436 p[plen++] = (char)c;
1437 }
1438 } else {
1439 /* Char needs fixing up. */
1440 /* ok = FALSE; -- currently we never return FALSE; we
1441 * might want to when considering a character set for the
1442 * encoded document.
1443 */
1444 if (doing)
1445 {
1446 if (c==32) { /* a space in a word is a hard space */
1447 ensure_size(plen+6); /* includes space for the NUL, which is subsequently stomped on */
1448 sprintf(p+plen, "&nbsp;");
1449 plen+=6;
1450 } else {
1451 /* FIXME: entity names! */
1452 ensure_size(plen+8); /* includes space for the NUL, which is subsequently stomped on */
1453 plen+=sprintf(p+plen, "&#%04i;", (int)c);
1454 }
1455 }
1456 }
1457 }
1458 if (doing) {
1459 p = resize(p, plen+1);
1460 p[plen] = '\0';
1461 *result = p;
1462 }
1463 return ok;
1464 }
1465
1466 /*
1467 * This formats the given words as XHTML.
1468 *
1469 * `indexable', if FALSE, prohibits adding any index references.
1470 * You might use this, for example, if an index reference occurred
1471 * in a section title, to prevent phony index references when the
1472 * section title is processed in strange places such as contents
1473 * sections.
1474 */
1475 static void xhtml_rdaddwc(rdstringc *rs, word *text, word *end, int indexable) {
1476 char *c;
1477 keyword *kwl;
1478 xhtmlsection *sect;
1479 indextag *itag;
1480 int ti;
1481
1482 for (; text && text != end; text = text->next) {
1483 switch (text->type) {
1484 case word_HyperLink:
1485 xhtml_utostr(text->text, &c);
1486 rdaddsc(rs, "<a href=\"");
1487 rdaddsc(rs, c);
1488 rdaddsc(rs, "\">");
1489 sfree(c);
1490 break;
1491
1492 case word_UpperXref:
1493 case word_LowerXref:
1494 kwl = kw_lookup(keywords, text->text);
1495 if (kwl) {
1496 sect=xhtml_find_section(kwl->para);
1497 if (sect) {
1498 rdaddsc(rs, "<a href=\"");
1499 rdaddsc(rs, sect->file->filename);
1500 rdaddc(rs, '#');
1501 rdaddsc(rs, sect->fragment);
1502 rdaddsc(rs, "\">");
1503 } else {
1504 rdaddsc(rs, "<a href=\"Apologies.html\"><!-- probably a bibliography cross reference -->");
1505 error(err_whatever, "Couldn't locate cross-reference! (Probably a bibliography entry.)");
1506 }
1507 } else {
1508 rdaddsc(rs, "<a href=\"Apologies.html\"><!-- unknown cross-reference -->");
1509 error(err_whatever, "Couldn't locate cross-reference! (Wasn't in source file.)");
1510 }
1511 break;
1512
1513 case word_IndexRef: /* in theory we could make an index target here */
1514 /* rdaddsc(rs, "<a name=\"idx-");
1515 xhtml_utostr(text->text, &c);
1516 rdaddsc(rs, c);
1517 sfree(c);
1518 rdaddsc(rs, "\"></a>");*/
1519 /* what we _do_ need to do is to fix up the backend data
1520 * for any indexentry this points to.
1521 */
1522 if (!indexable)
1523 break;
1524
1525 for (ti=0; (itag = (indextag *)index234(idx->tags, ti))!=NULL; ti++) {
1526 /* FIXME: really ustricmp() and not ustrcmp()? */
1527 if (ustricmp(itag->name, text->text)==0) {
1528 break;
1529 }
1530 }
1531 if (itag!=NULL) {
1532 if (itag->refs!=NULL) {
1533 int i;
1534 for (i=0; i<itag->nrefs; i++) {
1535 xhtmlindex *idx_ref;
1536 indexentry *ientry;
1537
1538 ientry = itag->refs[i];
1539 if (ientry->backend_data==NULL) {
1540 idx_ref = (xhtmlindex*) smalloc(sizeof(xhtmlindex));
1541 if (idx_ref==NULL)
1542 fatal(err_nomemory);
1543 idx_ref->nsection = 0;
1544 idx_ref->size = 4;
1545 idx_ref->sections = (xhtmlsection**) smalloc(idx_ref->size * sizeof(xhtmlsection*));
1546 if (idx_ref->sections==NULL)
1547 fatal(err_nomemory);
1548 ientry->backend_data = idx_ref;
1549 } else {
1550 idx_ref = ientry->backend_data;
1551 if (idx_ref->nsection+1 > idx_ref->size) {
1552 int new_size = idx_ref->size * 2;
1553 idx_ref->sections = srealloc(idx_ref->sections, new_size * sizeof(xhtmlsection));
1554 if (idx_ref->sections==NULL) {
1555 fatal(err_nomemory);
1556 }
1557 idx_ref->size = new_size;
1558 }
1559 }
1560 idx_ref->sections[idx_ref->nsection++] = currentsection;
1561 #if 0
1562 #endif
1563 }
1564 } else {
1565 fatal(err_whatever, "Index tag had no entries!");
1566 }
1567 } else {
1568 fprintf(stderr, "Looking for index entry '%ls'\n", text->text);
1569 fatal(err_whatever, "Couldn't locate index entry! (Wasn't in index.)");
1570 }
1571 break;
1572
1573 case word_HyperEnd:
1574 case word_XrefEnd:
1575 rdaddsc(rs, "</a>");
1576 break;
1577
1578 case word_Normal:
1579 case word_Emph:
1580 case word_Code:
1581 case word_WeakCode:
1582 case word_WhiteSpace:
1583 case word_EmphSpace:
1584 case word_CodeSpace:
1585 case word_WkCodeSpace:
1586 case word_Quote:
1587 case word_EmphQuote:
1588 case word_CodeQuote:
1589 case word_WkCodeQuote:
1590 assert(text->type != word_CodeQuote &&
1591 text->type != word_WkCodeQuote);
1592 if (towordstyle(text->type) == word_Emph &&
1593 (attraux(text->aux) == attr_First ||
1594 attraux(text->aux) == attr_Only))
1595 rdaddsc(rs, "<em>");
1596 else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) &&
1597 (attraux(text->aux) == attr_First ||
1598 attraux(text->aux) == attr_Only))
1599 rdaddsc(rs, "<code>");
1600
1601 if (removeattr(text->type) == word_Normal) {
1602 if (xhtml_convert(text->text, 0, &c, TRUE)) /* spaces in the word are hard */
1603 rdaddsc(rs, c);
1604 else
1605 xhtml_rdaddwc(rs, text->alt, NULL, indexable);
1606 sfree(c);
1607 } else if (removeattr(text->type) == word_WhiteSpace) {
1608 rdaddc(rs, ' ');
1609 } else if (removeattr(text->type) == word_Quote) {
1610 rdaddsc(rs, "&quot;");
1611 }
1612
1613 if (towordstyle(text->type) == word_Emph &&
1614 (attraux(text->aux) == attr_Last ||
1615 attraux(text->aux) == attr_Only))
1616 rdaddsc(rs, "</em>");
1617 else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) &&
1618 (attraux(text->aux) == attr_Last ||
1619 attraux(text->aux) == attr_Only))
1620 rdaddsc(rs, "</code>");
1621 break;
1622 }
1623 }
1624 }
1625
1626 /* Output a heading, formatted as XHTML.
1627 */
1628 static void xhtml_heading(FILE *fp, paragraph *p, int indexable)
1629 {
1630 rdstringc t = { 0, 0, NULL };
1631 word *tprefix = p->kwtext;
1632 word *nprefix = p->kwtext2;
1633 word *text = p->words;
1634 int level = xhtml_para_level(p);
1635 xhtmlsection *sect = xhtml_find_section(p);
1636 xhtmlheadfmt *fmt;
1637 char *fragment;
1638 if (sect) {
1639 fragment = sect->fragment;
1640 } else {
1641 if (p->type == para_Title)
1642 fragment = "title";
1643 else {
1644 fragment = ""; /* FIXME: what else can we do? */
1645 error(err_whatever, "Couldn't locate heading cross-reference!");
1646 }
1647 }
1648
1649 if (p->type == para_Title)
1650 fmt = NULL;
1651 else if (level == 1)
1652 fmt = &conf.fchapter;
1653 else if (level-1 < conf.nfsect)
1654 fmt = &conf.fsect[level-1];
1655 else
1656 fmt = &conf.fsect[conf.nfsect-1];
1657
1658 if (fmt && fmt->just_numbers && nprefix) {
1659 xhtml_rdaddwc(&t, nprefix, NULL, indexable);
1660 if (fmt) {
1661 char *c;
1662 if (xhtml_convert(fmt->number_suffix, 0, &c, FALSE)) {
1663 rdaddsc(&t, c);
1664 sfree(c);
1665 }
1666 }
1667 } else if (fmt && !fmt->just_numbers && tprefix) {
1668 xhtml_rdaddwc(&t, tprefix, NULL, indexable);
1669 if (fmt) {
1670 char *c;
1671 if (xhtml_convert(fmt->number_suffix, 0, &c, FALSE)) {
1672 rdaddsc(&t, c);
1673 sfree(c);
1674 }
1675 }
1676 }
1677 xhtml_rdaddwc(&t, text, NULL, indexable);
1678 /*
1679 * If we're outputting in single-file mode, we need to lower
1680 * the level of each heading by one, because the overall
1681 * document title will be sitting right at the top as an <h1>
1682 * and so chapters and sections should start at <h2>.
1683 *
1684 * Even if not, the document title will come back from
1685 * xhtml_para_level() as level zero, so we must increment that
1686 * no matter what leaf_level is set to.
1687 */
1688 if (conf.leaf_level == 0 || level == 0)
1689 level++;
1690 fprintf(fp, "<a name=\"%s\"></a><h%i>%s</h%i>\n", fragment, level, t.text, level);
1691 sfree(t.text);
1692 }
1693
1694 /* Output a paragraph. Styles are handled by xhtml_rdaddwc().
1695 * This looks pretty simple; I may have missed something ...
1696 */
1697 static void xhtml_para(FILE *fp, word *text, int indexable)
1698 {
1699 rdstringc out = { 0, 0, NULL };
1700 xhtml_rdaddwc(&out, text, NULL, indexable);
1701 fprintf(fp, "%s", out.text);
1702 sfree(out.text);
1703 }
1704
1705 /* Output a code paragraph. I'm treating this as preformatted, which
1706 * may not be entirely correct. See xhtml_para() for my worries about
1707 * this being overly-simple; however I think that most of the complexity
1708 * of the text backend came entirely out of word wrapping anyway.
1709 */
1710 static void xhtml_codepara(FILE *fp, word *text)
1711 {
1712 fprintf(fp, "<pre>");
1713 for (; text; text = text->next) if (text->type == word_WeakCode) {
1714 word *here, *next;
1715 char *c;
1716
1717 /*
1718 * See if this WeakCode is followed by an Emph to indicate
1719 * emphasis.
1720 */
1721 here = text;
1722 if (text->next && text->next->type == word_Emph) {
1723 next = text = text->next;
1724 } else
1725 next = NULL;
1726
1727 if (next) {
1728 wchar_t *t, *e;
1729 int n;
1730
1731 t = here->text;
1732 e = next->text;
1733
1734 while (*e) {
1735 int ec = *e;
1736
1737 for (n = 0; t[n] && e[n] && e[n] == ec; n++);
1738 xhtml_convert(t, n, &c, FALSE);
1739 fprintf(fp, "%s%s%s",
1740 (ec == 'i' ? "<em>" : ec == 'b' ? "<b>" : ""),
1741 c,
1742 (ec == 'i' ? "</em>" : ec == 'b' ? "</b>" : ""));
1743 sfree(c);
1744
1745 t += n;
1746 e += n;
1747 }
1748
1749 xhtml_convert(t, 0, &c, FALSE);
1750 fprintf(fp, "%s\n", c);
1751 sfree(c);
1752 } else {
1753 xhtml_convert(here->text, 0, &c, FALSE);
1754 fprintf(fp, "%s\n", c);
1755 sfree(c);
1756 }
1757 }
1758 fprintf(fp, "</pre>\n");
1759 }