2 * xhtml backend for Halibut
3 * (initial implementation by James Aylett)
7 * +++ doesn't handle non-breaking hyphens. Not sure how to yet.
8 * +++ entity names (from a file -- ideally supply normal SGML files)
9 * +++ configuration directive to file split where the current layout
10 * code wouldn't. Needs changes to _ponder_layout() and _do_paras(),
15 * +++ biblio/index references target the nearest section marker, rather
16 * than having a dedicated target themselves. In large bibliographies
17 * this will cause problems. (The solution is to fake up a response
18 * from xhtml_find_section(), probably linking it into the sections
19 * chain just in case we need it again, and to make freeing it up
20 * easier.) docsrc.pl used to work as we do, however, and SGT agrees that
21 * this is acceptable for now.
22 * +++ can't cope with leaf-level == 0. It's all to do with the
23 * top-level file not being normal, probably not even having a valid
24 * section level, and stuff like that. I question whether this is an
25 * issue, frankly; small manuals that fit on one page should probably
26 * not be written in halibut at all.
34 struct xhtmlsection_Struct
{
35 struct xhtmlsection_Struct
*next
; /* next sibling (NULL if split across files) */
36 struct xhtmlsection_Struct
*child
; /* NULL if split across files */
37 struct xhtmlsection_Struct
*parent
; /* NULL if split across files */
38 struct xhtmlsection_Struct
*chain
; /* single structure independent of weird trees */
40 struct xhtmlfile_Struct
*file
; /* which file is this a part of? */
41 char *fragment
; /* fragment id within the file */
45 struct xhtmlfile_Struct
{
46 struct xhtmlfile_Struct
*next
;
47 struct xhtmlfile_Struct
*child
;
48 struct xhtmlfile_Struct
*parent
;
50 struct xhtmlsection_Struct
*sections
; /* sections within this file (only one for non-leaf) */
51 int is_leaf
; /* is this file a leaf file, ie does it not have any children? */
/* Short typedef names for the three struct tags used throughout this backend. */
typedef struct xhtmlsection_Struct xhtmlsection;
typedef struct xhtmlfile_Struct xhtmlfile;
typedef struct xhtmlindex_Struct xhtmlindex;
58 struct xhtmlindex_Struct
{
61 xhtmlsection
**sections
;
65 int contents_depth
[6];
66 int leaf_contains_contents
;
68 int leaf_smallest_contents
;
69 int include_version_id
;
70 wchar_t *author
, *description
;
71 wchar_t *head_end
, *body
, *body_start
, *body_end
, *address_start
, *address_end
, *nav_attrs
;
75 /*static void xhtml_level(paragraph *, int);
76 static void xhtml_level_0(paragraph *);
77 static void xhtml_docontents(FILE *, paragraph *, int);
78 static void xhtml_dosections(FILE *, paragraph *, int);
79 static void xhtml_dobody(FILE *, paragraph *, int);*/
81 static void xhtml_doheader(FILE *, word
*);
82 static void xhtml_dofooter(FILE *);
83 static void xhtml_versionid(FILE *, word
*, int);
85 static void xhtml_utostr(wchar_t *, char **);
86 static int xhtml_para_level(paragraph
*);
87 static int xhtml_reservedchar(int);
89 static int xhtml_convert(wchar_t *, char **, int);
90 static void xhtml_rdaddwc(rdstringc
*, word
*, word
*);
91 static void xhtml_para(FILE *, word
*);
92 static void xhtml_codepara(FILE *, word
*);
93 static void xhtml_heading(FILE *, paragraph
*);
95 /* File-global variables are much easier than passing these things
96 * all over the place. Evil, but easier. We can replace this with a single
97 * structure at some point.
99 static xhtmlconfig conf
;
100 static keywordlist
*keywords
;
101 static indexdata
*idx
;
102 static xhtmlfile
*topfile
;
103 static xhtmlsection
*topsection
;
104 static paragraph
*sourceparas
;
105 static xhtmlfile
*lastfile
;
106 static xhtmlfile
*xhtml_last_file
= NULL
;
107 static int last_level
=-1;
108 static xhtmlsection
*currentsection
;
110 static xhtmlconfig
xhtml_configure(paragraph
*source
)
117 ret
.contents_depth
[0] = 2;
118 ret
.contents_depth
[1] = 3;
119 ret
.contents_depth
[2] = 4;
120 ret
.contents_depth
[3] = 5;
121 ret
.contents_depth
[4] = 6;
122 ret
.contents_depth
[5] = 7;
124 ret
.leaf_smallest_contents
= 4;
125 ret
.leaf_contains_contents
= FALSE
;
126 ret
.include_version_id
= TRUE
;
128 ret
.description
= NULL
;
131 ret
.body_start
= NULL
;
133 ret
.address_start
= NULL
;
134 ret
.address_end
= NULL
;
135 ret
.nav_attrs
= NULL
;
136 ret
.suppress_address
= FALSE
;
138 for (; source
; source
= source
->next
)
140 if (source
->type
== para_Config
)
142 if (!ustricmp(source
->keyword
, L
"xhtml-contents-depth-0")) {
143 ret
.contents_depth
[0] = utoi(uadv(source
->keyword
));
144 } else if (!ustricmp(source
->keyword
, L
"xhtml-contents-depth-1")) {
145 ret
.contents_depth
[1] = utoi(uadv(source
->keyword
));
146 } else if (!ustricmp(source
->keyword
, L
"xhtml-contents-depth-2")) {
147 ret
.contents_depth
[2] = utoi(uadv(source
->keyword
));
148 } else if (!ustricmp(source
->keyword
, L
"xhtml-contents-depth-3")) {
149 ret
.contents_depth
[3] = utoi(uadv(source
->keyword
));
150 } else if (!ustricmp(source
->keyword
, L
"xhtml-contents-depth-4")) {
151 ret
.contents_depth
[4] = utoi(uadv(source
->keyword
));
152 } else if (!ustricmp(source
->keyword
, L
"xhtml-contents-depth-5")) {
153 ret
.contents_depth
[5] = utoi(uadv(source
->keyword
));
154 } else if (!ustricmp(source
->keyword
, L
"xhtml-leaf-level")) {
155 ret
.leaf_level
= utoi(uadv(source
->keyword
));
156 } else if (!ustricmp(source
->keyword
, L
"xhtml-leaf-smallest-contents")) {
157 ret
.leaf_smallest_contents
= utoi(uadv(source
->keyword
));
158 } else if (!ustricmp(source
->keyword
, L
"xhtml-versionid")) {
159 ret
.include_version_id
= utob(uadv(source
->keyword
));
160 } else if (!ustricmp(source
->keyword
, L
"xhtml-leaf-contains-contents")) {
161 ret
.leaf_contains_contents
= utob(uadv(source
->keyword
));
162 } else if (!ustricmp(source
->keyword
, L
"xhtml-suppress-address")) {
163 ret
.suppress_address
= utob(uadv(source
->keyword
));
164 } else if (!ustricmp(source
->keyword
, L
"xhtml-author")) {
165 ret
.author
= uadv(source
->keyword
);
166 } else if (!ustricmp(source
->keyword
, L
"xhtml-description")) {
167 ret
.description
= uadv(source
->keyword
);
168 } else if (!ustricmp(source
->keyword
, L
"xhtml-head-end")) {
169 ret
.head_end
= uadv(source
->keyword
);
170 } else if (!ustricmp(source
->keyword
, L
"xhtml-body-start")) {
171 ret
.body_start
= uadv(source
->keyword
);
172 } else if (!ustricmp(source
->keyword
, L
"xhtml-body-tag")) {
173 ret
.body
= uadv(source
->keyword
);
174 } else if (!ustricmp(source
->keyword
, L
"xhtml-body-end")) {
175 ret
.body_end
= uadv(source
->keyword
);
176 } else if (!ustricmp(source
->keyword
, L
"xhtml-address-start")) {
177 ret
.address_start
= uadv(source
->keyword
);
178 } else if (!ustricmp(source
->keyword
, L
"xhtml-address-end")) {
179 ret
.address_end
= uadv(source
->keyword
);
180 } else if (!ustricmp(source
->keyword
, L
"xhtml-navigation-attributes")) {
181 ret
.nav_attrs
= uadv(source
->keyword
);
186 /* printf(" !!! leaf_level = %i\n", ret.leaf_level);
187 printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]);
188 printf(" !!! contentdepth-1 = %i\n", ret.contents_depth[1]);
189 printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]);
190 printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]);
191 printf(" !!! contentdepth-4 = %i\n", ret.contents_depth[4]);
192 printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]);
193 printf(" !!! leaf_contains_contents = %i\n", ret.leaf_contains_contents);*/
197 static xhtmlsection
*xhtml_new_section(xhtmlsection
*last
)
199 xhtmlsection
*ret
= mknew(xhtmlsection
);
207 ret
->level
=-1; /* marker: end of chain */
211 /* Returns NULL or the section that marks that paragraph */
212 static xhtmlsection
*xhtml_find_section(paragraph
*p
)
214 xhtmlsection
*ret
= topsection
;
215 if (xhtml_para_level(p
)==-1) { /* first, we back-track to a section paragraph */
216 paragraph
*p2
= sourceparas
;
217 paragraph
*p3
= NULL
;
218 while (p2
&& p2
!=p
) {
219 if (xhtml_para_level(p2
)!=-1) {
224 if (p3
==NULL
) { /* for some reason, we couldn't find a section before this paragraph ... ? */
225 /* Note that this can happen, if you have a cross-reference to before the first chapter starts.
226 * So don't do that, then.
232 while (ret
&& ret
->para
!= p
) {
233 /* printf(" xhtml_find_section(): checking %s for para @ %p\n", ret->fragment, p);*/
239 static xhtmlfile
*xhtml_new_file(xhtmlsection
*sect
)
241 xhtmlfile
*ret
= mknew(xhtmlfile
);
248 ret
->is_leaf
=(sect
!=NULL
&& sect
->level
==conf
.leaf_level
);
250 if (conf
.leaf_level
==0) { /* currently unused */
251 #define FILENAME_MANUAL "Manual.html"
252 #define FILENAME_CONTENTS "Contents.html"
253 ret
->filename
= smalloc(strlen(FILENAME_MANUAL
)+1);
254 sprintf(ret
->filename
, FILENAME_MANUAL
);
256 ret
->filename
= smalloc(strlen(FILENAME_CONTENTS
)+1);
257 sprintf(ret
->filename
, FILENAME_CONTENTS
);
260 paragraph
*p
= sect
->para
;
261 rdstringc fname_c
= { 0, 0, NULL
};
264 for (w
=(p
->kwtext
)?
(p
->kwtext
):(p
->words
); w
; w
=w
->next
)
266 switch (removeattr(w
->type
))
271 case word_WeakCode:*/
272 xhtml_utostr(w
->text
, &c
);
278 rdaddsc(&fname_c
, ".html");
279 ret
->filename
= rdtrimc(&fname_c
);
281 /* printf(" ! new file '%s', is_leaf == %s\n", ret->filename, (ret->is_leaf)?("true"):("false"));*/
286 * Walk the tree fixing up files which are actually leaf (ie
287 * have no children) but aren't at leaf level, so they have the
290 void xhtml_fixup_layout(xhtmlfile
* file
)
292 if (file
->child
==NULL
) {
293 file
->is_leaf
= TRUE
;
295 xhtml_fixup_layout(file
->child
);
298 xhtml_fixup_layout(file
->next
);
302 * Create the tree structure so we know where everything goes.
305 * Ignoring file splitting, we have three choices with each new section:
307 * +-----------------+-----------------+
315 * Y is the last section we added (currentsect).
316 * If sect is the section we want to add, then:
318 * (1) if sect->level < currentsect->level
319 * (2) if sect->level == currentsect->level
320 * (3) if sect->level > currentsect->level
322 * This requires the constraint that you never skip section numbers
323 * (so you can't have a.b.c.d without all of a, a.b and a.b.c existing).
325 * Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change
326 * more than one level at a time. Lots of asserts, and probably part of
327 * the algorithm here, rely on this being true. (It currently isn't
328 * enforced by halibut, however.)
330 * File splitting makes this harder. For instance, say we added at (3)
331 * above and now need to add another section. We are splitting at level
332 * 2, ie the level of Y. Z is the last section we added:
334 * +-----------------+-----------------+
344 * The (1) case is now split; we need to search upwards to find where
345 * to actually link in. The other two cases remain the same (and will
346 * always be like this).
348 * File splitting makes this harder. The decision of whether to split
349 * to a new file is, however, always made on the same condition (is
350 * the level of this section higher than the leaf_level configuration
353 * Treating the cases backwards:
355 * (3) same file if sect->level > conf.leaf_level, otherwise new file
357 * if in the same file, currentsect->child points to sect
358 * otherwise the linking is done through the file tree (which works
359 * in more or less the same way, ie currentfile->child points to
362 * (2) same file if sect->level > conf.leaf_level, otherwise new file
364 * if in the same file, currentsect->next points to sect
365 * otherwise file linking and currentfile->next points to the new
366 * file (we know that Z must have caused a new file to be created)
368 * (1) same file if sect->level > conf.leaf_level, otherwise new file
370 * this is actually effectively the same case as (2) here,
371 * except that we first have to travel up the sections to figure
372 * out which section this new one will be a sibling of. In doing
373 * so, we may disappear off the top of a file and have to go up
374 * to its parent in the file tree.
377 static void xhtml_ponder_layout(paragraph
*p
)
379 xhtmlsection
*lastsection
;
380 xhtmlsection
*currentsect
;
381 xhtmlfile
*currentfile
;
384 topsection
= xhtml_new_section(NULL
);
385 topfile
= xhtml_new_file(NULL
);
386 lastsection
= topsection
;
387 currentfile
= topfile
;
388 currentsect
= topsection
;
390 if (conf
.leaf_level
== 0) {
391 topfile
->is_leaf
= 1;
392 topfile
->sections
= topsection
;
393 topsection
->file
= topfile
;
398 int level
= xhtml_para_level(p
);
399 if (level
>0) /* actually a section */
404 rdstringc fname_c
= { 0, 0, NULL
};
406 sect
= xhtml_new_section(lastsection
);
409 for (w
=(p
->kwtext2
)?
(p
->kwtext2
):(p
->words
); w
; w
=w
->next
) /* kwtext2 because we want numbers only! */
411 switch (removeattr(w
->type
))
416 case word_WeakCode:*/
417 xhtml_utostr(w
->text
, &c
);
423 /* rdaddsc(&fname_c, ".html");*/
424 sect
->fragment
= rdtrimc(&fname_c
);
426 /* printf(" ! adding para @ %p as sect %s, level %i\n", sect->para, sect->fragment, level);*/
428 if (level
>currentsect
->level
) { /* case (3) */
429 if (level
>conf
.leaf_level
) { /* same file */
430 assert(currentfile
->is_leaf
);
431 currentsect
->child
= sect
;
432 sect
->parent
=currentsect
;
433 sect
->file
=currentfile
;
434 /* printf("connected '%s' to existing file '%s' [I]\n", sect->fragment, currentfile->filename);*/
436 } else { /* new file */
437 xhtmlfile
*file
= xhtml_new_file(sect
);
438 assert(!currentfile
->is_leaf
);
439 currentfile
->child
=file
;
441 file
->parent
=currentfile
;
442 /* printf("connected '%s' to new file '%s' [I]\n", sect->fragment, file->filename);*/
446 } else if (level
>= currentsect
->file
->sections
->level
) {
447 /* Case (1) or (2) *AND* still under the section that starts
450 * I'm not convinced that this couldn't be rolled in with the
451 * final else {} leg further down. It seems a lot of effort
454 if (level
>conf
.leaf_level
) { /* stick within the same file */
455 assert(currentfile
->is_leaf
);
456 sect
->file
= currentfile
;
457 while (currentsect
&& currentsect
->level
> level
&&
458 currentsect
->file
==currentsect
->parent
->file
) {
459 currentsect
= currentsect
->parent
;
462 currentsect
->next
= sect
;
463 assert(currentsect
->level
== sect
->level
);
464 sect
->parent
= currentsect
->parent
;
466 /* printf("connected '%s' to existing file '%s' [II]\n", sect->fragment, currentfile->filename);*/
467 } else { /* new file */
468 xhtmlfile
*file
= xhtml_new_file(sect
);
470 currentfile
->next
=file
;
471 file
->parent
=currentfile
->parent
;
472 file
->is_leaf
=(level
==conf
.leaf_level
);
474 /* printf("connected '%s' to new file '%s' [II]\n", sect->fragment, file->filename);*/
478 } else { /* Case (1) or (2) and we must move up the file tree first */
479 /* this loop is now probably irrelevant - we know we can't connect
480 * to anything in the current file */
481 while (currentsect
&& level
<currentsect
->level
) {
482 currentsect
=currentsect
->parent
;
484 /* printf(" * up one level to '%s'\n", currentsect->fragment);*/
486 /* printf(" * up one level (off top of current file)\n");*/
490 /* I'm pretty sure this can now never fire */
491 assert(currentfile
->is_leaf
);
492 /* printf("connected '%s' to existing file '%s' [III]\n", sect->fragment, currentfile->filename);*/
493 sect
->file
= currentfile
;
494 currentsect
->next
=sect
;
496 } else { /* find a file we can attach to */
497 while (currentfile
&& currentfile
->sections
&& level
<currentfile
->sections
->level
) {
498 currentfile
=currentfile
->parent
;
500 /* printf(" * up one file level to '%s'\n", currentfile->filename);*/
502 /* printf(" * up one file level (off top of tree)\n");*/
505 if (currentfile
) { /* new file (we had to skip up a file to
506 get here, so we must be dealing with a
507 level no lower than the configured
509 xhtmlfile
*file
= xhtml_new_file(sect
);
510 currentfile
->next
=file
;
512 file
->parent
=currentfile
->parent
;
513 file
->is_leaf
=(level
==conf
.leaf_level
);
515 /* printf("connected '%s' to new file '%s' [III]\n", sect->fragment, file->filename);*/
519 fatal(err_whatever
, "Ran off the top trying to connect sibling: strange document.");
525 topsection
= lastsection
; /* get correct end of the chain */
526 xhtml_fixup_layout(topfile
); /* leaf files not at leaf level marked as such */
529 static void xhtml_do_index();
530 static void xhtml_do_file(xhtmlfile
*file
);
531 static void xhtml_do_top_file(xhtmlfile
*file
, paragraph
*sourceform
);
532 static void xhtml_do_paras(FILE *fp
, paragraph
*p
);
533 static int xhtml_do_contents_limit(FILE *fp
, xhtmlfile
*file
, int limit
);
534 static int xhtml_do_contents_section_limit(FILE *fp
, xhtmlsection
*section
, int limit
);
535 static int xhtml_add_contents_entry(FILE *fp
, xhtmlsection
*section
, int limit
);
536 static int xhtml_do_contents(FILE *fp
, xhtmlfile
*file
);
537 static int xhtml_do_naked_contents(FILE *fp
, xhtmlfile
*file
);
538 static void xhtml_do_sections(FILE *fp
, xhtmlsection
*sections
);
541 * Do all the files in this structure.
543 static void xhtml_do_files(xhtmlfile
*file
)
547 xhtml_do_files(file
->child
);
549 xhtml_do_files(file
->next
);
553 * Free up all memory used by the file tree from 'xfile' downwards
555 static void xhtml_free_file(xhtmlfile
* xfile
)
561 if (xfile
->filename
) {
562 sfree(xfile
->filename
);
564 xhtml_free_file(xfile
->child
);
565 xhtml_free_file(xfile
->next
);
572 void xhtml_backend(paragraph
*sourceform
, keywordlist
*in_keywords
,
580 sourceparas
= sourceform
;
581 conf
= xhtml_configure(sourceform
);
582 keywords
= in_keywords
;
585 /* Clear up the index entries backend data pointers */
586 for (ti
=0; (ientry
= (indexentry
*)index234(idx
->entries
, ti
))!=NULL
; ti
++) {
587 ientry
->backend_data
=NULL
;
590 xhtml_ponder_layout(sourceform
);
592 /* old system ... (writes to *.alt, but gets some stuff wrong and is ugly) */
593 /* xhtml_level_0(sourceform);
594 for (i=1; i<=conf.leaf_level; i++)
596 xhtml_level(sourceform, i);
599 /* new system ... (writes to *.html, but isn't fully trusted) */
600 xhtml_do_top_file(topfile
, sourceform
);
601 assert(!topfile
->next
); /* shouldn't have a sibling at all */
602 if (topfile
->child
) {
603 xhtml_do_files(topfile
->child
);
607 /* release file, section, index data structures */
610 xhtmlsection
*tmp
= xsect
->chain
;
611 if (xsect
->fragment
) {
612 sfree(xsect
->fragment
);
617 xhtml_free_file(topfile
);
618 for (ti
= 0; (ientry
=(indexentry
*)index234(idx
->entries
, ti
))!=NULL
; ti
++) {
619 if (ientry
->backend_data
!=NULL
) {
620 xhtmlindex
*xi
= (xhtmlindex
*) ientry
->backend_data
;
621 if (xi
->sections
!=NULL
) {
626 ientry
->backend_data
= NULL
;
630 static int xhtml_para_level(paragraph
*p
)
637 case para_UnnumberedChapter
:
642 /* case para_BiblioCited:
/*
 * Name of the file the standalone index page is written to; also used as
 * the target of the final "Next" navigation link.  It points at a string
 * literal, so it is const-qualified to prevent accidental writes.
 */
static const char *xhtml_index_filename = "IndexPage.html";
657 /* Output the nav links for the current file.
658 * file == NULL means we're doing the index
660 static void xhtml_donavlinks(FILE *fp
, xhtmlfile
*file
)
662 xhtmlfile
*xhtml_next_file
= NULL
;
664 if (conf
.nav_attrs
!=NULL
) {
665 fprintf(fp
, " %ls>", conf
.nav_attrs
);
669 if (xhtml_last_file
==NULL
) {
670 fprintf(fp
, "Previous | ");
672 fprintf(fp
, "<a href='%s'>Previous</a> | ", xhtml_last_file
->filename
);
674 fprintf(fp
, "<a href='Contents.html'>Contents</a> | ");
675 if (file
!= NULL
) { /* otherwise we're doing nav links for the index */
676 if (xhtml_next_file
==NULL
)
677 xhtml_next_file
= file
->child
;
678 if (xhtml_next_file
==NULL
)
679 xhtml_next_file
= file
->next
;
680 if (xhtml_next_file
==NULL
)
681 xhtml_next_file
= file
->parent
->next
;
683 if (xhtml_next_file
==NULL
) {
684 if (file
==NULL
) { /* index, so no next file */
685 fprintf(fp
, "Next ");
687 fprintf(fp
, "<a href='%s'>Next</a>", xhtml_index_filename
);
690 fprintf(fp
, "<a href='%s'>Next</a>", xhtml_next_file
->filename
);
692 fprintf(fp
, "</p>\n");
695 /* Write out the index file */
696 static void xhtml_do_index_body(FILE *fp
)
701 if (count234(idx
->entries
) == 0)
702 return; /* don't write anything at all */
704 fprintf(fp
, "<dl>\n");
705 /* iterate over idx->entries using the tree functions and display everything */
706 for (ti
= 0; (y
= (indexentry
*)index234(idx
->entries
, ti
)) != NULL
; ti
++) {
707 if (y
->backend_data
) {
712 xhtml_para(fp
, y
->text
);
713 fprintf(fp
, "</dt>\n<dd>");
715 xi
= (xhtmlindex
*) y
->backend_data
;
716 for (i
=0; i
<xi
->nsection
; i
++) {
717 xhtmlsection
*sect
= xi
->sections
[i
];
719 fprintf(fp
, "<a href='%s#%s'>", sect
->file
->filename
, sect
->fragment
);
720 if (sect
->para
->kwtext
) {
721 xhtml_para(fp
, sect
->para
->kwtext
);
722 } else if (sect
->para
->words
) {
723 xhtml_para(fp
, sect
->para
->words
);
726 if (i
+1<xi
->nsection
) {
731 fprintf(fp
, "</dd>\n");
734 fprintf(fp
, "</dl>\n");
736 static void xhtml_do_index()
738 word temp_word
= { NULL
, NULL
, word_Normal
, 0, 0, L
"Index", { NULL
, 0, 0} };
739 FILE *fp
= fopen(xhtml_index_filename
, "w");
742 fatal(err_cantopenw
, xhtml_index_filename
);
743 xhtml_doheader(fp
, &temp_word
);
744 xhtml_donavlinks(fp
, NULL
);
746 xhtml_do_index_body(fp
);
748 xhtml_donavlinks(fp
, NULL
);
753 /* Output the given file. This includes whatever contents at beginning and end, etc. etc. */
754 static void xhtml_do_file(xhtmlfile
*file
)
756 FILE *fp
= fopen(file
->filename
, "w");
758 fatal(err_cantopenw
, file
->filename
);
760 if (file
->sections
->para
->words
) {
761 xhtml_doheader(fp
, file
->sections
->para
->words
);
762 } else if (file
->sections
->para
->kwtext
) {
763 xhtml_doheader(fp
, file
->sections
->para
->kwtext
);
765 xhtml_doheader(fp
, NULL
);
768 xhtml_donavlinks(fp
, file
);
770 if (file
->is_leaf
&& conf
.leaf_contains_contents
&&
771 xhtml_do_contents(NULL
, file
)>=conf
.leaf_smallest_contents
)
772 xhtml_do_contents(fp
, file
);
773 xhtml_do_sections(fp
, file
->sections
);
775 xhtml_do_naked_contents(fp
, file
);
777 xhtml_donavlinks(fp
, file
);
782 xhtml_last_file
= file
;
785 /* Output the top-level file. */
786 static void xhtml_do_top_file(xhtmlfile
*file
, paragraph
*sourceform
)
790 FILE *fp
= fopen(file
->filename
, "w");
792 fatal(err_cantopenw
, file
->filename
);
794 /* Do the title -- only one allowed */
795 for (p
= sourceform
; p
&& !done
; p
= p
->next
)
797 if (p
->type
== para_Title
)
799 xhtml_doheader(fp
, p
->words
);
804 xhtml_doheader(fp
, NULL
/* Eek! */);
809 for (p
= sourceform
; p
; p
= p
->next
)
811 if (p
->type
== para_Title
) {
812 xhtml_heading(fp
, p
);
817 /* Do the preamble and copyright */
818 for (p
= sourceform
; p
; p
= p
->next
)
820 if (p
->type
== para_Preamble
)
823 xhtml_para(fp
, p
->words
);
824 fprintf(fp
, "</p>\n");
827 for (p
= sourceform
; p
; p
= p
->next
)
829 if (p
->type
== para_Copyright
)
832 xhtml_para(fp
, p
->words
);
833 fprintf(fp
, "</p>\n");
837 xhtml_do_contents(fp
, file
);
838 xhtml_do_sections(fp
, file
->sections
);
840 if (count234(idx
->entries
) > 0) {
841 fprintf(fp
, "<a name=\"index\"></a><h1>Index</h1>\n");
842 xhtml_do_index_body(fp
);
849 /* Convert a Unicode string to an ASCII one. '?' is
850 * used for unmappable characters.
852 static void xhtml_utostr(wchar_t *in
, char **out
)
859 if (in
[i
]>=32 && in
[i
]<=126)
860 (*out
)[i
]=(char)in
[i
];
868 * Write contents for the given file, and subfiles, down to
869 * the appropriate contents depth. Returns the number of
872 static int xhtml_do_contents(FILE *fp
, xhtmlfile
*file
)
874 int level
, limit
, start_level
, count
= 0;
878 level
= (file
->sections
)?
(file
->sections
->level
):(0);
879 limit
= conf
.contents_depth
[(level
>5)?
(5):(level
)];
880 start_level
= (file
->is_leaf
) ?
(level
-1) : (level
);
881 last_level
= start_level
;
883 count
+= xhtml_do_contents_section_limit(fp
, file
->sections
, limit
);
884 count
+= xhtml_do_contents_limit(fp
, file
->child
, limit
);
886 while (last_level
> start_level
) {
888 fprintf(fp
, "</ul>\n");
894 /* As above, but doesn't do anything in the current file */
895 static int xhtml_do_naked_contents(FILE *fp
, xhtmlfile
*file
)
897 int level
, limit
, start_level
, count
= 0;
901 level
= (file
->sections
)?
(file
->sections
->level
):(0);
902 limit
= conf
.contents_depth
[(level
>5)?
(5):(level
)];
903 start_level
= (file
->is_leaf
) ?
(level
-1) : (level
);
904 last_level
= start_level
;
906 count
= xhtml_do_contents_limit(fp
, file
->child
, limit
);
908 while (last_level
> start_level
) {
910 fprintf(fp
, "</ul>\n");
917 * Write contents for the given file, children, and siblings, down to
918 * given limit contents depth.
920 static int xhtml_do_contents_limit(FILE *fp
, xhtmlfile
*file
, int limit
)
924 count
+= xhtml_do_contents_section_limit(fp
, file
->sections
, limit
);
925 count
+= xhtml_do_contents_limit(fp
, file
->child
, limit
);
932 * Write contents entries for the given section tree, down to the
933 * limit contents depth.
935 static int xhtml_do_contents_section_deep_limit(FILE *fp
, xhtmlsection
*section
, int limit
)
939 if (!xhtml_add_contents_entry(fp
, section
, limit
))
943 count
+= xhtml_do_contents_section_deep_limit(fp
, section
->child
, limit
);
944 section
= section
->next
;
950 * Write contents entries for the given section tree, down to the
951 * limit contents depth.
953 static int xhtml_do_contents_section_limit(FILE *fp
, xhtmlsection
*section
, int limit
)
958 xhtml_add_contents_entry(fp
, section
, limit
);
960 count
+= xhtml_do_contents_section_deep_limit(fp
, section
->child
, limit
);
961 /* section=section->child;
962 while (section && xhtml_add_contents_entry(fp, section, limit)) {
963 section = section->next;
969 * Add a section entry, unless we're exceeding the limit, in which
970 * case return FALSE (otherwise return TRUE).
972 static int xhtml_add_contents_entry(FILE *fp
, xhtmlsection
*section
, int limit
)
974 if (!section
|| section
->level
> limit
)
976 if (fp
==NULL
|| !section
->parent
)
978 while (last_level
> section
->level
) {
980 fprintf(fp
, "</ul>\n");
982 while (last_level
< section
->level
) {
984 fprintf(fp
, "<ul>\n");
986 fprintf(fp
, "<li><a href=\"%s#%s\">", section
->file
->filename
, section
->fragment
);
987 if (section
->para
->kwtext
) {
988 xhtml_para(fp
, section
->para
->kwtext
);
989 if (section
->para
->words
) {
993 if (section
->para
->words
) {
994 xhtml_para(fp
, section
->para
->words
);
996 fprintf(fp
, "</a></li>\n");
1001 * Write all the sections in this file. Do all paragraphs in this section, then all
1002 * children (recursively), then go on to the next one (tail recursively).
1004 static void xhtml_do_sections(FILE *fp
, xhtmlsection
*sections
)
1007 currentsection
= sections
;
1008 xhtml_do_paras(fp
, sections
->para
);
1009 xhtml_do_sections(fp
, sections
->child
);
1010 sections
= sections
->next
;
1014 /* Write this list of paragraphs. Close off all lists at the end. */
1015 static void xhtml_do_paras(FILE *fp
, paragraph
*p
)
1017 int last_type
= -1, first
=TRUE
;
1021 /* for (; p && (xhtml_para_level(p)>limit || xhtml_para_level(p)==-1 || first); p=p->next) {*/
1022 for (; p
&& (xhtml_para_level(p
)==-1 || first
); p
=p
->next
) {
1027 * Things we ignore because we've already processed them or
1028 * aren't going to touch them in this pass.
1032 case para_Biblio
: /* only touch BiblioCited */
1033 case para_VersionID
:
1034 case para_Copyright
:
1045 case para_UnnumberedChapter
:
1046 xhtml_heading(fp
, p
);
1051 xhtml_heading(fp
, p
);
1055 fprintf(fp
, "\n<hr />\n");
1059 fprintf(fp
, "\n<p>");
1060 xhtml_para(fp
, p
->words
);
1061 fprintf(fp
, "</p>\n");
1065 case para_NumberedList
:
1066 case para_BiblioCited
:
1067 if (last_type
!=p
->type
) {
1068 /* start up list if necessary */
1069 if (p
->type
== para_Bullet
) {
1070 fprintf(fp
, "<ul>\n");
1071 } else if (p
->type
== para_NumberedList
) {
1072 fprintf(fp
, "<ol>\n");
1073 } else if (p
->type
== para_BiblioCited
) {
1074 fprintf(fp
, "<dl>\n");
1077 if (p
->type
== para_Bullet
|| p
->type
== para_NumberedList
)
1078 fprintf(fp
, "<li>");
1079 else if (p
->type
== para_BiblioCited
) {
1080 fprintf(fp
, "<dt>");
1081 xhtml_para(fp
, p
->kwtext
);
1082 fprintf(fp
, "</dt>\n<dd>");
1084 xhtml_para(fp
, p
->words
);
1085 if (p
->type
== para_BiblioCited
) {
1086 fprintf(fp
, "</dd>\n");
1087 } else if (p
->type
== para_Bullet
|| p
->type
== para_NumberedList
) {
1088 fprintf(fp
, "</li>");
1090 if (p
->type
== para_Bullet
|| p
->type
== para_NumberedList
|| p
->type
== para_BiblioCited
)
1091 /* close off list if necessary */
1093 paragraph
*p2
= p
->next
;
1094 int close_off
=FALSE
;
1095 /* if (p2 && (xhtml_para_level(p2)>limit || xhtml_para_level(p2)==-1)) {*/
1096 if (p2
&& xhtml_para_level(p2
)==-1) {
1097 if (p2
->type
!= p
->type
)
1103 if (p
->type
== para_Bullet
) {
1104 fprintf(fp
, "</ul>\n");
1105 } else if (p
->type
== para_NumberedList
) {
1106 fprintf(fp
, "</ol>\n");
1107 } else if (p
->type
== para_BiblioCited
) {
1108 fprintf(fp
, "</dl>\n");
1115 xhtml_codepara(fp
, p
->words
);
1118 last_type
= p
->type
;
1123 * Output a header for this XHTML file.
1125 static void xhtml_doheader(FILE *fp
, word
*title
)
1127 fprintf(fp
, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n");
1128 fprintf(fp
, "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n");
1129 fprintf(fp
, "<html xmlns='http://www.w3.org/1999/xhtml'>\n\n<head>\n<title>");
1131 fprintf(fp
, "The thing with no name!");
1133 xhtml_para(fp
, title
);
1134 fprintf(fp
, "</title>\n");
1135 fprintf(fp
, "<meta name=\"generator\" content=\"Halibut %s xhtml-backend\" />\n", version
);
1137 fprintf(fp
, "<meta name=\"author\" content=\"%ls\" />\n", conf
.author
);
1138 if (conf
.description
)
1139 fprintf(fp
, "<meta name=\"description\" content=\"%ls\" />\n", conf
.description
);
1141 fprintf(fp
, "%ls\n", conf
.head_end
);
1142 fprintf(fp
, "</head>\n\n");
1144 fprintf(fp
, "%ls\n", conf
.body
);
1146 fprintf(fp
, "<body>\n");
1147 if (conf
.body_start
)
1148 fprintf(fp
, "%ls\n", conf
.body_start
);
1152 * Output a footer for this XHTML file.
1154 static void xhtml_dofooter(FILE *fp
)
1156 fprintf(fp
, "\n<hr />\n\n");
1158 fprintf(fp
, "%ls\n", conf
.body_end
);
1159 if (!conf
.suppress_address
) {
1160 fprintf(fp
,"<address>\n");
1161 if (conf
.address_start
)
1162 fprintf(fp
, "%ls\n", conf
.address_start
);
1163 /* Do the version ID */
1164 if (conf
.include_version_id
) {
1167 for (p
= sourceparas
; p
; p
= p
->next
)
1168 if (p
->type
== para_VersionID
) {
1169 xhtml_versionid(fp
, p
->words
, started
);
1173 if (conf
.address_end
)
1174 fprintf(fp
, "%ls\n", conf
.address_end
);
1175 fprintf(fp
, "</address>\n");
1177 fprintf(fp
, "</body>\n\n</html>\n");
1181 * Output the versionid paragraph. Typically this is a version control
1182 * ID string (such as $Id...$ in RCS).
1184 static void xhtml_versionid(FILE *fp
, word
*text
, int started
)
1186 rdstringc t
= { 0, 0, NULL
};
1188 rdaddc(&t
, '['); /* FIXME: configurability */
1189 xhtml_rdaddwc(&t
, text
, NULL
);
1190 rdaddc(&t
, ']'); /* FIXME: configurability */
1193 fprintf(fp
, "<br>\n");
1194 fprintf(fp
, "%s\n", t
.text
);
1198 /* Is this an XHTML reserved character? */
1199 static int xhtml_reservedchar(int c
)
1201 if (c
=='&' || c
=='<' || c
=='>' || c
=='"')
1208 * Convert a wide string into valid XHTML: Anything outside ASCII will
1209 * be fixed up as an entity. Currently we don't worry about constraining the
1210 * encoded character set, which we should probably do at some point (we can
1211 * still fix up and return FALSE - see the last comment here). We also don't
1214 * Because this is only used for words, spaces are HARD spaces (any other
1215 * spaces will be word_Whitespace not word_Normal). So they become &nbsp;.
1216 * Unless hard_spaces is FALSE, of course (code paragraphs break the above
1219 * If `result' is non-NULL, mallocs the resulting string and stores a pointer to
1220 * it in `*result'. If `result' is NULL, merely checks whether all
1221 * characters in the string are feasible.
1223 * Return is nonzero if all characters are OK. If not all
1224 * characters are OK but `result' is non-NULL, a result _will_
1225 * still be generated!
1227 static int xhtml_convert(wchar_t *s
, char **result
, int hard_spaces
) {
1228 int doing
= (result
!= 0);
1231 int plen
= 0, psize
= 0;
1236 #define ensure_size(i) if (i>=psize) { psize = i+256; p = resize(p, psize); }
1238 if (((c
== 32 && !hard_spaces
) || (c
> 32 && c
<= 126 && !xhtml_reservedchar(c
)))) {
1243 p
[plen
++] = (char)c
;
1246 /* Char needs fixing up. */
1247 /* ok = FALSE; -- currently we never return FALSE; we
1248 * might want to when considering a character set for the
1253 if (c
==32) { /* a space in a word is a hard space */
1254 ensure_size(plen
+6); /* includes space for the NUL, which is subsequently stomped on */
1255 sprintf(p
+plen
, " ");
1258 /* FIXME: entity names! */
1259 ensure_size(plen
+8); /* includes space for the NUL, which is subsequently stomped on */
1260 plen
+=sprintf(p
+plen
, "&#%04i;", (int)c
);
1266 p
= resize(p
, plen
+1);
1274 * This formats the given words as XHTML.
1276 static void xhtml_rdaddwc(rdstringc
*rs
, word
*text
, word
*end
) {
1283 for (; text
&& text
!= end
; text
= text
->next
) {
1284 switch (text
->type
) {
1285 case word_HyperLink
:
1286 xhtml_utostr(text
->text
, &c
);
1287 rdaddsc(rs
, "<a href=\"");
1293 case word_UpperXref
:
1294 case word_LowerXref
:
1295 kwl
= kw_lookup(keywords
, text
->text
);
1297 sect
=xhtml_find_section(kwl
->para
);
1299 rdaddsc(rs
, "<a href=\"");
1300 rdaddsc(rs
, sect
->file
->filename
);
1302 rdaddsc(rs
, sect
->fragment
);
1305 rdaddsc(rs
, "<a href=\"Apologies.html\"><!-- probably a bibliography cross reference -->");
1306 error(err_whatever
, "Couldn't locate cross-reference! (Probably a bibliography entry.)");
1309 rdaddsc(rs
, "<a href=\"Apologies.html\"><!-- unknown cross-reference -->");
1310 error(err_whatever
, "Couldn't locate cross-reference! (Wasn't in source file.)");
1314 case word_IndexRef
: /* in theory we could make an index target here */
1315 /* rdaddsc(rs, "<a name=\"idx-");
1316 xhtml_utostr(text->text, &c);
1319 rdaddsc(rs, "\"></a>");*/
1320 /* what we _do_ need to do is to fix up the backend data
1321 * for any indexentry this points to.
1323 for (ti
=0; (itag
= (indextag
*)index234(idx
->tags
, ti
))!=NULL
; ti
++) {
1324 /* FIXME: really ustricmp() and not ustrcmp()? */
1325 if (ustricmp(itag
->name
, text
->text
)==0) {
1330 if (itag
->refs
!=NULL
) {
1332 for (i
=0; i
<itag
->nrefs
; i
++) {
1333 xhtmlindex
*idx_ref
;
1336 ientry
= itag
->refs
[i
];
1337 if (ientry
->backend_data
==NULL
) {
1338 idx_ref
= (xhtmlindex
*) smalloc(sizeof(xhtmlindex
));
1340 fatal(err_nomemory
);
1341 idx_ref
->nsection
= 0;
1343 idx_ref
->sections
= (xhtmlsection
**) smalloc(idx_ref
->size
* sizeof(xhtmlsection
*));
1344 if (idx_ref
->sections
==NULL
)
1345 fatal(err_nomemory
);
1346 ientry
->backend_data
= idx_ref
;
1348 idx_ref
= ientry
->backend_data
;
1349 if (idx_ref
->nsection
+1 > idx_ref
->size
) {
1350 int new_size
= idx_ref
->size
* 2;
1351 idx_ref
->sections
= srealloc(idx_ref
->sections
, new_size
* sizeof(xhtmlsection
));
1352 if (idx_ref
->sections
==NULL
) {
1353 fatal(err_nomemory
);
1355 idx_ref
->size
= new_size
;
1358 idx_ref
->sections
[idx_ref
->nsection
++] = currentsection
;
1363 fatal(err_whatever
, "Index tag had no entries!");
1366 fprintf(stderr
, "Looking for index entry '%ls'\n", text
->text
);
1367 fatal(err_whatever
, "Couldn't locate index entry! (Wasn't in index.)");
1373 rdaddsc(rs
, "</a>");
1380 case word_WhiteSpace
:
1381 case word_EmphSpace
:
1382 case word_CodeSpace
:
1383 case word_WkCodeSpace
:
1385 case word_EmphQuote
:
1386 case word_CodeQuote
:
1387 case word_WkCodeQuote
:
1388 assert(text
->type
!= word_CodeQuote
&&
1389 text
->type
!= word_WkCodeQuote
);
1390 if (towordstyle(text
->type
) == word_Emph
&&
1391 (attraux(text
->aux
) == attr_First
||
1392 attraux(text
->aux
) == attr_Only
))
1393 rdaddsc(rs
, "<em>");
1394 else if ((towordstyle(text
->type
) == word_Code
|| towordstyle(text
->type
) == word_WeakCode
) &&
1395 (attraux(text
->aux
) == attr_First
||
1396 attraux(text
->aux
) == attr_Only
))
1397 rdaddsc(rs
, "<code>");
1399 if (removeattr(text
->type
) == word_Normal
) {
1400 if (xhtml_convert(text
->text
, &c
, TRUE
)) /* spaces in the word are hard */
1403 xhtml_rdaddwc(rs
, text
->alt
, NULL
);
1405 } else if (removeattr(text
->type
) == word_WhiteSpace
) {
1407 } else if (removeattr(text
->type
) == word_Quote
) {
1408 rdaddsc(rs
, """);
1411 if (towordstyle(text
->type
) == word_Emph
&&
1412 (attraux(text
->aux
) == attr_Last
||
1413 attraux(text
->aux
) == attr_Only
))
1414 rdaddsc(rs
, "</em>");
1415 else if ((towordstyle(text
->type
) == word_Code
|| towordstyle(text
->type
) == word_WeakCode
) &&
1416 (attraux(text
->aux
) == attr_Last
||
1417 attraux(text
->aux
) == attr_Only
))
1418 rdaddsc(rs
, "</code>");
1424 /* Output a heading, formatted as XHTML.
1426 static void xhtml_heading(FILE *fp
, paragraph
*p
)
1428 rdstringc t
= { 0, 0, NULL
};
1429 word
*tprefix
= p
->kwtext
;
1430 word
*nprefix
= p
->kwtext2
;
1431 word
*text
= p
->words
;
1432 int level
= xhtml_para_level(p
);
1433 xhtmlsection
*sect
= xhtml_find_section(p
);
1436 fragment
= sect
->fragment
;
1438 if (p
->type
== para_Title
)
1441 fragment
= ""; /* FIXME: what else can we do? */
1442 error(err_whatever
, "Couldn't locate heading cross-reference!");
1446 if (level
>2 && nprefix
) { /* FIXME: configurability on the level thing */
1447 xhtml_rdaddwc(&t
, nprefix
, NULL
);
1448 rdaddc(&t
, ' '); /* FIXME: as below */
1449 } else if (tprefix
) {
1450 xhtml_rdaddwc(&t
, tprefix
, NULL
);
1451 rdaddsc(&t
, ": "); /* FIXME: configurability */
1453 xhtml_rdaddwc(&t
, text
, NULL
);
1455 * If we're outputting in single-file mode, we need to lower
1456 * the level of each heading by one, because the overall
1457 * document title will be sitting right at the top as an <h1>
1458 * and so chapters and sections should start at <h2>.
1460 * Even if not, the document title will come back from
1461 * xhtml_para_level() as level zero, so we must increment that
1462 * no matter what leaf_level is set to.
1464 if (conf
.leaf_level
== 0 || level
== 0)
1466 fprintf(fp
, "<a name=\"%s\"></a><h%i>%s</h%i>\n", fragment
, level
, t
.text
, level
);
1470 /* Output a paragraph. Styles are handled by xhtml_rdaddwc().
1471 * This looks pretty simple; I may have missed something ...
1473 static void xhtml_para(FILE *fp
, word
*text
)
1475 rdstringc out
= { 0, 0, NULL
};
1476 xhtml_rdaddwc(&out
, text
, NULL
);
1477 fprintf(fp
, "%s", out
.text
);
1481 /* Output a code paragraph. I'm treating this as preformatted, which
1482 * may not be entirely correct. See xhtml_para() for my worries about
1483 * this being overly-simple; however I think that most of the complexity
1484 * of the text backend came entirely out of word wrapping anyway.
1486 static void xhtml_codepara(FILE *fp
, word
*text
)
1488 fprintf(fp
, "<pre>");
1489 for (; text
; text
= text
->next
) if (text
->type
== word_WeakCode
) {
1491 xhtml_convert(text
->text
, &c
, FALSE
);
1492 fprintf(fp
, "%s\n", c
);
1495 fprintf(fp
, "</pre>\n");