2 * xhtml backend for Halibut
3 * (initial implementation by James Aylett)
7 * +++ doesn't handle non-breaking hyphens. Not sure how to yet.
8 * +++ entity names (from a file -- ideally supply normal SGML files)
9 * +++ configuration directive to file split where the current layout
10 * code wouldn't. Needs changes to _ponder_layout() and _do_paras(),
15 * +++ biblio/index references target the nearest section marker, rather
16 * than having a dedicated target themselves. In large bibliographies
17 * this will cause problems. (The solution is to fake up a response
18 * from xhtml_find_section(), probably linking it into the sections
19 * chain just in case we need it again, and to make freeing it up
20 * easier.) docsrc.pl used to work as we do, however, and SGT agrees that
21 * this is acceptable for now.
22 * +++ can't cope with leaf-level == 0. It's all to do with the
23 * top-level file not being normal, probably not even having a valid
24 * section level, and stuff like that. I question whether this is an
25 * issue, frankly; small manuals that fit on one page should probably
26 * not be written in halibut at all.
34 struct xhtmlsection_Struct
{
35 struct xhtmlsection_Struct
*next
; /* next sibling (NULL if split across files) */
36 struct xhtmlsection_Struct
*child
; /* NULL if split across files */
37 struct xhtmlsection_Struct
*parent
; /* NULL if split across files */
38 struct xhtmlsection_Struct
*chain
; /* single structure independent of weird trees */
40 struct xhtmlfile_Struct
*file
; /* which file is this a part of? */
41 char *fragment
; /* fragment id within the file */
45 struct xhtmlfile_Struct
{
46 struct xhtmlfile_Struct
*next
;
47 struct xhtmlfile_Struct
*child
;
48 struct xhtmlfile_Struct
*parent
;
50 struct xhtmlsection_Struct
*sections
; /* sections within this file (only one for non-leaf) */
51 int is_leaf
; /* is this file a leaf file, ie does it not have any children? */
54 typedef struct xhtmlsection_Struct xhtmlsection
;
55 typedef struct xhtmlfile_Struct xhtmlfile
;
56 typedef struct xhtmlindex_Struct xhtmlindex
;
58 struct xhtmlindex_Struct
{
61 xhtmlsection
**sections
;
65 int contents_depth
[6];
66 int leaf_contains_contents
;
68 int leaf_smallest_contents
;
69 int include_version_id
;
70 wchar_t *author
, *description
;
71 wchar_t *head_end
, *body
, *body_start
, *body_end
, *address_start
, *address_end
, *nav_attrs
;
75 /*static void xhtml_level(paragraph *, int);
76 static void xhtml_level_0(paragraph *);
77 static void xhtml_docontents(FILE *, paragraph *, int);
78 static void xhtml_dosections(FILE *, paragraph *, int);
79 static void xhtml_dobody(FILE *, paragraph *, int);*/
81 static void xhtml_doheader(FILE *, word
*);
82 static void xhtml_dofooter(FILE *);
83 static void xhtml_versionid(FILE *, word
*, int);
85 static void xhtml_utostr(wchar_t *, char **);
86 static int xhtml_para_level(paragraph
*);
87 static int xhtml_reservedchar(int);
89 static int xhtml_convert(wchar_t *, char **, int);
90 static void xhtml_rdaddwc(rdstringc
*, word
*, word
*);
91 static void xhtml_para(FILE *, word
*);
92 static void xhtml_codepara(FILE *, word
*);
93 static void xhtml_heading(FILE *, paragraph
*);
95 /* File-global variables are much easier than passing these things
96 * all over the place. Evil, but easier. We can replace this with a single
97 * structure at some point.
99 static xhtmlconfig conf
;
100 static keywordlist
*keywords
;
101 static indexdata
*idx
;
102 static xhtmlfile
*topfile
;
103 static xhtmlsection
*topsection
;
104 static paragraph
*sourceparas
;
105 static xhtmlfile
*lastfile
;
106 static xhtmlfile
*xhtml_last_file
= NULL
;
107 static int last_level
=-1;
108 static xhtmlsection
*currentsection
;
110 static xhtmlconfig
xhtml_configure(paragraph
*source
)
117 ret
.contents_depth
[0] = 2;
118 ret
.contents_depth
[1] = 3;
119 ret
.contents_depth
[2] = 4;
120 ret
.contents_depth
[3] = 5;
121 ret
.contents_depth
[4] = 6;
122 ret
.contents_depth
[5] = 7;
124 ret
.leaf_smallest_contents
= 4;
125 ret
.leaf_contains_contents
= FALSE
;
126 ret
.include_version_id
= TRUE
;
128 ret
.description
= NULL
;
131 ret
.body_start
= NULL
;
133 ret
.address_start
= NULL
;
134 ret
.address_end
= NULL
;
135 ret
.nav_attrs
= NULL
;
136 ret
.suppress_address
= FALSE
;
138 for (; source
; source
= source
->next
)
140 if (source
->type
== para_Config
)
142 if (!ustricmp(source
->keyword
, L
"xhtml-contents-depth-0")) {
143 ret
.contents_depth
[0] = utoi(uadv(source
->keyword
));
144 } else if (!ustricmp(source
->keyword
, L
"xhtml-contents-depth-1")) {
145 ret
.contents_depth
[1] = utoi(uadv(source
->keyword
));
146 } else if (!ustricmp(source
->keyword
, L
"xhtml-contents-depth-2")) {
147 ret
.contents_depth
[2] = utoi(uadv(source
->keyword
));
148 } else if (!ustricmp(source
->keyword
, L
"xhtml-contents-depth-3")) {
149 ret
.contents_depth
[3] = utoi(uadv(source
->keyword
));
150 } else if (!ustricmp(source
->keyword
, L
"xhtml-contents-depth-4")) {
151 ret
.contents_depth
[4] = utoi(uadv(source
->keyword
));
152 } else if (!ustricmp(source
->keyword
, L
"xhtml-contents-depth-5")) {
153 ret
.contents_depth
[5] = utoi(uadv(source
->keyword
));
154 } else if (!ustricmp(source
->keyword
, L
"xhtml-leaf-level")) {
155 ret
.leaf_level
= utoi(uadv(source
->keyword
));
156 } else if (!ustricmp(source
->keyword
, L
"xhtml-leaf-smallest-contents")) {
157 ret
.leaf_smallest_contents
= utoi(uadv(source
->keyword
));
158 } else if (!ustricmp(source
->keyword
, L
"xhtml-versionid")) {
159 ret
.include_version_id
= utob(uadv(source
->keyword
));
160 } else if (!ustricmp(source
->keyword
, L
"xhtml-leaf-contains-contents")) {
161 ret
.leaf_contains_contents
= utob(uadv(source
->keyword
));
162 } else if (!ustricmp(source
->keyword
, L
"xhtml-suppress-address")) {
163 ret
.suppress_address
= utob(uadv(source
->keyword
));
164 } else if (!ustricmp(source
->keyword
, L
"xhtml-author")) {
165 ret
.author
= uadv(source
->keyword
);
166 } else if (!ustricmp(source
->keyword
, L
"xhtml-description")) {
167 ret
.description
= uadv(source
->keyword
);
168 } else if (!ustricmp(source
->keyword
, L
"xhtml-head-end")) {
169 ret
.head_end
= uadv(source
->keyword
);
170 } else if (!ustricmp(source
->keyword
, L
"xhtml-body-start")) {
171 ret
.body_start
= uadv(source
->keyword
);
172 } else if (!ustricmp(source
->keyword
, L
"xhtml-body-tag")) {
173 ret
.body
= uadv(source
->keyword
);
174 } else if (!ustricmp(source
->keyword
, L
"xhtml-body-end")) {
175 ret
.body_end
= uadv(source
->keyword
);
176 } else if (!ustricmp(source
->keyword
, L
"xhtml-address-start")) {
177 ret
.address_start
= uadv(source
->keyword
);
178 } else if (!ustricmp(source
->keyword
, L
"xhtml-address-end")) {
179 ret
.address_end
= uadv(source
->keyword
);
180 } else if (!ustricmp(source
->keyword
, L
"xhtml-navigation-attributes")) {
181 ret
.nav_attrs
= uadv(source
->keyword
);
186 /* printf(" !!! leaf_level = %i\n", ret.leaf_level);
187 printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]);
188 printf(" !!! contentdepth-1 = %i\n", ret.contents_depth[1]);
189 printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]);
190 printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]);
191 printf(" !!! contentdepth-4 = %i\n", ret.contents_depth[4]);
192 printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]);
193 printf(" !!! leaf_contains_contents = %i\n", ret.leaf_contains_contents);*/
197 static xhtmlsection
*xhtml_new_section(xhtmlsection
*last
)
199 xhtmlsection
*ret
= mknew(xhtmlsection
);
207 ret
->level
=-1; /* marker: end of chain */
211 /* Returns NULL or the section that marks that paragraph */
212 static xhtmlsection
*xhtml_find_section(paragraph
*p
)
214 xhtmlsection
*ret
= topsection
;
215 if (xhtml_para_level(p
)==-1) { /* first, we back-track to a section paragraph */
216 paragraph
*p2
= sourceparas
;
217 paragraph
*p3
= NULL
;
218 while (p2
&& p2
!=p
) {
219 if (xhtml_para_level(p2
)!=-1) {
224 if (p3
==NULL
) { /* for some reason, we couldn't find a section before this paragraph ... ? */
225 /* Note that this can happen if a cross-reference points to a paragraph before the first chapter starts.
226 * So don't do that, then.
232 while (ret
&& ret
->para
!= p
) {
233 /* printf(" xhtml_find_section(): checking %s for para @ %p\n", ret->fragment, p);*/
239 static xhtmlfile
*xhtml_new_file(xhtmlsection
*sect
)
241 xhtmlfile
*ret
= mknew(xhtmlfile
);
248 ret
->is_leaf
=(sect
!=NULL
&& sect
->level
==conf
.leaf_level
);
250 if (conf
.leaf_level
==0) { /* currently unused */
251 #define FILENAME_MANUAL "Manual.html"
252 #define FILENAME_CONTENTS "Contents.html"
253 ret
->filename
= smalloc(strlen(FILENAME_MANUAL
)+1);
254 sprintf(ret
->filename
, FILENAME_MANUAL
);
256 ret
->filename
= smalloc(strlen(FILENAME_CONTENTS
)+1);
257 sprintf(ret
->filename
, FILENAME_CONTENTS
);
260 paragraph
*p
= sect
->para
;
261 rdstringc fname_c
= { 0, 0, NULL
};
264 for (w
=(p
->kwtext
)?
(p
->kwtext
):(p
->words
); w
; w
=w
->next
)
266 switch (removeattr(w
->type
))
271 case word_WeakCode:*/
272 xhtml_utostr(w
->text
, &c
);
278 rdaddsc(&fname_c
, ".html");
279 ret
->filename
= rdtrimc(&fname_c
);
281 /* printf(" ! new file '%s', is_leaf == %s\n", ret->filename, (ret->is_leaf)?("true"):("false"));*/
286 * Walk the tree fixing up files which are actually leaf (ie
287 * have no children) but aren't at leaf level, so they have the
290 void xhtml_fixup_layout(xhtmlfile
* file
)
292 if (file
->child
==NULL
) {
293 file
->is_leaf
= TRUE
;
295 xhtml_fixup_layout(file
->child
);
298 xhtml_fixup_layout(file
->next
);
302 * Create the tree structure so we know where everything goes.
305 * Ignoring file splitting, we have three choices with each new section:
307 * +-----------------+-----------------+
315 * Y is the last section we added (currentsect).
316 * If sect is the section we want to add, then:
318 * (1) if sect->level < currentsect->level
319 * (2) if sect->level == currentsect->level
320 * (3) if sect->level > currentsect->level
322 * This requires the constraint that you never skip section numbers
323 * (so you can't have a.b.c.d without all of a, a.b and a.b.c existing).
325 * Note that you _can_ have 1.1.1.1 followed by 1.2 - you can move up
326 * more than one level at a time. Lots of asserts, and probably part of
327 * the algorithm here, rely on the no-skipping constraint holding. (It currently isn't
328 * enforced by halibut, however.)
330 * File splitting makes this harder. For instance, say we added at (3)
331 * above and now need to add another section. We are splitting at level
332 * 2, ie the level of Y. Z is the last section we added:
334 * +-----------------+-----------------+
344 * The (1) case is now split; we need to search upwards to find where
345 * to actually link in. The other two cases remain the same (and will
346 * always be like this).
348 * Although file splitting complicates the linking, the decision of whether
349 * to split to a new file always rests on the same condition (is
350 * the level of this section higher than the leaf_level configuration
353 * Treating the cases backwards:
355 * (3) same file if sect->level > conf.leaf_level, otherwise new file
357 * if in the same file, currentsect->child points to sect
358 * otherwise the linking is done through the file tree (which works
359 * in more or less the same way, ie currentfile->child points to
362 * (2) same file if sect->level > conf.leaf_level, otherwise new file
364 * if in the same file, currentsect->next points to sect
365 * otherwise file linking and currentfile->next points to the new
366 * file (we know that Z must have caused a new file to be created)
368 * (1) same file if sect->level > conf.leaf_level, otherwise new file
370 * this is actually effectively the same case as (2) here,
371 * except that we first have to travel up the sections to figure
372 * out which section this new one will be a sibling of. In doing
373 * so, we may disappear off the top of a file and have to go up
374 * to its parent in the file tree.
377 static void xhtml_ponder_layout(paragraph
*p
)
379 xhtmlsection
*lastsection
;
380 xhtmlsection
*currentsect
;
381 xhtmlfile
*currentfile
;
384 topsection
= xhtml_new_section(NULL
);
385 topfile
= xhtml_new_file(NULL
);
386 lastsection
= topsection
;
387 currentfile
= topfile
;
388 currentsect
= topsection
;
390 if (conf
.leaf_level
== 0) {
391 topfile
->is_leaf
= 1;
392 topfile
->sections
= topsection
;
393 topsection
->file
= topfile
;
398 int level
= xhtml_para_level(p
);
399 if (level
>0) /* actually a section */
404 rdstringc fname_c
= { 0, 0, NULL
};
406 sect
= xhtml_new_section(lastsection
);
409 for (w
=(p
->kwtext2
)?
(p
->kwtext2
):(p
->words
); w
; w
=w
->next
) /* kwtext2 because we want numbers only! */
411 switch (removeattr(w
->type
))
416 case word_WeakCode:*/
417 xhtml_utostr(w
->text
, &c
);
423 /* rdaddsc(&fname_c, ".html");*/
424 sect
->fragment
= rdtrimc(&fname_c
);
426 /* printf(" ! adding para @ %p as sect %s, level %i\n", sect->para, sect->fragment, level);*/
428 if (level
>currentsect
->level
) { /* case (3) */
429 if (level
>conf
.leaf_level
) { /* same file */
430 assert(currentfile
->is_leaf
);
431 currentsect
->child
= sect
;
432 sect
->parent
=currentsect
;
433 sect
->file
=currentfile
;
434 /* printf("connected '%s' to existing file '%s' [I]\n", sect->fragment, currentfile->filename);*/
436 } else { /* new file */
437 xhtmlfile
*file
= xhtml_new_file(sect
);
438 assert(!currentfile
->is_leaf
);
439 currentfile
->child
=file
;
441 file
->parent
=currentfile
;
442 /* printf("connected '%s' to new file '%s' [I]\n", sect->fragment, file->filename);*/
446 } else if (level
>= currentsect
->file
->sections
->level
) {
447 /* Case (1) or (2) *AND* still under the section that starts
450 * I'm not convinced that this couldn't be rolled in with the
451 * final else {} leg further down. It seems a lot of effort
454 if (level
>conf
.leaf_level
) { /* stick within the same file */
455 assert(currentfile
->is_leaf
);
456 sect
->file
= currentfile
;
457 while (currentsect
&& currentsect
->level
> level
&&
458 currentsect
->file
==currentsect
->parent
->file
) {
459 currentsect
= currentsect
->parent
;
462 currentsect
->next
= sect
;
463 assert(currentsect
->level
== sect
->level
);
464 sect
->parent
= currentsect
->parent
;
466 /* printf("connected '%s' to existing file '%s' [II]\n", sect->fragment, currentfile->filename);*/
467 } else { /* new file */
468 xhtmlfile
*file
= xhtml_new_file(sect
);
470 currentfile
->next
=file
;
471 file
->parent
=currentfile
->parent
;
472 file
->is_leaf
=(level
==conf
.leaf_level
);
474 /* printf("connected '%s' to new file '%s' [II]\n", sect->fragment, file->filename);*/
478 } else { /* Case (1) or (2) and we must move up the file tree first */
479 /* this loop is now probably irrelevant - we know we can't connect
480 * to anything in the current file */
481 while (currentsect
&& level
<currentsect
->level
) {
482 currentsect
=currentsect
->parent
;
484 /* printf(" * up one level to '%s'\n", currentsect->fragment);*/
486 /* printf(" * up one level (off top of current file)\n");*/
490 /* I'm pretty sure this can now never fire */
491 assert(currentfile
->is_leaf
);
492 /* printf("connected '%s' to existing file '%s' [III]\n", sect->fragment, currentfile->filename);*/
493 sect
->file
= currentfile
;
494 currentsect
->next
=sect
;
496 } else { /* find a file we can attach to */
497 while (currentfile
&& currentfile
->sections
&& level
<currentfile
->sections
->level
) {
498 currentfile
=currentfile
->parent
;
500 /* printf(" * up one file level to '%s'\n", currentfile->filename);*/
502 /* printf(" * up one file level (off top of tree)\n");*/
505 if (currentfile
) { /* new file (we had to skip up a file to
506 get here, so we must be dealing with a
507 level no lower than the configured
509 xhtmlfile
*file
= xhtml_new_file(sect
);
510 currentfile
->next
=file
;
512 file
->parent
=currentfile
->parent
;
513 file
->is_leaf
=(level
==conf
.leaf_level
);
515 /* printf("connected '%s' to new file '%s' [III]\n", sect->fragment, file->filename);*/
519 fatal(err_whatever
, "Ran off the top trying to connect sibling: strange document.");
525 topsection
= lastsection
; /* get correct end of the chain */
526 xhtml_fixup_layout(topfile
); /* leaf files not at leaf level marked as such */
529 static void xhtml_do_index();
530 static void xhtml_do_file(xhtmlfile
*file
);
531 static void xhtml_do_top_file(xhtmlfile
*file
, paragraph
*sourceform
);
532 static void xhtml_do_paras(FILE *fp
, paragraph
*p
);
533 static int xhtml_do_contents_limit(FILE *fp
, xhtmlfile
*file
, int limit
);
534 static int xhtml_do_contents_section_limit(FILE *fp
, xhtmlsection
*section
, int limit
);
535 static int xhtml_add_contents_entry(FILE *fp
, xhtmlsection
*section
, int limit
);
536 static int xhtml_do_contents(FILE *fp
, xhtmlfile
*file
);
537 static int xhtml_do_naked_contents(FILE *fp
, xhtmlfile
*file
);
538 static void xhtml_do_sections(FILE *fp
, xhtmlsection
*sections
);
541 * Do all the files in this structure.
543 static void xhtml_do_files(xhtmlfile
*file
)
547 xhtml_do_files(file
->child
);
549 xhtml_do_files(file
->next
);
553 * Free up all memory used by the file tree from 'xfile' downwards
555 static void xhtml_free_file(xhtmlfile
* xfile
)
561 if (xfile
->filename
) {
562 sfree(xfile
->filename
);
564 xhtml_free_file(xfile
->child
);
565 xhtml_free_file(xfile
->next
);
572 void xhtml_backend(paragraph
*sourceform
, keywordlist
*in_keywords
,
580 sourceparas
= sourceform
;
581 conf
= xhtml_configure(sourceform
);
582 keywords
= in_keywords
;
585 /* Clear up the index entries backend data pointers */
586 for (ti
=0; (ientry
= (indexentry
*)index234(idx
->entries
, ti
))!=NULL
; ti
++) {
587 ientry
->backend_data
=NULL
;
590 xhtml_ponder_layout(sourceform
);
592 /* old system ... (writes to *.alt, but gets some stuff wrong and is ugly) */
593 /* xhtml_level_0(sourceform);
594 for (i=1; i<=conf.leaf_level; i++)
596 xhtml_level(sourceform, i);
599 /* new system ... (writes to *.html, but isn't fully trusted) */
600 xhtml_do_top_file(topfile
, sourceform
);
601 assert(!topfile
->next
); /* shouldn't have a sibling at all */
602 if (topfile
->child
) {
603 xhtml_do_files(topfile
->child
);
607 /* release file, section, index data structures */
610 xhtmlsection
*tmp
= xsect
->chain
;
611 if (xsect
->fragment
) {
612 sfree(xsect
->fragment
);
617 xhtml_free_file(topfile
);
618 for (ti
= 0; (ientry
=(indexentry
*)index234(idx
->entries
, ti
))!=NULL
; ti
++) {
619 if (ientry
->backend_data
!=NULL
) {
620 xhtmlindex
*xi
= (xhtmlindex
*) ientry
->backend_data
;
621 if (xi
->sections
!=NULL
) {
626 ientry
->backend_data
= NULL
;
630 static int xhtml_para_level(paragraph
*p
)
634 case para_UnnumberedChapter
:
640 /* case para_BiblioCited:
653 static char* xhtml_index_filename
= "IndexPage.html";
655 /* Output the nav links for the current file.
656 * file == NULL means we're doing the index
658 static void xhtml_donavlinks(FILE *fp
, xhtmlfile
*file
)
660 xhtmlfile
*xhtml_next_file
= NULL
;
662 if (conf
.nav_attrs
!=NULL
) {
663 fprintf(fp
, " %ls>", conf
.nav_attrs
);
667 if (xhtml_last_file
==NULL
) {
668 fprintf(fp
, "Previous | ");
670 fprintf(fp
, "<a href='%s'>Previous</a> | ", xhtml_last_file
->filename
);
672 fprintf(fp
, "<a href='Contents.html'>Contents</a> | ");
673 if (file
!= NULL
) { /* otherwise we're doing nav links for the index */
674 if (xhtml_next_file
==NULL
)
675 xhtml_next_file
= file
->child
;
676 if (xhtml_next_file
==NULL
)
677 xhtml_next_file
= file
->next
;
678 if (xhtml_next_file
==NULL
)
679 xhtml_next_file
= file
->parent
->next
;
681 if (xhtml_next_file
==NULL
) {
682 if (file
==NULL
) { /* index, so no next file */
683 fprintf(fp
, "Next ");
685 fprintf(fp
, "<a href='%s'>Next</a>", xhtml_index_filename
);
688 fprintf(fp
, "<a href='%s'>Next</a>", xhtml_next_file
->filename
);
690 fprintf(fp
, "</p>\n");
693 /* Write out the index file */
694 static void xhtml_do_index_body(FILE *fp
)
699 if (count234(idx
->entries
) == 0)
700 return; /* don't write anything at all */
702 fprintf(fp
, "<dl>\n");
703 /* iterate over idx->entries using the tree functions and display everything */
704 for (ti
= 0; (y
= (indexentry
*)index234(idx
->entries
, ti
)) != NULL
; ti
++) {
705 if (y
->backend_data
) {
710 xhtml_para(fp
, y
->text
);
711 fprintf(fp
, "</dt>\n<dd>");
713 xi
= (xhtmlindex
*) y
->backend_data
;
714 for (i
=0; i
<xi
->nsection
; i
++) {
715 xhtmlsection
*sect
= xi
->sections
[i
];
717 fprintf(fp
, "<a href='%s#%s'>", sect
->file
->filename
, sect
->fragment
);
718 if (sect
->para
->kwtext
) {
719 xhtml_para(fp
, sect
->para
->kwtext
);
720 } else if (sect
->para
->words
) {
721 xhtml_para(fp
, sect
->para
->words
);
724 if (i
+1<xi
->nsection
) {
729 fprintf(fp
, "</dd>\n");
732 fprintf(fp
, "</dl>\n");
734 static void xhtml_do_index()
736 word temp_word
= { NULL
, NULL
, word_Normal
, 0, 0, L
"Index", { NULL
, 0, 0} };
737 FILE *fp
= fopen(xhtml_index_filename
, "w");
740 fatal(err_cantopenw
, xhtml_index_filename
);
741 xhtml_doheader(fp
, &temp_word
);
742 xhtml_donavlinks(fp
, NULL
);
744 xhtml_do_index_body(fp
);
746 xhtml_donavlinks(fp
, NULL
);
751 /* Output the given file. This includes whatever contents at beginning and end, etc. etc. */
752 static void xhtml_do_file(xhtmlfile
*file
)
754 FILE *fp
= fopen(file
->filename
, "w");
756 fatal(err_cantopenw
, file
->filename
);
758 if (file
->sections
->para
->words
) {
759 xhtml_doheader(fp
, file
->sections
->para
->words
);
760 } else if (file
->sections
->para
->kwtext
) {
761 xhtml_doheader(fp
, file
->sections
->para
->kwtext
);
763 xhtml_doheader(fp
, NULL
);
766 xhtml_donavlinks(fp
, file
);
768 if (file
->is_leaf
&& conf
.leaf_contains_contents
&&
769 xhtml_do_contents(NULL
, file
)>=conf
.leaf_smallest_contents
)
770 xhtml_do_contents(fp
, file
);
771 xhtml_do_sections(fp
, file
->sections
);
773 xhtml_do_naked_contents(fp
, file
);
775 xhtml_donavlinks(fp
, file
);
780 xhtml_last_file
= file
;
783 /* Output the top-level file. */
784 static void xhtml_do_top_file(xhtmlfile
*file
, paragraph
*sourceform
)
788 FILE *fp
= fopen(file
->filename
, "w");
790 fatal(err_cantopenw
, file
->filename
);
792 /* Do the title -- only one allowed */
793 for (p
= sourceform
; p
&& !done
; p
= p
->next
)
795 if (p
->type
== para_Title
)
797 xhtml_doheader(fp
, p
->words
);
802 xhtml_doheader(fp
, NULL
/* Eek! */);
807 for (p
= sourceform
; p
; p
= p
->next
)
809 if (p
->type
== para_Title
) {
810 xhtml_heading(fp
, p
);
815 /* Do the preamble and copyright */
816 for (p
= sourceform
; p
; p
= p
->next
)
818 if (p
->type
== para_Preamble
)
821 xhtml_para(fp
, p
->words
);
822 fprintf(fp
, "</p>\n");
825 for (p
= sourceform
; p
; p
= p
->next
)
827 if (p
->type
== para_Copyright
)
830 xhtml_para(fp
, p
->words
);
831 fprintf(fp
, "</p>\n");
835 xhtml_do_contents(fp
, file
);
836 xhtml_do_sections(fp
, file
->sections
);
838 if (count234(idx
->entries
) > 0) {
839 fprintf(fp
, "<a name=\"index\"></a><h1>Index</h1>\n");
840 xhtml_do_index_body(fp
);
847 /* Convert a Unicode string to an ASCII one. '?' is
848 * used for unmappable characters.
850 static void xhtml_utostr(wchar_t *in
, char **out
)
857 if (in
[i
]>=32 && in
[i
]<=126)
858 (*out
)[i
]=(char)in
[i
];
866 * Write contents for the given file, and subfiles, down to
867 * the appropriate contents depth. Returns the number of
870 static int xhtml_do_contents(FILE *fp
, xhtmlfile
*file
)
872 int level
, limit
, start_level
, count
= 0;
876 level
= (file
->sections
)?
(file
->sections
->level
):(0);
877 limit
= conf
.contents_depth
[(level
>5)?
(5):(level
)];
878 start_level
= (file
->is_leaf
) ?
(level
-1) : (level
);
879 last_level
= start_level
;
881 count
+= xhtml_do_contents_section_limit(fp
, file
->sections
, limit
);
882 count
+= xhtml_do_contents_limit(fp
, file
->child
, limit
);
884 while (last_level
> start_level
) {
886 fprintf(fp
, "</ul>\n");
892 /* As above, but doesn't do anything in the current file */
893 static int xhtml_do_naked_contents(FILE *fp
, xhtmlfile
*file
)
895 int level
, limit
, start_level
, count
= 0;
899 level
= (file
->sections
)?
(file
->sections
->level
):(0);
900 limit
= conf
.contents_depth
[(level
>5)?
(5):(level
)];
901 start_level
= (file
->is_leaf
) ?
(level
-1) : (level
);
902 last_level
= start_level
;
904 count
= xhtml_do_contents_limit(fp
, file
->child
, limit
);
906 while (last_level
> start_level
) {
908 fprintf(fp
, "</ul>\n");
915 * Write contents for the given file, children, and siblings, down to
916 * given limit contents depth.
918 static int xhtml_do_contents_limit(FILE *fp
, xhtmlfile
*file
, int limit
)
922 count
+= xhtml_do_contents_section_limit(fp
, file
->sections
, limit
);
923 count
+= xhtml_do_contents_limit(fp
, file
->child
, limit
);
930 * Write contents entries for the given section tree, down to the
931 * limit contents depth.
933 static int xhtml_do_contents_section_deep_limit(FILE *fp
, xhtmlsection
*section
, int limit
)
937 if (!xhtml_add_contents_entry(fp
, section
, limit
))
941 count
+= xhtml_do_contents_section_deep_limit(fp
, section
->child
, limit
);
942 section
= section
->next
;
948 * Write contents entries for the given section tree, down to the
949 * limit contents depth.
951 static int xhtml_do_contents_section_limit(FILE *fp
, xhtmlsection
*section
, int limit
)
956 xhtml_add_contents_entry(fp
, section
, limit
);
958 count
+= xhtml_do_contents_section_deep_limit(fp
, section
->child
, limit
);
959 /* section=section->child;
960 while (section && xhtml_add_contents_entry(fp, section, limit)) {
961 section = section->next;
967 * Add a section entry, unless we're exceeding the limit, in which
968 * case return FALSE (otherwise return TRUE).
970 static int xhtml_add_contents_entry(FILE *fp
, xhtmlsection
*section
, int limit
)
972 if (!section
|| section
->level
> limit
)
974 if (fp
==NULL
|| !section
->parent
)
976 while (last_level
> section
->level
) {
978 fprintf(fp
, "</ul>\n");
980 while (last_level
< section
->level
) {
982 fprintf(fp
, "<ul>\n");
984 fprintf(fp
, "<li><a href=\"%s#%s\">", section
->file
->filename
, section
->fragment
);
985 if (section
->para
->kwtext
) {
986 xhtml_para(fp
, section
->para
->kwtext
);
987 if (section
->para
->words
) {
991 if (section
->para
->words
) {
992 xhtml_para(fp
, section
->para
->words
);
994 fprintf(fp
, "</a></li>\n");
999 * Write all the sections in this file. Do all paragraphs in this section, then all
1000 * children (recursively), then go on to the next one (tail recursively).
1002 static void xhtml_do_sections(FILE *fp
, xhtmlsection
*sections
)
1005 currentsection
= sections
;
1006 xhtml_do_paras(fp
, sections
->para
);
1007 xhtml_do_sections(fp
, sections
->child
);
1008 sections
= sections
->next
;
1012 /* Write this list of paragraphs. Close off all lists at the end. */
1013 static void xhtml_do_paras(FILE *fp
, paragraph
*p
)
1015 int last_type
= -1, first
=TRUE
;
1019 /* for (; p && (xhtml_para_level(p)>limit || xhtml_para_level(p)==-1 || first); p=p->next) {*/
1020 for (; p
&& (xhtml_para_level(p
)==-1 || first
); p
=p
->next
) {
1025 * Things we ignore because we've already processed them or
1026 * aren't going to touch them in this pass.
1030 case para_Biblio
: /* only touch BiblioCited */
1031 case para_VersionID
:
1032 case para_Copyright
:
1043 case para_UnnumberedChapter
:
1044 xhtml_heading(fp
, p
);
1049 xhtml_heading(fp
, p
);
1053 fprintf(fp
, "\n<hr />\n");
1057 fprintf(fp
, "\n<p>");
1058 xhtml_para(fp
, p
->words
);
1059 fprintf(fp
, "</p>\n");
1063 case para_NumberedList
:
1064 case para_BiblioCited
:
1065 if (last_type
!=p
->type
) {
1066 /* start up list if necessary */
1067 if (p
->type
== para_Bullet
) {
1068 fprintf(fp
, "<ul>\n");
1069 } else if (p
->type
== para_NumberedList
) {
1070 fprintf(fp
, "<ol>\n");
1071 } else if (p
->type
== para_BiblioCited
) {
1072 fprintf(fp
, "<dl>\n");
1075 if (p
->type
== para_Bullet
|| p
->type
== para_NumberedList
)
1076 fprintf(fp
, "<li>");
1077 else if (p
->type
== para_BiblioCited
) {
1078 fprintf(fp
, "<dt>");
1079 xhtml_para(fp
, p
->kwtext
);
1080 fprintf(fp
, "</dt>\n<dd>");
1082 xhtml_para(fp
, p
->words
);
1083 if (p
->type
== para_BiblioCited
) {
1084 fprintf(fp
, "</dd>\n");
1085 } else if (p
->type
== para_Bullet
|| p
->type
== para_NumberedList
) {
1086 fprintf(fp
, "</li>");
1088 if (p
->type
== para_Bullet
|| p
->type
== para_NumberedList
|| p
->type
== para_BiblioCited
)
1089 /* close off list if necessary */
1091 paragraph
*p2
= p
->next
;
1092 int close_off
=FALSE
;
1093 /* if (p2 && (xhtml_para_level(p2)>limit || xhtml_para_level(p2)==-1)) {*/
1094 if (p2
&& xhtml_para_level(p2
)==-1) {
1095 if (p2
->type
!= p
->type
)
1101 if (p
->type
== para_Bullet
) {
1102 fprintf(fp
, "</ul>\n");
1103 } else if (p
->type
== para_NumberedList
) {
1104 fprintf(fp
, "</ol>\n");
1105 } else if (p
->type
== para_BiblioCited
) {
1106 fprintf(fp
, "</dl>\n");
1113 xhtml_codepara(fp
, p
->words
);
1116 last_type
= p
->type
;
1121 * Output a header for this XHTML file.
1123 static void xhtml_doheader(FILE *fp
, word
*title
)
1125 fprintf(fp
, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n");
1126 fprintf(fp
, "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n");
1127 fprintf(fp
, "<html xmlns='http://www.w3.org/1999/xhtml'>\n\n<head>\n<title>");
1129 fprintf(fp
, "The thing with no name!");
1131 xhtml_para(fp
, title
);
1132 fprintf(fp
, "</title>\n");
1133 fprintf(fp
, "<meta name=\"generator\" content=\"Halibut %s xhtml-backend\" />\n", version
);
1135 fprintf(fp
, "<meta name=\"author\" content=\"%ls\" />\n", conf
.author
);
1136 if (conf
.description
)
1137 fprintf(fp
, "<meta name=\"description\" content=\"%ls\" />\n", conf
.description
);
1139 fprintf(fp
, "%ls\n", conf
.head_end
);
1140 fprintf(fp
, "</head>\n\n");
1142 fprintf(fp
, "%ls\n", conf
.body
);
1144 fprintf(fp
, "<body>\n");
1145 if (conf
.body_start
)
1146 fprintf(fp
, "%ls\n", conf
.body_start
);
1150 * Output a footer for this XHTML file.
1152 static void xhtml_dofooter(FILE *fp
)
1154 fprintf(fp
, "\n<hr />\n\n");
1156 fprintf(fp
, "%ls\n", conf
.body_end
);
1157 if (!conf
.suppress_address
) {
1158 fprintf(fp
,"<address>\n");
1159 if (conf
.address_start
)
1160 fprintf(fp
, "%ls\n", conf
.address_start
);
1161 /* Do the version ID */
1162 if (conf
.include_version_id
) {
1165 for (p
= sourceparas
; p
; p
= p
->next
)
1166 if (p
->type
== para_VersionID
) {
1167 xhtml_versionid(fp
, p
->words
, started
);
1171 if (conf
.address_end
)
1172 fprintf(fp
, "%ls\n", conf
.address_end
);
1173 fprintf(fp
, "</address>\n");
1175 fprintf(fp
, "</body>\n\n</html>\n");
1179 * Output the versionid paragraph. Typically this is a version control
1180 * ID string (such as $Id...$ in RCS).
1182 static void xhtml_versionid(FILE *fp
, word
*text
, int started
)
1184 rdstringc t
= { 0, 0, NULL
};
1186 rdaddc(&t
, '['); /* FIXME: configurability */
1187 xhtml_rdaddwc(&t
, text
, NULL
);
1188 rdaddc(&t
, ']'); /* FIXME: configurability */
1191 fprintf(fp
, "<br>\n");
1192 fprintf(fp
, "%s\n", t
.text
);
1196 /* Is this an XHTML reserved character? */
1197 static int xhtml_reservedchar(int c
)
1199 if (c
=='&' || c
=='<' || c
=='>' || c
=='"')
1206 * Convert a wide string into valid XHTML: Anything outside ASCII will
1207 * be fixed up as an entity. Currently we don't worry about constraining the
1208 * encoded character set, which we should probably do at some point (we can
1209 * still fix up and return FALSE - see the last comment here). We also don't
1212 * Because this is only used for words, spaces are HARD spaces (any other
1213 * spaces will be word_Whitespace not word_Normal). So they become &nbsp;
1214 * (non-breaking space) entities, unless hard_spaces is FALSE, of course (code paragraphs break the above
1217 * If `result' is non-NULL, mallocs the resulting string and stores a pointer to
1218 * it in `*result'. If `result' is NULL, merely checks whether all
1219 * characters in the string are feasible.
1221 * Return is nonzero if all characters are OK. If not all
1222 * characters are OK but `result' is non-NULL, a result _will_
1223 * still be generated!
/*
 * xhtml_convert: turn a wide string into XHTML-safe narrow text.
 *
 *   s           - wide string to examine
 *   result      - when non-NULL (doing is then true) the converted
 *                 text is built in the buffer p
 *   hard_spaces - when zero, a plain space (code 32) is copied through
 *                 unchanged; otherwise it takes the fix-up path
 *
 * plen is the number of bytes written so far and psize the current
 * capacity of p.  The loop header, some declarations and the final
 * return are not visible in this view.
 */
1225 static int xhtml_convert(wchar_t *s
, char **result
, int hard_spaces
) {
1226 int doing
= (result
!= 0);
1229 int plen
= 0, psize
= 0;
/* ensure_size(i): when i >= psize, set psize to i+256 and resize p,
 * so that index i becomes valid.
 * NOTE(review): the macro body is a bare if-statement and its
 * argument is not parenthesised, so it is only safe when used as a
 * complete statement with a simple argument, as it is below. */
1234 #define ensure_size(i) if (i>=psize) { psize = i+256; p = resize(p, psize); }
/* Copied through unchanged: a space when hard_spaces is off, or a
 * code in the range 33..126 for which xhtml_reservedchar() returns
 * zero. */
1236 if (((c
== 32 && !hard_spaces
) || (c
> 32 && c
<= 126 && !xhtml_reservedchar(c
)))) {
1241 p
[plen
++] = (char)c
;
1244 /* Char needs fixing up. */
1245 /* ok = FALSE; -- currently we never return FALSE; we
1246 * might want to when considering a character set for the
1251 if (c
==32) { /* a space in a word is a hard space */
/* NOTE(review): six bytes are reserved here, but the literal written
 * below is a single space.  The text of the hard-space entity
 * (presumably the six-character &nbsp;) looks to have been lost from
 * the string; confirm against the upstream file. */
1252 ensure_size(plen
+6); /* includes space for the NUL, which is subsequently stomped on */
1253 sprintf(p
+plen
, " ");
/* NOTE(review): ensure_size(plen+8) guarantees nine writable bytes
 * starting at p+plen.  The numeric entity format below fits in that
 * only while the value printed has at most five characters; a code of
 * 100000 or more (or -10000 or less) writes past the reservation.
 * Confirm the range of wchar_t values that can reach this point. */
1256 /* FIXME: entity names! */
1257 ensure_size(plen
+8); /* includes space for the NUL, which is subsequently stomped on */
1258 plen
+=sprintf(p
+plen
, "&#%04i;", (int)c
);
/* Resize the buffer to plen+1 bytes.  Storing the terminator and
 * handing the buffer back through result are not visible in this
 * view. */
1264 p
= resize(p
, plen
+1);
1272 * This formats the given words as XHTML.
/*
 * xhtml_rdaddwc: append the XHTML rendering of a word list to rs.
 *
 *   rs   - string accumulator that receives the output
 *   text - first word to render
 *   end  - word at which to stop (not rendered); NULL renders to the
 *          end of the list
 *
 * Each word is dispatched on text->type by the switch below.  Several
 * case labels, declarations and closing braces of this function are
 * not visible in this view, so the notes that follow describe only
 * the statements that are.
 */
1274 static void xhtml_rdaddwc(rdstringc
*rs
, word
*text
, word
*end
) {
1281 for (; text
&& text
!= end
; text
= text
->next
) {
1282 switch (text
->type
) {
/* Hyperlink: text->text is passed through xhtml_utostr() and an
 * anchor opener is started. */
1283 case word_HyperLink
:
1284 xhtml_utostr(text
->text
, &c
);
1285 rdaddsc(rs
, "<a href=\"");
/* Cross-references: look the keyword up, find the section for its
 * paragraph and link to that section's file name and fragment.  Two
 * failure paths link to Apologies.html instead and report an error;
 * per their messages one covers a target that could not be located
 * (probably a bibliography entry) and the other a keyword that was
 * not in the source.  The conditions selecting them are not visible
 * in this view. */
1291 case word_UpperXref
:
1292 case word_LowerXref
:
1293 kwl
= kw_lookup(keywords
, text
->text
);
1295 sect
=xhtml_find_section(kwl
->para
);
1297 rdaddsc(rs
, "<a href=\"");
1298 rdaddsc(rs
, sect
->file
->filename
);
1300 rdaddsc(rs
, sect
->fragment
);
1303 rdaddsc(rs
, "<a href=\"Apologies.html\"><!-- probably a bibliography cross reference -->");
1304 error(err_whatever
, "Couldn't locate cross-reference! (Probably a bibliography entry.)");
1307 rdaddsc(rs
, "<a href=\"Apologies.html\"><!-- unknown cross-reference -->");
1308 error(err_whatever
, "Couldn't locate cross-reference! (Wasn't in source file.)");
/* Index reference: nothing is appended to rs (the anchor code is
 * commented out).  Instead currentsection is recorded in the backend
 * data of every index entry that the matching tag refers to. */
1312 case word_IndexRef
: /* in theory we could make an index target here */
1313 /* rdaddsc(rs, "<a name=\"idx-");
1314 xhtml_utostr(text->text, &c);
1317 rdaddsc(rs, "\"></a>");*/
1318 /* what we _do_ need to do is to fix up the backend data
1319 * for any indexentry this points to.
1321 for (ti
=0; (itag
= (indextag
*)index234(idx
->tags
, ti
))!=NULL
; ti
++) {
1322 /* FIXME: really ustricmp() and not ustrcmp()? */
/* The loop above visits idx->tags by position until index234()
 * returns NULL; the test below picks the tag whose name compares
 * equal to text->text under ustricmp() (presumably a case-insensitive
 * comparison; confirm). */
1323 if (ustricmp(itag
->name
, text
->text
)==0) {
1328 if (itag
->refs
!=NULL
) {
1330 for (i
=0; i
<itag
->nrefs
; i
++) {
1331 xhtmlindex
*idx_ref
;
1334 ientry
= itag
->refs
[i
];
/* First time this entry is seen: allocate its xhtmlindex record and
 * the array of section pointers, then attach it as backend_data. */
1335 if (ientry
->backend_data
==NULL
) {
1336 idx_ref
= (xhtmlindex
*) smalloc(sizeof(xhtmlindex
));
1338 fatal(err_nomemory
);
1339 idx_ref
->nsection
= 0;
/* NOTE(review): idx_ref->size is read by the allocation below, but
 * its initialisation is not visible in this view; confirm it is set
 * to a nonzero capacity first (the doubling further down would never
 * grow from zero). */
1341 idx_ref
->sections
= (xhtmlsection
**) smalloc(idx_ref
->size
* sizeof(xhtmlsection
*));
1342 if (idx_ref
->sections
==NULL
)
1343 fatal(err_nomemory
);
1344 ientry
->backend_data
= idx_ref
;
1346 idx_ref
= ientry
->backend_data
;
/* Array full: double its capacity before appending. */
1347 if (idx_ref
->nsection
+1 > idx_ref
->size
) {
1348 int new_size
= idx_ref
->size
* 2;
/* NOTE(review): the element size used here is sizeof(xhtmlsection),
 * whereas the array holds xhtmlsection pointers and the initial
 * smalloc() above uses sizeof(xhtmlsection*).  This over-allocates
 * rather than under-allocates, but sizeof(xhtmlsection*) looks like
 * what was intended.  The result also overwrites the only copy of
 * the old pointer, which is harmless only because failure is fatal. */
1349 idx_ref
->sections
= srealloc(idx_ref
->sections
, new_size
* sizeof(xhtmlsection
));
1350 if (idx_ref
->sections
==NULL
) {
1351 fatal(err_nomemory
);
1353 idx_ref
->size
= new_size
;
/* Record the section currently being output against this entry. */
1356 idx_ref
->sections
[idx_ref
->nsection
++] = currentsection
;
1361 fatal(err_whatever
, "Index tag had no entries!");
1364 fprintf(stderr
, "Looking for index entry '%ls'\n", text
->text
);
1365 fatal(err_whatever
, "Couldn't locate index entry! (Wasn't in index.)");
/* Closes an anchor element; the case labels this statement belongs
 * to are not visible in this view. */
1371 rdaddsc(rs
, "</a>");
/* Spacing, quote and (labels not visible here) text words share one
 * body: open <em> or <code> on the first or only word of a styled
 * run, emit the content, then close the element on the last or only
 * word. */
1378 case word_WhiteSpace
:
1379 case word_EmphSpace
:
1380 case word_CodeSpace
:
1381 case word_WkCodeSpace
:
1383 case word_EmphQuote
:
1384 case word_CodeQuote
:
1385 case word_WkCodeQuote
:
/* Quote words in either code style are asserted never to reach this
 * point. */
1386 assert(text
->type
!= word_CodeQuote
&&
1387 text
->type
!= word_WkCodeQuote
);
1388 if (towordstyle(text
->type
) == word_Emph
&&
1389 (attraux(text
->aux
) == attr_First
||
1390 attraux(text
->aux
) == attr_Only
))
1391 rdaddsc(rs
, "<em>");
1392 else if ((towordstyle(text
->type
) == word_Code
|| towordstyle(text
->type
) == word_WeakCode
) &&
1393 (attraux(text
->aux
) == attr_First
||
1394 attraux(text
->aux
) == attr_Only
))
1395 rdaddsc(rs
, "<code>");
/* Plain text: converted by xhtml_convert() with hard spaces.  The
 * recursive call renders text->alt instead; presumably that is the
 * fallback taken when conversion reports failure (the branch
 * structure is not fully visible in this view). */
1397 if (removeattr(text
->type
) == word_Normal
) {
1398 if (xhtml_convert(text
->text
, &c
, TRUE
)) /* spaces in the word are hard */
1401 xhtml_rdaddwc(rs
, text
->alt
, NULL
);
1403 } else if (removeattr(text
->type
) == word_WhiteSpace
) {
1405 } else if (removeattr(text
->type
) == word_Quote
) {
/* NOTE(review): the literal below is three consecutive double-quote
 * characters, which is not a valid C string.  xhtml_reservedchar()
 * treats the double quote as reserved, so the entity text for it
 * (presumably &quot;) appears to have been lost; confirm against the
 * upstream file. */
1406 rdaddsc(rs
, """);
1409 if (towordstyle(text
->type
) == word_Emph
&&
1410 (attraux(text
->aux
) == attr_Last
||
1411 attraux(text
->aux
) == attr_Only
))
1412 rdaddsc(rs
, "</em>");
1413 else if ((towordstyle(text
->type
) == word_Code
|| towordstyle(text
->type
) == word_WeakCode
) &&
1414 (attraux(text
->aux
) == attr_Last
||
1415 attraux(text
->aux
) == attr_Only
))
1416 rdaddsc(rs
, "</code>");
/*
 * xhtml_heading: print one heading paragraph to fp.
 *
 *   fp - output stream
 *   p  - heading paragraph; p->kwtext, p->kwtext2 and p->words supply
 *        the two prefixes and the heading text
 *
 * The output is an empty named anchor followed by an <hN> element,
 * where N is xhtml_para_level(p).  The anchor name is the fragment of
 * the section returned by xhtml_find_section(p).  When no fragment is
 * available a para_Title paragraph is special-cased (the assignment
 * is not visible in this view); any other paragraph gets an empty
 * name and an error is reported.
 */
1422 /* Output a heading, formatted as XHTML.
1424 static void xhtml_heading(FILE *fp
, paragraph
*p
)
1426 rdstringc t
= { 0, 0, NULL
};
1427 word
*tprefix
= p
->kwtext
;
1428 word
*nprefix
= p
->kwtext2
;
1429 word
*text
= p
->words
;
1430 int level
= xhtml_para_level(p
);
1431 xhtmlsection
*sect
= xhtml_find_section(p
);
1434 fragment
= sect
->fragment
;
1436 if (p
->type
== para_Title
)
1439 fragment
= ""; /* FIXME: what else can we do? */
1440 error(err_whatever
, "Couldn't locate heading cross-reference!");
/* Prefix selection: below level 3, or when there is no kwtext2
 * prefix, use kwtext followed by ": " if it exists; otherwise use
 * kwtext2 followed by a single space. */
1444 if (level
>2 && nprefix
) { /* FIXME: configurability on the level thing */
1445 xhtml_rdaddwc(&t
, nprefix
, NULL
);
1446 rdaddc(&t
, ' '); /* FIXME: as below */
1447 } else if (tprefix
) {
1448 xhtml_rdaddwc(&t
, tprefix
, NULL
);
1449 rdaddsc(&t
, ": "); /* FIXME: configurability */
1451 xhtml_rdaddwc(&t
, text
, NULL
);
/* NOTE(review): t.text is printed with %s; the release of t.text is
 * not visible in this view. */
1452 fprintf(fp
, "<a name=\"%s\"></a><h%i>%s</h%i>\n", fragment
, level
, t
.text
, level
);
/*
 * xhtml_para: print the words of one paragraph to fp.
 *
 *   fp   - output stream
 *   text - first word of the paragraph
 *
 * The words are rendered by xhtml_rdaddwc() into a temporary
 * rdstringc, which is then written with a bare "%s": no surrounding
 * element and no trailing newline are added here.
 *
 * NOTE(review): out is initialised with a NULL member; if
 * xhtml_rdaddwc() appends nothing (for example when text is NULL)
 * that NULL may be what reaches %s.  Confirm how rdstringc behaves,
 * and note that the release of out.text is not visible in this view.
 */
1456 /* Output a paragraph. Styles are handled by xhtml_rdaddwc().
1457 * This looks pretty simple; I may have missed something ...
1459 static void xhtml_para(FILE *fp
, word
*text
)
1461 rdstringc out
= { 0, 0, NULL
};
1462 xhtml_rdaddwc(&out
, text
, NULL
);
1463 fprintf(fp
, "%s", out
.text
);
/*
 * xhtml_codepara: print a code paragraph to fp as a <pre> block.
 *
 *   fp   - output stream
 *   text - first word of the paragraph
 *
 * After the opening <pre>, the word list is walked and every word
 * whose type is word_WeakCode is converted with xhtml_convert()
 * (hard_spaces FALSE, so ordinary spaces are copied through) and
 * printed followed by a newline; words of any other type produce no
 * output.  The block is then closed with </pre>.
 *
 * NOTE(review): the declaration of c and the release of the string
 * that xhtml_convert() allocates are not visible in this view.
 */
1467 /* Output a code paragraph. I'm treating this as preformatted, which
1468 * may not be entirely correct. See xhtml_para() for my worries about
1469 * this being overly-simple; however I think that most of the complexity
1470 * of the text backend came entirely out of word wrapping anyway.
1472 static void xhtml_codepara(FILE *fp
, word
*text
)
1474 fprintf(fp
, "<pre>");
1475 for (; text
; text
= text
->next
) if (text
->type
== word_WeakCode
) {
1477 xhtml_convert(text
->text
, &c
, FALSE
);
1478 fprintf(fp
, "%s\n", c
);
1481 fprintf(fp
, "</pre>\n");