2 * xhtml backend for Halibut
3 * (initial implementation by James Aylett)
7 * +++ doesn't handle non-breaking hyphens. Not sure how to yet.
8 * +++ entity names (from a file -- ideally supply normal SGML files)
9 * +++ configuration directive to file split where the current layout
10 * code wouldn't. Needs changes to _ponder_layout() and _do_paras(),
15 * +++ biblio/index references target the nearest section marker, rather
16 * than having a dedicated target themselves. In large bibliographies
17 * this will cause problems. (The solution is to fake up a response
18 * from xhtml_find_section(), probably linking it into the sections
19 * chain just in case we need it again, and to make freeing it up
20 * easier.) docsrc.pl used to work as we do, however, and SGT agrees that
21 * this is acceptable for now.
22 * +++ can't cope with leaf-level == 0. It's all to do with the
23 * top-level file not being normal, probably not even having a valid
24 * section level, and stuff like that. I question whether this is an
25 * issue, frankly; small manuals that fit on one page should probably
26 * not be written in halibut at all.
34 struct xhtmlsection_Struct
{
35 struct xhtmlsection_Struct
*next
; /* next sibling (NULL if split across files) */
36 struct xhtmlsection_Struct
*child
; /* NULL if split across files */
37 struct xhtmlsection_Struct
*parent
; /* NULL if split across files */
38 struct xhtmlsection_Struct
*chain
; /* single structure independent of weird trees */
40 struct xhtmlfile_Struct
*file
; /* which file is this a part of? */
41 char *fragment
; /* fragment id within the file */
45 struct xhtmlfile_Struct
{
46 struct xhtmlfile_Struct
*next
;
47 struct xhtmlfile_Struct
*child
;
48 struct xhtmlfile_Struct
*parent
;
50 struct xhtmlsection_Struct
*sections
; /* sections within this file (only one for non-leaf) */
51 int is_leaf
; /* is this file a leaf file, ie does it not have any children? */
54 typedef struct xhtmlsection_Struct xhtmlsection
;
55 typedef struct xhtmlfile_Struct xhtmlfile
;
56 typedef struct xhtmlindex_Struct xhtmlindex
;
58 struct xhtmlindex_Struct
{
61 xhtmlsection
**sections
;
66 wchar_t *number_suffix
;
70 int contents_depth
[6];
71 int leaf_contains_contents
;
73 int leaf_smallest_contents
;
74 int include_version_id
;
75 wchar_t *author
, *description
;
76 wchar_t *head_end
, *body
, *body_start
, *body_end
, *address_start
, *address_end
, *nav_attrs
;
78 xhtmlheadfmt fchapter
, *fsect
;
82 /*static void xhtml_level(paragraph *, int);
83 static void xhtml_level_0(paragraph *);
84 static void xhtml_docontents(FILE *, paragraph *, int);
85 static void xhtml_dosections(FILE *, paragraph *, int);
86 static void xhtml_dobody(FILE *, paragraph *, int);*/
88 static void xhtml_doheader(FILE *, word
*);
89 static void xhtml_dofooter(FILE *);
90 static void xhtml_versionid(FILE *, word
*, int);
92 static void xhtml_utostr(wchar_t *, char **);
93 static int xhtml_para_level(paragraph
*);
94 static int xhtml_reservedchar(int);
96 static int xhtml_convert(wchar_t *, char **, int);
97 static void xhtml_rdaddwc(rdstringc
*, word
*, word
*);
98 static void xhtml_para(FILE *, word
*);
99 static void xhtml_codepara(FILE *, word
*);
100 static void xhtml_heading(FILE *, paragraph
*);
102 /* File-global variables are much easier than passing these things
103 * all over the place. Evil, but easier. We can replace this with a single
104 * structure at some point.
106 static xhtmlconfig conf
;
107 static keywordlist
*keywords
;
108 static indexdata
*idx
;
109 static xhtmlfile
*topfile
;
110 static xhtmlsection
*topsection
;
111 static paragraph
*sourceparas
;
112 static xhtmlfile
*lastfile
;
113 static xhtmlfile
*xhtml_last_file
= NULL
;
114 static int last_level
=-1;
115 static xhtmlsection
*currentsection
;
117 static xhtmlconfig
xhtml_configure(paragraph
*source
)
124 ret
.contents_depth
[0] = 2;
125 ret
.contents_depth
[1] = 3;
126 ret
.contents_depth
[2] = 4;
127 ret
.contents_depth
[3] = 5;
128 ret
.contents_depth
[4] = 6;
129 ret
.contents_depth
[5] = 7;
131 ret
.leaf_smallest_contents
= 4;
132 ret
.leaf_contains_contents
= FALSE
;
133 ret
.include_version_id
= TRUE
;
135 ret
.description
= NULL
;
138 ret
.body_start
= NULL
;
140 ret
.address_start
= NULL
;
141 ret
.address_end
= NULL
;
142 ret
.nav_attrs
= NULL
;
143 ret
.suppress_address
= FALSE
;
145 ret
.fchapter
.just_numbers
= FALSE
;
146 ret
.fchapter
.number_suffix
= ustrdup(L
": ");
148 ret
.fsect
= mknewa(xhtmlheadfmt
, ret
.nfsect
);
149 ret
.fsect
[0].just_numbers
= FALSE
;
150 ret
.fsect
[0].number_suffix
= ustrdup(L
": ");
151 ret
.fsect
[1].just_numbers
= TRUE
;
152 ret
.fsect
[1].number_suffix
= ustrdup(L
" ");
154 for (; source
; source
= source
->next
)
156 if (source
->type
== para_Config
)
158 if (!ustricmp(source
->keyword
, L
"xhtml-contents-depth-0")) {
159 ret
.contents_depth
[0] = utoi(uadv(source
->keyword
));
160 } else if (!ustricmp(source
->keyword
, L
"xhtml-contents-depth-1")) {
161 ret
.contents_depth
[1] = utoi(uadv(source
->keyword
));
162 } else if (!ustricmp(source
->keyword
, L
"xhtml-contents-depth-2")) {
163 ret
.contents_depth
[2] = utoi(uadv(source
->keyword
));
164 } else if (!ustricmp(source
->keyword
, L
"xhtml-contents-depth-3")) {
165 ret
.contents_depth
[3] = utoi(uadv(source
->keyword
));
166 } else if (!ustricmp(source
->keyword
, L
"xhtml-contents-depth-4")) {
167 ret
.contents_depth
[4] = utoi(uadv(source
->keyword
));
168 } else if (!ustricmp(source
->keyword
, L
"xhtml-contents-depth-5")) {
169 ret
.contents_depth
[5] = utoi(uadv(source
->keyword
));
170 } else if (!ustricmp(source
->keyword
, L
"xhtml-leaf-level")) {
171 ret
.leaf_level
= utoi(uadv(source
->keyword
));
172 } else if (!ustricmp(source
->keyword
, L
"xhtml-leaf-smallest-contents")) {
173 ret
.leaf_smallest_contents
= utoi(uadv(source
->keyword
));
174 } else if (!ustricmp(source
->keyword
, L
"xhtml-versionid")) {
175 ret
.include_version_id
= utob(uadv(source
->keyword
));
176 } else if (!ustricmp(source
->keyword
, L
"xhtml-leaf-contains-contents")) {
177 ret
.leaf_contains_contents
= utob(uadv(source
->keyword
));
178 } else if (!ustricmp(source
->keyword
, L
"xhtml-suppress-address")) {
179 ret
.suppress_address
= utob(uadv(source
->keyword
));
180 } else if (!ustricmp(source
->keyword
, L
"xhtml-author")) {
181 ret
.author
= uadv(source
->keyword
);
182 } else if (!ustricmp(source
->keyword
, L
"xhtml-description")) {
183 ret
.description
= uadv(source
->keyword
);
184 } else if (!ustricmp(source
->keyword
, L
"xhtml-head-end")) {
185 ret
.head_end
= uadv(source
->keyword
);
186 } else if (!ustricmp(source
->keyword
, L
"xhtml-body-start")) {
187 ret
.body_start
= uadv(source
->keyword
);
188 } else if (!ustricmp(source
->keyword
, L
"xhtml-body-tag")) {
189 ret
.body
= uadv(source
->keyword
);
190 } else if (!ustricmp(source
->keyword
, L
"xhtml-body-end")) {
191 ret
.body_end
= uadv(source
->keyword
);
192 } else if (!ustricmp(source
->keyword
, L
"xhtml-address-start")) {
193 ret
.address_start
= uadv(source
->keyword
);
194 } else if (!ustricmp(source
->keyword
, L
"xhtml-address-end")) {
195 ret
.address_end
= uadv(source
->keyword
);
196 } else if (!ustricmp(source
->keyword
, L
"xhtml-navigation-attributes")) {
197 ret
.nav_attrs
= uadv(source
->keyword
);
198 } else if (!ustricmp(source
->keyword
, L
"xhtml-chapter-numeric")) {
199 ret
.fchapter
.just_numbers
= utob(uadv(source
->keyword
));
200 } else if (!ustricmp(source
->keyword
, L
"xhtml-chapter-suffix")) {
201 ret
.fchapter
.number_suffix
= uadv(source
->keyword
);
202 } else if (!ustricmp(source
->keyword
, L
"xhtml-section-numeric")) {
203 wchar_t *p
= uadv(source
->keyword
);
209 if (n
>= ret
.nfsect
) {
211 ret
.fsect
= resize(ret
.fsect
, n
+1);
212 for (i
= ret
.nfsect
; i
<= n
; i
++)
213 ret
.fsect
[i
] = ret
.fsect
[ret
.nfsect
-1];
216 ret
.fsect
[n
].just_numbers
= utob(p
);
217 } else if (!ustricmp(source
->keyword
, L
"xhtml-section-suffix")) {
218 wchar_t *p
= uadv(source
->keyword
);
224 if (n
>= ret
.nfsect
) {
226 ret
.fsect
= resize(ret
.fsect
, n
+1);
227 for (i
= ret
.nfsect
; i
<= n
; i
++)
228 ret
.fsect
[i
] = ret
.fsect
[ret
.nfsect
-1];
231 ret
.fsect
[n
].number_suffix
= p
;
236 /* printf(" !!! leaf_level = %i\n", ret.leaf_level);
237 printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]);
238 printf(" !!! contentdepth-1 = %i\n", ret.contents_depth[1]);
239 printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]);
240 printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]);
241 printf(" !!! contentdepth-4 = %i\n", ret.contents_depth[4]);
242 printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]);
243 printf(" !!! leaf_contains_contents = %i\n", ret.leaf_contains_contents);*/
247 static xhtmlsection
*xhtml_new_section(xhtmlsection
*last
)
249 xhtmlsection
*ret
= mknew(xhtmlsection
);
257 ret
->level
=-1; /* marker: end of chain */
261 /* Returns NULL or the section that marks that paragraph */
262 static xhtmlsection
*xhtml_find_section(paragraph
*p
)
264 xhtmlsection
*ret
= topsection
;
265 if (xhtml_para_level(p
)==-1) { /* first, we back-track to a section paragraph */
266 paragraph
*p2
= sourceparas
;
267 paragraph
*p3
= NULL
;
268 while (p2
&& p2
!=p
) {
269 if (xhtml_para_level(p2
)!=-1) {
274 if (p3
==NULL
) { /* for some reason, we couldn't find a section before this paragraph ... ? */
275 /* Note that this can happen, if you have a cross-reference to before the first chapter starts.
276 * So don't do that, then.
282 while (ret
&& ret
->para
!= p
) {
283 /* printf(" xhtml_find_section(): checking %s for para @ %p\n", ret->fragment, p);*/
289 static xhtmlfile
*xhtml_new_file(xhtmlsection
*sect
)
291 xhtmlfile
*ret
= mknew(xhtmlfile
);
298 ret
->is_leaf
=(sect
!=NULL
&& sect
->level
==conf
.leaf_level
);
300 if (conf
.leaf_level
==0) { /* currently unused */
301 #define FILENAME_MANUAL "Manual.html"
302 #define FILENAME_CONTENTS "Contents.html"
303 ret
->filename
= smalloc(strlen(FILENAME_MANUAL
)+1);
304 sprintf(ret
->filename
, FILENAME_MANUAL
);
306 ret
->filename
= smalloc(strlen(FILENAME_CONTENTS
)+1);
307 sprintf(ret
->filename
, FILENAME_CONTENTS
);
310 paragraph
*p
= sect
->para
;
311 rdstringc fname_c
= { 0, 0, NULL
};
314 for (w
=(p
->kwtext
)?
(p
->kwtext
):(p
->words
); w
; w
=w
->next
)
316 switch (removeattr(w
->type
))
321 case word_WeakCode:*/
322 xhtml_utostr(w
->text
, &c
);
328 rdaddsc(&fname_c
, ".html");
329 ret
->filename
= rdtrimc(&fname_c
);
331 /* printf(" ! new file '%s', is_leaf == %s\n", ret->filename, (ret->is_leaf)?("true"):("false"));*/
336 * Walk the tree fixing up files which are actually leaf (ie
337 * have no children) but aren't at leaf level, so they have the
340 void xhtml_fixup_layout(xhtmlfile
* file
)
342 if (file
->child
==NULL
) {
343 file
->is_leaf
= TRUE
;
345 xhtml_fixup_layout(file
->child
);
348 xhtml_fixup_layout(file
->next
);
352 * Create the tree structure so we know where everything goes.
355 * Ignoring file splitting, we have three choices with each new section:
357 * +-----------------+-----------------+
365 * Y is the last section we added (currentsect).
366 * If sect is the section we want to add, then:
368 * (1) if sect->level < currentsect->level
369 * (2) if sect->level == currentsect->level
370 * (3) if sect->level > currentsect->level
372 * This requires the constraint that you never skip section numbers
373 * (so you can't have a.b.c.d without all of a, a.b and a.b.c existing).
375 * Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change
376 * more than one level at a time. Lots of asserts, and probably part of
377 * the algorithm here, rely on this being true. (It currently isn't
378 * enforced by halibut, however.)
380 * File splitting makes this harder. For instance, say we added at (3)
381 * above and now need to add another section. We are splitting at level
382 * 2, ie the level of Y. Z is the last section we added:
384 * +-----------------+-----------------+
394 * The (1) case is now split; we need to search upwards to find where
395 * to actually link in. The other two cases remain the same (and will
396 * always be like this).
398 * File splitting makes this harder, however. The decision of whether
399 * to split to a new file is always on the same condition, however (is
400 * the level of this section higher than the leaf_level configuration
403 * Treating the cases backwards:
405 * (3) same file if sect->level > conf.leaf_level, otherwise new file
407 * if in the same file, currentsect->child points to sect
408 * otherwise the linking is done through the file tree (which works
409 * in more or less the same way, ie currentfile->child points to
412 * (2) same file if sect->level > conf.leaf_level, otherwise new file
414 * if in the same file, currentsect->next points to sect
415 * otherwise file linking and currentfile->next points to the new
416 * file (we know that Z must have caused a new file to be created)
418 * (1) same file if sect->level > conf.leaf_level, otherwise new file
420 * this is actually effectively the same case as (2) here,
421 * except that we first have to travel up the sections to figure
422 * out which section this new one will be a sibling of. In doing
423 * so, we may disappear off the top of a file and have to go up
424 * to its parent in the file tree.
427 static void xhtml_ponder_layout(paragraph
*p
)
429 xhtmlsection
*lastsection
;
430 xhtmlsection
*currentsect
;
431 xhtmlfile
*currentfile
;
434 topsection
= xhtml_new_section(NULL
);
435 topfile
= xhtml_new_file(NULL
);
436 lastsection
= topsection
;
437 currentfile
= topfile
;
438 currentsect
= topsection
;
440 if (conf
.leaf_level
== 0) {
441 topfile
->is_leaf
= 1;
442 topfile
->sections
= topsection
;
443 topsection
->file
= topfile
;
448 int level
= xhtml_para_level(p
);
449 if (level
>0) /* actually a section */
454 rdstringc fname_c
= { 0, 0, NULL
};
456 sect
= xhtml_new_section(lastsection
);
459 for (w
=(p
->kwtext2
)?
(p
->kwtext2
):(p
->words
); w
; w
=w
->next
) /* kwtext2 because we want numbers only! */
461 switch (removeattr(w
->type
))
466 case word_WeakCode:*/
467 xhtml_utostr(w
->text
, &c
);
473 /* rdaddsc(&fname_c, ".html");*/
474 sect
->fragment
= rdtrimc(&fname_c
);
476 /* printf(" ! adding para @ %p as sect %s, level %i\n", sect->para, sect->fragment, level);*/
478 if (level
>currentsect
->level
) { /* case (3) */
479 if (level
>conf
.leaf_level
) { /* same file */
480 assert(currentfile
->is_leaf
);
481 currentsect
->child
= sect
;
482 sect
->parent
=currentsect
;
483 sect
->file
=currentfile
;
484 /* printf("connected '%s' to existing file '%s' [I]\n", sect->fragment, currentfile->filename);*/
486 } else { /* new file */
487 xhtmlfile
*file
= xhtml_new_file(sect
);
488 assert(!currentfile
->is_leaf
);
489 currentfile
->child
=file
;
491 file
->parent
=currentfile
;
492 /* printf("connected '%s' to new file '%s' [I]\n", sect->fragment, file->filename);*/
496 } else if (level
>= currentsect
->file
->sections
->level
) {
497 /* Case (1) or (2) *AND* still under the section that starts
500 * I'm not convinced that this couldn't be rolled in with the
501 * final else {} leg further down. It seems a lot of effort
504 if (level
>conf
.leaf_level
) { /* stick within the same file */
505 assert(currentfile
->is_leaf
);
506 sect
->file
= currentfile
;
507 while (currentsect
&& currentsect
->level
> level
&&
508 currentsect
->file
==currentsect
->parent
->file
) {
509 currentsect
= currentsect
->parent
;
512 currentsect
->next
= sect
;
513 assert(currentsect
->level
== sect
->level
);
514 sect
->parent
= currentsect
->parent
;
516 /* printf("connected '%s' to existing file '%s' [II]\n", sect->fragment, currentfile->filename);*/
517 } else { /* new file */
518 xhtmlfile
*file
= xhtml_new_file(sect
);
520 currentfile
->next
=file
;
521 file
->parent
=currentfile
->parent
;
522 file
->is_leaf
=(level
==conf
.leaf_level
);
524 /* printf("connected '%s' to new file '%s' [II]\n", sect->fragment, file->filename);*/
528 } else { /* Case (1) or (2) and we must move up the file tree first */
529 /* this loop is now probably irrelevant - we know we can't connect
530 * to anything in the current file */
531 while (currentsect
&& level
<currentsect
->level
) {
532 currentsect
=currentsect
->parent
;
534 /* printf(" * up one level to '%s'\n", currentsect->fragment);*/
536 /* printf(" * up one level (off top of current file)\n");*/
540 /* I'm pretty sure this can now never fire */
541 assert(currentfile
->is_leaf
);
542 /* printf("connected '%s' to existing file '%s' [III]\n", sect->fragment, currentfile->filename);*/
543 sect
->file
= currentfile
;
544 currentsect
->next
=sect
;
546 } else { /* find a file we can attach to */
547 while (currentfile
&& currentfile
->sections
&& level
<currentfile
->sections
->level
) {
548 currentfile
=currentfile
->parent
;
550 /* printf(" * up one file level to '%s'\n", currentfile->filename);*/
552 /* printf(" * up one file level (off top of tree)\n");*/
555 if (currentfile
) { /* new file (we had to skip up a file to
556 get here, so we must be dealing with a
557 level no lower than the configured
559 xhtmlfile
*file
= xhtml_new_file(sect
);
560 currentfile
->next
=file
;
562 file
->parent
=currentfile
->parent
;
563 file
->is_leaf
=(level
==conf
.leaf_level
);
565 /* printf("connected '%s' to new file '%s' [III]\n", sect->fragment, file->filename);*/
569 fatal(err_whatever
, "Ran off the top trying to connect sibling: strange document.");
575 topsection
= lastsection
; /* get correct end of the chain */
576 xhtml_fixup_layout(topfile
); /* leaf files not at leaf level marked as such */
579 static void xhtml_do_index();
580 static void xhtml_do_file(xhtmlfile
*file
);
581 static void xhtml_do_top_file(xhtmlfile
*file
, paragraph
*sourceform
);
582 static void xhtml_do_paras(FILE *fp
, paragraph
*p
);
583 static int xhtml_do_contents_limit(FILE *fp
, xhtmlfile
*file
, int limit
);
584 static int xhtml_do_contents_section_limit(FILE *fp
, xhtmlsection
*section
, int limit
);
585 static int xhtml_add_contents_entry(FILE *fp
, xhtmlsection
*section
, int limit
);
586 static int xhtml_do_contents(FILE *fp
, xhtmlfile
*file
);
587 static int xhtml_do_naked_contents(FILE *fp
, xhtmlfile
*file
);
588 static void xhtml_do_sections(FILE *fp
, xhtmlsection
*sections
);
591 * Do all the files in this structure.
593 static void xhtml_do_files(xhtmlfile
*file
)
597 xhtml_do_files(file
->child
);
599 xhtml_do_files(file
->next
);
603 * Free up all memory used by the file tree from 'xfile' downwards
605 static void xhtml_free_file(xhtmlfile
* xfile
)
611 if (xfile
->filename
) {
612 sfree(xfile
->filename
);
614 xhtml_free_file(xfile
->child
);
615 xhtml_free_file(xfile
->next
);
622 void xhtml_backend(paragraph
*sourceform
, keywordlist
*in_keywords
,
630 sourceparas
= sourceform
;
631 conf
= xhtml_configure(sourceform
);
632 keywords
= in_keywords
;
635 /* Clear up the index entries backend data pointers */
636 for (ti
=0; (ientry
= (indexentry
*)index234(idx
->entries
, ti
))!=NULL
; ti
++) {
637 ientry
->backend_data
=NULL
;
640 xhtml_ponder_layout(sourceform
);
642 /* old system ... (writes to *.alt, but gets some stuff wrong and is ugly) */
643 /* xhtml_level_0(sourceform);
644 for (i=1; i<=conf.leaf_level; i++)
646 xhtml_level(sourceform, i);
649 /* new system ... (writes to *.html, but isn't fully trusted) */
650 xhtml_do_top_file(topfile
, sourceform
);
651 assert(!topfile
->next
); /* shouldn't have a sibling at all */
652 if (topfile
->child
) {
653 xhtml_do_files(topfile
->child
);
657 /* release file, section, index data structures */
660 xhtmlsection
*tmp
= xsect
->chain
;
661 if (xsect
->fragment
) {
662 sfree(xsect
->fragment
);
667 xhtml_free_file(topfile
);
668 for (ti
= 0; (ientry
=(indexentry
*)index234(idx
->entries
, ti
))!=NULL
; ti
++) {
669 if (ientry
->backend_data
!=NULL
) {
670 xhtmlindex
*xi
= (xhtmlindex
*) ientry
->backend_data
;
671 if (xi
->sections
!=NULL
) {
676 ientry
->backend_data
= NULL
;
680 static int xhtml_para_level(paragraph
*p
)
687 case para_UnnumberedChapter
:
692 /* case para_BiblioCited:
705 static char* xhtml_index_filename
= "IndexPage.html";
707 /* Output the nav links for the current file.
708 * file == NULL means we're doing the index
710 static void xhtml_donavlinks(FILE *fp
, xhtmlfile
*file
)
712 xhtmlfile
*xhtml_next_file
= NULL
;
714 if (conf
.nav_attrs
!=NULL
) {
715 fprintf(fp
, " %ls>", conf
.nav_attrs
);
719 if (xhtml_last_file
==NULL
) {
720 fprintf(fp
, "Previous | ");
722 fprintf(fp
, "<a href='%s'>Previous</a> | ", xhtml_last_file
->filename
);
724 fprintf(fp
, "<a href='Contents.html'>Contents</a> | ");
725 if (file
!= NULL
) { /* otherwise we're doing nav links for the index */
726 if (xhtml_next_file
==NULL
)
727 xhtml_next_file
= file
->child
;
728 if (xhtml_next_file
==NULL
)
729 xhtml_next_file
= file
->next
;
730 if (xhtml_next_file
==NULL
)
731 xhtml_next_file
= file
->parent
->next
;
733 if (xhtml_next_file
==NULL
) {
734 if (file
==NULL
) { /* index, so no next file */
735 fprintf(fp
, "Next ");
737 fprintf(fp
, "<a href='%s'>Next</a>", xhtml_index_filename
);
740 fprintf(fp
, "<a href='%s'>Next</a>", xhtml_next_file
->filename
);
742 fprintf(fp
, "</p>\n");
745 /* Write out the index file */
746 static void xhtml_do_index_body(FILE *fp
)
751 if (count234(idx
->entries
) == 0)
752 return; /* don't write anything at all */
754 fprintf(fp
, "<dl>\n");
755 /* iterate over idx->entries using the tree functions and display everything */
756 for (ti
= 0; (y
= (indexentry
*)index234(idx
->entries
, ti
)) != NULL
; ti
++) {
757 if (y
->backend_data
) {
762 xhtml_para(fp
, y
->text
);
763 fprintf(fp
, "</dt>\n<dd>");
765 xi
= (xhtmlindex
*) y
->backend_data
;
766 for (i
=0; i
<xi
->nsection
; i
++) {
767 xhtmlsection
*sect
= xi
->sections
[i
];
769 fprintf(fp
, "<a href='%s#%s'>", sect
->file
->filename
, sect
->fragment
);
770 if (sect
->para
->kwtext
) {
771 xhtml_para(fp
, sect
->para
->kwtext
);
772 } else if (sect
->para
->words
) {
773 xhtml_para(fp
, sect
->para
->words
);
776 if (i
+1<xi
->nsection
) {
781 fprintf(fp
, "</dd>\n");
784 fprintf(fp
, "</dl>\n");
786 static void xhtml_do_index()
788 word temp_word
= { NULL
, NULL
, word_Normal
, 0, 0, L
"Index", { NULL
, 0, 0} };
789 FILE *fp
= fopen(xhtml_index_filename
, "w");
792 fatal(err_cantopenw
, xhtml_index_filename
);
793 xhtml_doheader(fp
, &temp_word
);
794 xhtml_donavlinks(fp
, NULL
);
796 xhtml_do_index_body(fp
);
798 xhtml_donavlinks(fp
, NULL
);
803 /* Output the given file. This includes whatever contents at beginning and end, etc. etc. */
804 static void xhtml_do_file(xhtmlfile
*file
)
806 FILE *fp
= fopen(file
->filename
, "w");
808 fatal(err_cantopenw
, file
->filename
);
810 if (file
->sections
->para
->words
) {
811 xhtml_doheader(fp
, file
->sections
->para
->words
);
812 } else if (file
->sections
->para
->kwtext
) {
813 xhtml_doheader(fp
, file
->sections
->para
->kwtext
);
815 xhtml_doheader(fp
, NULL
);
818 xhtml_donavlinks(fp
, file
);
820 if (file
->is_leaf
&& conf
.leaf_contains_contents
&&
821 xhtml_do_contents(NULL
, file
)>=conf
.leaf_smallest_contents
)
822 xhtml_do_contents(fp
, file
);
823 xhtml_do_sections(fp
, file
->sections
);
825 xhtml_do_naked_contents(fp
, file
);
827 xhtml_donavlinks(fp
, file
);
832 xhtml_last_file
= file
;
835 /* Output the top-level file. */
836 static void xhtml_do_top_file(xhtmlfile
*file
, paragraph
*sourceform
)
840 FILE *fp
= fopen(file
->filename
, "w");
842 fatal(err_cantopenw
, file
->filename
);
844 /* Do the title -- only one allowed */
845 for (p
= sourceform
; p
&& !done
; p
= p
->next
)
847 if (p
->type
== para_Title
)
849 xhtml_doheader(fp
, p
->words
);
854 xhtml_doheader(fp
, NULL
/* Eek! */);
859 for (p
= sourceform
; p
; p
= p
->next
)
861 if (p
->type
== para_Title
) {
862 xhtml_heading(fp
, p
);
867 /* Do the preamble and copyright */
868 for (p
= sourceform
; p
; p
= p
->next
)
870 if (p
->type
== para_Preamble
)
873 xhtml_para(fp
, p
->words
);
874 fprintf(fp
, "</p>\n");
877 for (p
= sourceform
; p
; p
= p
->next
)
879 if (p
->type
== para_Copyright
)
882 xhtml_para(fp
, p
->words
);
883 fprintf(fp
, "</p>\n");
887 xhtml_do_contents(fp
, file
);
888 xhtml_do_sections(fp
, file
->sections
);
891 * Put the index in the top file if we're in single-file mode
894 if (conf
.leaf_level
== 0 && count234(idx
->entries
) > 0) {
895 fprintf(fp
, "<a name=\"index\"></a><h1>Index</h1>\n");
896 xhtml_do_index_body(fp
);
903 /* Convert a Unicode string to an ASCII one. '?' is
904 * used for unmappable characters.
906 static void xhtml_utostr(wchar_t *in
, char **out
)
913 if (in
[i
]>=32 && in
[i
]<=126)
914 (*out
)[i
]=(char)in
[i
];
922 * Write contents for the given file, and subfiles, down to
923 * the appropriate contents depth. Returns the number of
926 static int xhtml_do_contents(FILE *fp
, xhtmlfile
*file
)
928 int level
, limit
, start_level
, count
= 0;
932 level
= (file
->sections
)?
(file
->sections
->level
):(0);
933 limit
= conf
.contents_depth
[(level
>5)?
(5):(level
)];
934 start_level
= (file
->is_leaf
) ?
(level
-1) : (level
);
935 last_level
= start_level
;
937 count
+= xhtml_do_contents_section_limit(fp
, file
->sections
, limit
);
938 count
+= xhtml_do_contents_limit(fp
, file
->child
, limit
);
940 while (last_level
> start_level
) {
942 fprintf(fp
, "</ul>\n");
948 /* As above, but doesn't do anything in the current file */
949 static int xhtml_do_naked_contents(FILE *fp
, xhtmlfile
*file
)
951 int level
, limit
, start_level
, count
= 0;
955 level
= (file
->sections
)?
(file
->sections
->level
):(0);
956 limit
= conf
.contents_depth
[(level
>5)?
(5):(level
)];
957 start_level
= (file
->is_leaf
) ?
(level
-1) : (level
);
958 last_level
= start_level
;
960 count
= xhtml_do_contents_limit(fp
, file
->child
, limit
);
962 while (last_level
> start_level
) {
964 fprintf(fp
, "</ul>\n");
971 * Write contents for the given file, children, and siblings, down to
972 * given limit contents depth.
974 static int xhtml_do_contents_limit(FILE *fp
, xhtmlfile
*file
, int limit
)
978 count
+= xhtml_do_contents_section_limit(fp
, file
->sections
, limit
);
979 count
+= xhtml_do_contents_limit(fp
, file
->child
, limit
);
986 * Write contents entries for the given section tree, down to the
987 * limit contents depth.
989 static int xhtml_do_contents_section_deep_limit(FILE *fp
, xhtmlsection
*section
, int limit
)
993 if (!xhtml_add_contents_entry(fp
, section
, limit
))
997 count
+= xhtml_do_contents_section_deep_limit(fp
, section
->child
, limit
);
998 section
= section
->next
;
1004 * Write contents entries for the given section tree, down to the
1005 * limit contents depth.
1007 static int xhtml_do_contents_section_limit(FILE *fp
, xhtmlsection
*section
, int limit
)
1012 xhtml_add_contents_entry(fp
, section
, limit
);
1014 count
+= xhtml_do_contents_section_deep_limit(fp
, section
->child
, limit
);
1015 /* section=section->child;
1016 while (section && xhtml_add_contents_entry(fp, section, limit)) {
1017 section = section->next;
1023 * Add a section entry, unless we're exceeding the limit, in which
1024 * case return FALSE (otherwise return TRUE).
1026 static int xhtml_add_contents_entry(FILE *fp
, xhtmlsection
*section
, int limit
)
1028 if (!section
|| section
->level
> limit
)
1030 if (fp
==NULL
|| section
->level
< 0)
1032 while (last_level
> section
->level
) {
1034 fprintf(fp
, "</ul>\n");
1036 while (last_level
< section
->level
) {
1038 fprintf(fp
, "<ul>\n");
1040 fprintf(fp
, "<li><a href=\"%s#%s\">", section
->file
->filename
, section
->fragment
);
1041 if (section
->para
->kwtext
) {
1042 xhtml_para(fp
, section
->para
->kwtext
);
1043 if (section
->para
->words
) {
1047 if (section
->para
->words
) {
1048 xhtml_para(fp
, section
->para
->words
);
1050 fprintf(fp
, "</a></li>\n");
1055 * Write all the sections in this file. Do all paragraphs in this section, then all
1056 * children (recursively), then go on to the next one (tail recursively).
1058 static void xhtml_do_sections(FILE *fp
, xhtmlsection
*sections
)
1061 currentsection
= sections
;
1062 xhtml_do_paras(fp
, sections
->para
);
1063 xhtml_do_sections(fp
, sections
->child
);
1064 sections
= sections
->next
;
1068 /* Write this list of paragraphs. Close off all lists at the end. */
1069 static void xhtml_do_paras(FILE *fp
, paragraph
*p
)
1071 int last_type
= -1, first
=TRUE
;
1075 /* for (; p && (xhtml_para_level(p)>limit || xhtml_para_level(p)==-1 || first); p=p->next) {*/
1076 for (; p
&& (xhtml_para_level(p
)==-1 || first
); p
=p
->next
) {
1081 * Things we ignore because we've already processed them or
1082 * aren't going to touch them in this pass.
1086 case para_Biblio
: /* only touch BiblioCited */
1087 case para_VersionID
:
1088 case para_Copyright
:
1099 case para_UnnumberedChapter
:
1100 xhtml_heading(fp
, p
);
1105 xhtml_heading(fp
, p
);
1109 fprintf(fp
, "\n<hr />\n");
1113 fprintf(fp
, "\n<p>");
1114 xhtml_para(fp
, p
->words
);
1115 fprintf(fp
, "</p>\n");
1119 case para_NumberedList
:
1120 case para_BiblioCited
:
1121 if (last_type
!=p
->type
) {
1122 /* start up list if necessary */
1123 if (p
->type
== para_Bullet
) {
1124 fprintf(fp
, "<ul>\n");
1125 } else if (p
->type
== para_NumberedList
) {
1126 fprintf(fp
, "<ol>\n");
1127 } else if (p
->type
== para_BiblioCited
) {
1128 fprintf(fp
, "<dl>\n");
1131 if (p
->type
== para_Bullet
|| p
->type
== para_NumberedList
)
1132 fprintf(fp
, "<li>");
1133 else if (p
->type
== para_BiblioCited
) {
1134 fprintf(fp
, "<dt>");
1135 xhtml_para(fp
, p
->kwtext
);
1136 fprintf(fp
, "</dt>\n<dd>");
1138 xhtml_para(fp
, p
->words
);
1139 if (p
->type
== para_BiblioCited
) {
1140 fprintf(fp
, "</dd>\n");
1141 } else if (p
->type
== para_Bullet
|| p
->type
== para_NumberedList
) {
1142 fprintf(fp
, "</li>");
1144 if (p
->type
== para_Bullet
|| p
->type
== para_NumberedList
|| p
->type
== para_BiblioCited
)
1145 /* close off list if necessary */
1147 paragraph
*p2
= p
->next
;
1148 int close_off
=FALSE
;
1149 /* if (p2 && (xhtml_para_level(p2)>limit || xhtml_para_level(p2)==-1)) {*/
1150 if (p2
&& xhtml_para_level(p2
)==-1) {
1151 if (p2
->type
!= p
->type
)
1157 if (p
->type
== para_Bullet
) {
1158 fprintf(fp
, "</ul>\n");
1159 } else if (p
->type
== para_NumberedList
) {
1160 fprintf(fp
, "</ol>\n");
1161 } else if (p
->type
== para_BiblioCited
) {
1162 fprintf(fp
, "</dl>\n");
1169 xhtml_codepara(fp
, p
->words
);
1172 last_type
= p
->type
;
1177 * Output a header for this XHTML file.
1179 static void xhtml_doheader(FILE *fp
, word
*title
)
1181 fprintf(fp
, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n");
1182 fprintf(fp
, "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n");
1183 fprintf(fp
, "<html xmlns='http://www.w3.org/1999/xhtml'>\n\n<head>\n<title>");
1185 fprintf(fp
, "The thing with no name!");
1187 xhtml_para(fp
, title
);
1188 fprintf(fp
, "</title>\n");
1189 fprintf(fp
, "<meta name=\"generator\" content=\"Halibut %s xhtml-backend\" />\n", version
);
1191 fprintf(fp
, "<meta name=\"author\" content=\"%ls\" />\n", conf
.author
);
1192 if (conf
.description
)
1193 fprintf(fp
, "<meta name=\"description\" content=\"%ls\" />\n", conf
.description
);
1195 fprintf(fp
, "%ls\n", conf
.head_end
);
1196 fprintf(fp
, "</head>\n\n");
1198 fprintf(fp
, "%ls\n", conf
.body
);
1200 fprintf(fp
, "<body>\n");
1201 if (conf
.body_start
)
1202 fprintf(fp
, "%ls\n", conf
.body_start
);
1206 * Output a footer for this XHTML file.
1208 static void xhtml_dofooter(FILE *fp
)
1210 fprintf(fp
, "\n<hr />\n\n");
1212 fprintf(fp
, "%ls\n", conf
.body_end
);
1213 if (!conf
.suppress_address
) {
1214 fprintf(fp
,"<address>\n");
1215 if (conf
.address_start
)
1216 fprintf(fp
, "%ls\n", conf
.address_start
);
1217 /* Do the version ID */
1218 if (conf
.include_version_id
) {
1221 for (p
= sourceparas
; p
; p
= p
->next
)
1222 if (p
->type
== para_VersionID
) {
1223 xhtml_versionid(fp
, p
->words
, started
);
1227 if (conf
.address_end
)
1228 fprintf(fp
, "%ls\n", conf
.address_end
);
1229 fprintf(fp
, "</address>\n");
1231 fprintf(fp
, "</body>\n\n</html>\n");
1235 * Output the versionid paragraph. Typically this is a version control
1236 * ID string (such as $Id...$ in RCS).
1238 static void xhtml_versionid(FILE *fp
, word
*text
, int started
)
1240 rdstringc t
= { 0, 0, NULL
};
1242 rdaddc(&t
, '['); /* FIXME: configurability */
1243 xhtml_rdaddwc(&t
, text
, NULL
);
1244 rdaddc(&t
, ']'); /* FIXME: configurability */
1247 fprintf(fp
, "<br>\n");
1248 fprintf(fp
, "%s\n", t
.text
);
/*
 * Is this an XHTML reserved character?
 *
 * Returns nonzero for the four characters that must be escaped in
 * XHTML text or attribute values (& < > "), and zero for anything else.
 */
static int xhtml_reservedchar(int c)
{
  switch (c) {
    case '&':
    case '<':
    case '>':
    case '"':
      return 1;
    default:
      return 0;
  }
}
1262 * Convert a wide string into valid XHTML: Anything outside ASCII will
1263 * be fixed up as an entity. Currently we don't worry about constraining the
1264 * encoded character set, which we should probably do at some point (we can
1265 * still fix up and return FALSE - see the last comment here). We also don't
1268 * Because this is only used for words, spaces are HARD spaces (any other
1269 * spaces will be word_Whitespace not word_Normal). So they become
1270 * Unless hard_spaces is FALSE, of course (code paragraphs break the above
1273 * If `result' is non-NULL, mallocs the resulting string and stores a pointer to
1274 * it in `*result'. If `result' is NULL, merely checks whether all
1275 * characters in the string are feasible.
1277 * Return is nonzero if all characters are OK. If not all
1278 * characters are OK but `result' is non-NULL, a result _will_
1279 * still be generated!
1281 static int xhtml_convert(wchar_t *s
, char **result
, int hard_spaces
) {
1282 int doing
= (result
!= 0);
1285 int plen
= 0, psize
= 0;
1290 #define ensure_size(i) if (i>=psize) { psize = i+256; p = resize(p, psize); }
1292 if (((c
== 32 && !hard_spaces
) || (c
> 32 && c
<= 126 && !xhtml_reservedchar(c
)))) {
1297 p
[plen
++] = (char)c
;
1300 /* Char needs fixing up. */
1301 /* ok = FALSE; -- currently we never return FALSE; we
1302 * might want to when considering a character set for the
1307 if (c
==32) { /* a space in a word is a hard space */
1308 ensure_size(plen
+6); /* includes space for the NUL, which is subsequently stomped on */
1309 sprintf(p
+plen
, " ");
1312 /* FIXME: entity names! */
1313 ensure_size(plen
+8); /* includes space for the NUL, which is subsequently stomped on */
1314 plen
+=sprintf(p
+plen
, "&#%04i;", (int)c
);
1320 p
= resize(p
, plen
+1);
1328 * This formats the given words as XHTML.
1330 static void xhtml_rdaddwc(rdstringc
*rs
, word
*text
, word
*end
) {
1337 for (; text
&& text
!= end
; text
= text
->next
) {
1338 switch (text
->type
) {
1339 case word_HyperLink
:
1340 xhtml_utostr(text
->text
, &c
);
1341 rdaddsc(rs
, "<a href=\"");
1347 case word_UpperXref
:
1348 case word_LowerXref
:
1349 kwl
= kw_lookup(keywords
, text
->text
);
1351 sect
=xhtml_find_section(kwl
->para
);
1353 rdaddsc(rs
, "<a href=\"");
1354 rdaddsc(rs
, sect
->file
->filename
);
1356 rdaddsc(rs
, sect
->fragment
);
1359 rdaddsc(rs
, "<a href=\"Apologies.html\"><!-- probably a bibliography cross reference -->");
1360 error(err_whatever
, "Couldn't locate cross-reference! (Probably a bibliography entry.)");
1363 rdaddsc(rs
, "<a href=\"Apologies.html\"><!-- unknown cross-reference -->");
1364 error(err_whatever
, "Couldn't locate cross-reference! (Wasn't in source file.)");
1368 case word_IndexRef
: /* in theory we could make an index target here */
1369 /* rdaddsc(rs, "<a name=\"idx-");
1370 xhtml_utostr(text->text, &c);
1373 rdaddsc(rs, "\"></a>");*/
1374 /* what we _do_ need to do is to fix up the backend data
1375 * for any indexentry this points to.
1377 for (ti
=0; (itag
= (indextag
*)index234(idx
->tags
, ti
))!=NULL
; ti
++) {
1378 /* FIXME: really ustricmp() and not ustrcmp()? */
1379 if (ustricmp(itag
->name
, text
->text
)==0) {
1384 if (itag
->refs
!=NULL
) {
1386 for (i
=0; i
<itag
->nrefs
; i
++) {
1387 xhtmlindex
*idx_ref
;
1390 ientry
= itag
->refs
[i
];
1391 if (ientry
->backend_data
==NULL
) {
1392 idx_ref
= (xhtmlindex
*) smalloc(sizeof(xhtmlindex
));
1394 fatal(err_nomemory
);
1395 idx_ref
->nsection
= 0;
1397 idx_ref
->sections
= (xhtmlsection
**) smalloc(idx_ref
->size
* sizeof(xhtmlsection
*));
1398 if (idx_ref
->sections
==NULL
)
1399 fatal(err_nomemory
);
1400 ientry
->backend_data
= idx_ref
;
1402 idx_ref
= ientry
->backend_data
;
1403 if (idx_ref
->nsection
+1 > idx_ref
->size
) {
1404 int new_size
= idx_ref
->size
* 2;
1405 idx_ref
->sections
= srealloc(idx_ref
->sections
, new_size
* sizeof(xhtmlsection
));
1406 if (idx_ref
->sections
==NULL
) {
1407 fatal(err_nomemory
);
1409 idx_ref
->size
= new_size
;
1412 idx_ref
->sections
[idx_ref
->nsection
++] = currentsection
;
1417 fatal(err_whatever
, "Index tag had no entries!");
1420 fprintf(stderr
, "Looking for index entry '%ls'\n", text
->text
);
1421 fatal(err_whatever
, "Couldn't locate index entry! (Wasn't in index.)");
1427 rdaddsc(rs
, "</a>");
1434 case word_WhiteSpace
:
1435 case word_EmphSpace
:
1436 case word_CodeSpace
:
1437 case word_WkCodeSpace
:
1439 case word_EmphQuote
:
1440 case word_CodeQuote
:
1441 case word_WkCodeQuote
:
1442 assert(text
->type
!= word_CodeQuote
&&
1443 text
->type
!= word_WkCodeQuote
);
1444 if (towordstyle(text
->type
) == word_Emph
&&
1445 (attraux(text
->aux
) == attr_First
||
1446 attraux(text
->aux
) == attr_Only
))
1447 rdaddsc(rs
, "<em>");
1448 else if ((towordstyle(text
->type
) == word_Code
|| towordstyle(text
->type
) == word_WeakCode
) &&
1449 (attraux(text
->aux
) == attr_First
||
1450 attraux(text
->aux
) == attr_Only
))
1451 rdaddsc(rs
, "<code>");
1453 if (removeattr(text
->type
) == word_Normal
) {
1454 if (xhtml_convert(text
->text
, &c
, TRUE
)) /* spaces in the word are hard */
1457 xhtml_rdaddwc(rs
, text
->alt
, NULL
);
1459 } else if (removeattr(text
->type
) == word_WhiteSpace
) {
1461 } else if (removeattr(text
->type
) == word_Quote
) {
1462 rdaddsc(rs
, """);
1465 if (towordstyle(text
->type
) == word_Emph
&&
1466 (attraux(text
->aux
) == attr_Last
||
1467 attraux(text
->aux
) == attr_Only
))
1468 rdaddsc(rs
, "</em>");
1469 else if ((towordstyle(text
->type
) == word_Code
|| towordstyle(text
->type
) == word_WeakCode
) &&
1470 (attraux(text
->aux
) == attr_Last
||
1471 attraux(text
->aux
) == attr_Only
))
1472 rdaddsc(rs
, "</code>");
1478 /* Output a heading, formatted as XHTML.
1480 static void xhtml_heading(FILE *fp
, paragraph
*p
)
1482 rdstringc t
= { 0, 0, NULL
};
1483 word
*tprefix
= p
->kwtext
;
1484 word
*nprefix
= p
->kwtext2
;
1485 word
*text
= p
->words
;
1486 int level
= xhtml_para_level(p
);
1487 xhtmlsection
*sect
= xhtml_find_section(p
);
1491 fragment
= sect
->fragment
;
1493 if (p
->type
== para_Title
)
1496 fragment
= ""; /* FIXME: what else can we do? */
1497 error(err_whatever
, "Couldn't locate heading cross-reference!");
1501 if (p
->type
== para_Title
)
1503 else if (level
== 1)
1504 fmt
= &conf
.fchapter
;
1505 else if (level
-1 < conf
.nfsect
)
1506 fmt
= &conf
.fsect
[level
-1];
1508 fmt
= &conf
.fsect
[conf
.nfsect
-1];
1510 if (fmt
&& fmt
->just_numbers
&& nprefix
) {
1511 xhtml_rdaddwc(&t
, nprefix
, NULL
);
1514 if (xhtml_convert(fmt
->number_suffix
, &c
, FALSE
)) {
1519 } else if (fmt
&& !fmt
->just_numbers
&& tprefix
) {
1520 xhtml_rdaddwc(&t
, tprefix
, NULL
);
1523 if (xhtml_convert(fmt
->number_suffix
, &c
, FALSE
)) {
1529 xhtml_rdaddwc(&t
, text
, NULL
);
1531 * If we're outputting in single-file mode, we need to lower
1532 * the level of each heading by one, because the overall
1533 * document title will be sitting right at the top as an <h1>
1534 * and so chapters and sections should start at <h2>.
1536 * Even if not, the document title will come back from
1537 * xhtml_para_level() as level zero, so we must increment that
1538 * no matter what leaf_level is set to.
1540 if (conf
.leaf_level
== 0 || level
== 0)
1542 fprintf(fp
, "<a name=\"%s\"></a><h%i>%s</h%i>\n", fragment
, level
, t
.text
, level
);
1546 /* Output a paragraph. Styles are handled by xhtml_rdaddwc().
1547 * This looks pretty simple; I may have missed something ...
1549 static void xhtml_para(FILE *fp
, word
*text
)
1551 rdstringc out
= { 0, 0, NULL
};
1552 xhtml_rdaddwc(&out
, text
, NULL
);
1553 fprintf(fp
, "%s", out
.text
);
1557 /* Output a code paragraph. I'm treating this as preformatted, which
1558 * may not be entirely correct. See xhtml_para() for my worries about
1559 * this being overly-simple; however I think that most of the complexity
1560 * of the text backend came entirely out of word wrapping anyway.
1562 static void xhtml_codepara(FILE *fp
, word
*text
)
1564 fprintf(fp
, "<pre>");
1565 for (; text
; text
= text
->next
) if (text
->type
== word_WeakCode
) {
1567 xhtml_convert(text
->text
, &c
, FALSE
);
1568 fprintf(fp
, "%s\n", c
);
1571 fprintf(fp
, "</pre>\n");