2 * xhtml backend for Halibut
3 * (initial implementation by James Aylett)
7 * +++ doesn't handle non-breaking hyphens. Not sure how to yet.
8 * +++ entity names (from a file -- ideally supply normal SGML files)
9 * +++ configuration directive to file split where the current layout
10 * code wouldn't. Needs changes to _ponder_layout() and _do_paras(),
15 * +++ biblio/index references target the nearest section marker, rather
16 * than having a dedicated target themselves. In large bibliographies
17 * this will cause problems. (The solution is to fake up a response
18 * from xhtml_find_section(), probably linking it into the sections
19 * chain just in case we need it again, and to make freeing it up
20 * easier.) docsrc.pl used to work as we do, however, and SGT agrees that
21 * this is acceptable for now.
22 * +++ can't cope with leaf-level == 0. It's all to do with the
23 * top-level file not being normal, probably not even having a valid
24 * section level, and stuff like that. I question whether this is an
25 * issue, frankly; small manuals that fit on one page should probably
26 * not be written in halibut at all.
34 struct xhtmlsection_Struct
{
35 struct xhtmlsection_Struct
*next
; /* next sibling (NULL if split across files) */
36 struct xhtmlsection_Struct
*child
; /* NULL if split across files */
37 struct xhtmlsection_Struct
*parent
; /* NULL if split across files */
38 struct xhtmlsection_Struct
*chain
; /* single structure independent of weird trees */
40 struct xhtmlfile_Struct
*file
; /* which file is this a part of? */
41 char *fragment
; /* fragment id within the file */
45 struct xhtmlfile_Struct
{
46 struct xhtmlfile_Struct
*next
;
47 struct xhtmlfile_Struct
*child
;
48 struct xhtmlfile_Struct
*parent
;
50 struct xhtmlsection_Struct
*sections
; /* sections within this file (only one for non-leaf) */
51 int is_leaf
; /* is this file a leaf file, ie does it not have any children? */
/*
 * Short type names for the three structures used by this backend.
 * The xhtmlindex typedef comes before the definition of
 * struct xhtmlindex_Struct, so it also acts as a forward declaration
 * of that tag.
 */
54 typedef struct xhtmlsection_Struct xhtmlsection
;
55 typedef struct xhtmlfile_Struct xhtmlfile
;
56 typedef struct xhtmlindex_Struct xhtmlindex
;
58 struct xhtmlindex_Struct
{
61 xhtmlsection
**sections
;
65 int contents_depth
[6];
66 int leaf_contains_contents
;
68 int leaf_smallest_contents
;
69 int include_version_id
;
70 wchar_t *author
, *description
;
71 wchar_t *head_end
, *body
, *body_start
, *body_end
, *address_start
, *address_end
, *nav_attrs
;
75 /*static void xhtml_level(paragraph *, int);
76 static void xhtml_level_0(paragraph *);
77 static void xhtml_docontents(FILE *, paragraph *, int);
78 static void xhtml_dosections(FILE *, paragraph *, int);
79 static void xhtml_dobody(FILE *, paragraph *, int);*/
/*
 * Forward declarations of helpers that are called before their
 * definitions appear.
 */
/* Page furniture: the XHTML header (with title words), the footer,
 * and the version-ID text. */
81 static void xhtml_doheader(FILE *, word
*);
82 static void xhtml_dofooter(FILE *);
83 static void xhtml_versionid(FILE *, word
*, int);
/* xhtml_utostr: converts a wide string to an ASCII string allocated
 * through the char ** argument. */
85 static void xhtml_utostr(wchar_t *, char **);
/* xhtml_para_level: section level of a paragraph. Callers compare the
 * result with -1 to detect paragraphs that do not start a section and
 * treat values > 0 as real sections. */
86 static int xhtml_para_level(paragraph
*);
/* xhtml_reservedchar: tests for the XHTML reserved characters
 * & < > and ". */
87 static int xhtml_reservedchar(int);
/* xhtml_convert: turns a wide string into XHTML-safe text; the int
 * argument selects hard-space handling. */
89 static int xhtml_convert(wchar_t *, char **, int);
/* xhtml_rdaddwc: formats a run of words (from the first word up to,
 * but not including, the second) as XHTML into an rdstringc. */
90 static void xhtml_rdaddwc(rdstringc
*, word
*, word
*);
/* Paragraph-level emitters called from xhtml_do_paras().
 * NOTE(review): their definitions are not in the visible part of the
 * file -- confirm details there. */
91 static void xhtml_para(FILE *, word
*);
92 static void xhtml_codepara(FILE *, word
*);
93 static void xhtml_heading(FILE *, paragraph
*);
95 /* File-global variables are much easier than passing these things
96 * all over the place. Evil, but easier. We can replace this with a single
97 * structure at some point.
/* Backend configuration; assigned from xhtml_configure() in
 * xhtml_backend(). */
99 static xhtmlconfig conf
;
/* Keyword list passed to xhtml_backend(); consulted via kw_lookup()
 * when resolving cross-references. */
100 static keywordlist
*keywords
;
/* Index data whose entries and tags are walked with index234().
 * NOTE(review): the assignment is not visible in this part of the
 * file. */
101 static indexdata
*idx
;
/* Root of the output file tree, created in xhtml_ponder_layout(). */
102 static xhtmlfile
*topfile
;
/* Created as the first section in xhtml_ponder_layout(), then
 * re-pointed at the last section created once layout is finished. */
103 static xhtmlsection
*topsection
;
/* The source paragraph list given to xhtml_backend(). */
104 static paragraph
*sourceparas
;
/* NOTE(review): no use of lastfile is visible in this part of the
 * file -- confirm whether it is still needed. */
105 static xhtmlfile
*lastfile
;
/* File most recently written by xhtml_do_file(); xhtml_donavlinks()
 * uses it as the target of the "Previous" link (NULL means no link). */
106 static xhtmlfile
*xhtml_last_file
= NULL
;
/* Level of the last contents entry; the contents writers compare it
 * with section levels when emitting <ul> and </ul>. */
107 static int last_level
=-1;
/* Section currently being written; set by xhtml_do_sections(). */
108 static xhtmlsection
*currentsection
;
110 static xhtmlconfig
xhtml_configure(paragraph
*source
)
117 ret
.contents_depth
[0] = 2;
118 ret
.contents_depth
[1] = 3;
119 ret
.contents_depth
[2] = 4;
120 ret
.contents_depth
[3] = 5;
121 ret
.contents_depth
[4] = 6;
122 ret
.contents_depth
[5] = 7;
124 ret
.leaf_smallest_contents
= 4;
125 ret
.leaf_contains_contents
= FALSE
;
126 ret
.include_version_id
= TRUE
;
128 ret
.description
= NULL
;
131 ret
.body_start
= NULL
;
133 ret
.address_start
= NULL
;
134 ret
.address_end
= NULL
;
135 ret
.nav_attrs
= NULL
;
136 ret
.suppress_address
= FALSE
;
138 for (; source
; source
= source
->next
)
140 if (source
->type
== para_Config
)
142 if (!ustricmp(source
->keyword
, L
"xhtml-contents-depth-0")) {
143 ret
.contents_depth
[0] = utoi(uadv(source
->keyword
));
144 } else if (!ustricmp(source
->keyword
, L
"xhtml-contents-depth-1")) {
145 ret
.contents_depth
[1] = utoi(uadv(source
->keyword
));
146 } else if (!ustricmp(source
->keyword
, L
"xhtml-contents-depth-2")) {
147 ret
.contents_depth
[2] = utoi(uadv(source
->keyword
));
148 } else if (!ustricmp(source
->keyword
, L
"xhtml-contents-depth-3")) {
149 ret
.contents_depth
[3] = utoi(uadv(source
->keyword
));
150 } else if (!ustricmp(source
->keyword
, L
"xhtml-contents-depth-4")) {
151 ret
.contents_depth
[4] = utoi(uadv(source
->keyword
));
152 } else if (!ustricmp(source
->keyword
, L
"xhtml-contents-depth-5")) {
153 ret
.contents_depth
[5] = utoi(uadv(source
->keyword
));
154 } else if (!ustricmp(source
->keyword
, L
"xhtml-leaf-level")) {
155 ret
.leaf_level
= utoi(uadv(source
->keyword
));
156 if (ret
.leaf_level
==0) {
157 fatal(err_whatever
, "xhtml-leaf-level cannot be zero");
159 } else if (!ustricmp(source
->keyword
, L
"xhtml-leaf-smallest-contents")) {
160 ret
.leaf_smallest_contents
= utoi(uadv(source
->keyword
));
161 } else if (!ustricmp(source
->keyword
, L
"xhtml-versionid")) {
162 ret
.include_version_id
= utob(uadv(source
->keyword
));
163 } else if (!ustricmp(source
->keyword
, L
"xhtml-leaf-contains-contents")) {
164 ret
.leaf_contains_contents
= utob(uadv(source
->keyword
));
165 } else if (!ustricmp(source
->keyword
, L
"xhtml-suppress-address")) {
166 ret
.suppress_address
= utob(uadv(source
->keyword
));
167 } else if (!ustricmp(source
->keyword
, L
"xhtml-author")) {
168 ret
.author
= uadv(source
->keyword
);
169 } else if (!ustricmp(source
->keyword
, L
"xhtml-description")) {
170 ret
.description
= uadv(source
->keyword
);
171 } else if (!ustricmp(source
->keyword
, L
"xhtml-head-end")) {
172 ret
.head_end
= uadv(source
->keyword
);
173 } else if (!ustricmp(source
->keyword
, L
"xhtml-body-start")) {
174 ret
.body_start
= uadv(source
->keyword
);
175 } else if (!ustricmp(source
->keyword
, L
"xhtml-body-tag")) {
176 ret
.body
= uadv(source
->keyword
);
177 } else if (!ustricmp(source
->keyword
, L
"xhtml-body-end")) {
178 ret
.body_end
= uadv(source
->keyword
);
179 } else if (!ustricmp(source
->keyword
, L
"xhtml-address-start")) {
180 ret
.address_start
= uadv(source
->keyword
);
181 } else if (!ustricmp(source
->keyword
, L
"xhtml-address-end")) {
182 ret
.address_end
= uadv(source
->keyword
);
183 } else if (!ustricmp(source
->keyword
, L
"xhtml-navigation-attributes")) {
184 ret
.nav_attrs
= uadv(source
->keyword
);
189 /* printf(" !!! leaf_level = %i\n", ret.leaf_level);
190 printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]);
191 printf(" !!! contentdepth-1 = %i\n", ret.contents_depth[1]);
192 printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]);
193 printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]);
194 printf(" !!! contentdepth-4 = %i\n", ret.contents_depth[4]);
195 printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]);
196 printf(" !!! leaf_contains_contents = %i\n", ret.leaf_contains_contents);*/
200 static xhtmlsection
*xhtml_new_section(xhtmlsection
*last
)
202 xhtmlsection
*ret
= mknew(xhtmlsection
);
210 ret
->level
=-1; /* marker: end of chain */
214 /* Returns NULL or the section that marks that paragraph */
215 static xhtmlsection
*xhtml_find_section(paragraph
*p
)
217 xhtmlsection
*ret
= topsection
;
218 if (xhtml_para_level(p
)==-1) { /* first, we back-track to a section paragraph */
219 paragraph
*p2
= sourceparas
;
220 paragraph
*p3
= NULL
;
221 while (p2
&& p2
!=p
) {
222 if (xhtml_para_level(p2
)!=-1) {
227 if (p3
==NULL
) { /* for some reason, we couldn't find a section before this paragraph ... ? */
228 /* Note that this can happen, if you have a cross-reference to before the first chapter starts.
229 * So don't do that, then.
235 while (ret
&& ret
->para
!= p
) {
236 /* printf(" xhtml_find_section(): checking %s for para @ %p\n", ret->fragment, p);*/
242 static xhtmlfile
*xhtml_new_file(xhtmlsection
*sect
)
244 xhtmlfile
*ret
= mknew(xhtmlfile
);
251 ret
->is_leaf
=(sect
!=NULL
&& sect
->level
==conf
.leaf_level
);
253 if (conf
.leaf_level
==0) { /* currently unused */
254 #define FILENAME_MANUAL "Manual.html"
255 #define FILENAME_CONTENTS "Contents.html"
256 ret
->filename
= smalloc(strlen(FILENAME_MANUAL
)+1);
257 sprintf(ret
->filename
, FILENAME_MANUAL
);
259 ret
->filename
= smalloc(strlen(FILENAME_CONTENTS
)+1);
260 sprintf(ret
->filename
, FILENAME_CONTENTS
);
263 paragraph
*p
= sect
->para
;
264 rdstringc fname_c
= { 0, 0, NULL
};
267 for (w
=(p
->kwtext
)?
(p
->kwtext
):(p
->words
); w
; w
=w
->next
)
269 switch (removeattr(w
->type
))
274 case word_WeakCode:*/
275 xhtml_utostr(w
->text
, &c
);
281 rdaddsc(&fname_c
, ".html");
282 ret
->filename
= rdtrimc(&fname_c
);
284 /* printf(" ! new file '%s', is_leaf == %s\n", ret->filename, (ret->is_leaf)?("true"):("false"));*/
289 * Walk the tree fixing up files which are actually leaf (ie
290 * have no children) but aren't at leaf level, so they have the
293 void xhtml_fixup_layout(xhtmlfile
* file
)
295 if (file
->child
==NULL
) {
296 file
->is_leaf
= TRUE
;
298 xhtml_fixup_layout(file
->child
);
301 xhtml_fixup_layout(file
->next
);
305 * Create the tree structure so we know where everything goes.
308 * Ignoring file splitting, we have three choices with each new section:
310 * +-----------------+-----------------+
318 * Y is the last section we added (currentsect).
319 * If sect is the section we want to add, then:
321 * (1) if sect->level < currentsect->level
322 * (2) if sect->level == currentsect->level
323 * (3) if sect->level > currentsect->level
325 * This requires the constraint that you never skip section numbers
326 * (so you can't have a.b.c.d without all of a, a.b and a.b.c existing).
328 * Note that you _can_ have 1.1.1.1 followed by 1.2 - you can move up
329 * more than one level at a time. Lots of asserts, and probably part of
330 * the algorithm here, rely on the no-skipping constraint holding. (It
331 * currently isn't enforced by halibut, however.)
333 * File splitting makes this harder. For instance, say we added at (3)
334 * above and now need to add another section. We are splitting at level
335 * 2, ie the level of Y. Z is the last section we added:
337 * +-----------------+-----------------+
347 * The (1) case is now split; we need to search upwards to find where
348 * to actually link in. The other two cases remain the same (and will
349 * always be like this).
351 * File splitting complicates the linking, but the decision of whether
352 * to split to a new file is always made on the same condition (is
353 * the level of this section higher than the leaf_level configuration
356 * Treating the cases backwards:
358 * (3) same file if sect->level > conf.leaf_level, otherwise new file
360 * if in the same file, currentsect->child points to sect
361 * otherwise the linking is done through the file tree (which works
362 * in more or less the same way, ie currentfile->child points to
365 * (2) same file if sect->level > conf.leaf_level, otherwise new file
367 * if in the same file, currentsect->next points to sect
368 * otherwise file linking and currentfile->next points to the new
369 * file (we know that Z must have caused a new file to be created)
371 * (1) same file if sect->level > conf.leaf_level, otherwise new file
373 * this is actually effectively the same case as (2) here,
374 * except that we first have to travel up the sections to figure
375 * out which section this new one will be a sibling of. In doing
376 * so, we may disappear off the top of a file and have to go up
377 * to its parent in the file tree.
380 static void xhtml_ponder_layout(paragraph
*p
)
382 xhtmlsection
*lastsection
;
383 xhtmlsection
*currentsect
;
384 xhtmlfile
*currentfile
;
387 topsection
= xhtml_new_section(NULL
);
388 topfile
= xhtml_new_file(NULL
);
389 lastsection
= topsection
;
390 currentfile
= topfile
;
391 currentsect
= topsection
;
395 int level
= xhtml_para_level(p
);
396 if (level
>0) /* actually a section */
401 rdstringc fname_c
= { 0, 0, NULL
};
403 sect
= xhtml_new_section(lastsection
);
406 for (w
=(p
->kwtext2
)?
(p
->kwtext2
):(p
->words
); w
; w
=w
->next
) /* kwtext2 because we want numbers only! */
408 switch (removeattr(w
->type
))
413 case word_WeakCode:*/
414 xhtml_utostr(w
->text
, &c
);
420 /* rdaddsc(&fname_c, ".html");*/
421 sect
->fragment
= rdtrimc(&fname_c
);
423 /* printf(" ! adding para @ %p as sect %s, level %i\n", sect->para, sect->fragment, level);*/
425 if (level
>currentsect
->level
) { /* case (3) */
426 if (level
>conf
.leaf_level
) { /* same file */
427 assert(currentfile
->is_leaf
);
428 currentsect
->child
= sect
;
429 sect
->parent
=currentsect
;
430 sect
->file
=currentfile
;
431 /* printf("connected '%s' to existing file '%s' [I]\n", sect->fragment, currentfile->filename);*/
433 } else { /* new file */
434 xhtmlfile
*file
= xhtml_new_file(sect
);
435 assert(!currentfile
->is_leaf
);
436 currentfile
->child
=file
;
438 file
->parent
=currentfile
;
439 /* printf("connected '%s' to new file '%s' [I]\n", sect->fragment, file->filename);*/
443 } else if (level
>= currentsect
->file
->sections
->level
) {
444 /* Case (1) or (2) *AND* still under the section that starts
447 * I'm not convinced that this couldn't be rolled in with the
448 * final else {} leg further down. It seems a lot of effort
451 if (level
>conf
.leaf_level
) { /* stick within the same file */
452 assert(currentfile
->is_leaf
);
453 sect
->file
= currentfile
;
454 while (currentsect
&& currentsect
->level
> level
&&
455 currentsect
->file
==currentsect
->parent
->file
) {
456 currentsect
= currentsect
->parent
;
459 currentsect
->next
= sect
;
460 assert(currentsect
->level
== sect
->level
);
461 sect
->parent
= currentsect
->parent
;
463 /* printf("connected '%s' to existing file '%s' [II]\n", sect->fragment, currentfile->filename);*/
464 } else { /* new file */
465 xhtmlfile
*file
= xhtml_new_file(sect
);
467 currentfile
->next
=file
;
468 file
->parent
=currentfile
->parent
;
469 file
->is_leaf
=(level
==conf
.leaf_level
);
471 /* printf("connected '%s' to new file '%s' [II]\n", sect->fragment, file->filename);*/
475 } else { /* Case (1) or (2) and we must move up the file tree first */
476 /* this loop is now probably irrelevant - we know we can't connect
477 * to anything in the current file */
478 while (currentsect
&& level
<currentsect
->level
) {
479 currentsect
=currentsect
->parent
;
481 /* printf(" * up one level to '%s'\n", currentsect->fragment);*/
483 /* printf(" * up one level (off top of current file)\n");*/
487 /* I'm pretty sure this can now never fire */
488 assert(currentfile
->is_leaf
);
489 /* printf("connected '%s' to existing file '%s' [III]\n", sect->fragment, currentfile->filename);*/
490 sect
->file
= currentfile
;
491 currentsect
->next
=sect
;
493 } else { /* find a file we can attach to */
494 while (currentfile
&& currentfile
->sections
&& level
<currentfile
->sections
->level
) {
495 currentfile
=currentfile
->parent
;
497 /* printf(" * up one file level to '%s'\n", currentfile->filename);*/
499 /* printf(" * up one file level (off top of tree)\n");*/
502 if (currentfile
) { /* new file (we had to skip up a file to
503 get here, so we must be dealing with a
504 level no lower than the configured
506 xhtmlfile
*file
= xhtml_new_file(sect
);
507 currentfile
->next
=file
;
509 file
->parent
=currentfile
->parent
;
510 file
->is_leaf
=(level
==conf
.leaf_level
);
512 /* printf("connected '%s' to new file '%s' [III]\n", sect->fragment, file->filename);*/
516 fatal(err_whatever
, "Ran off the top trying to connect sibling: strange document.");
522 topsection
= lastsection
; /* get correct end of the chain */
523 xhtml_fixup_layout(topfile
); /* leaf files not at leaf level marked as such */
/*
 * Forward declarations for the output routines defined further down.
 */
/* Whole-file writers: the index page, an ordinary file, and the
 * top-level file. */
526 static void xhtml_do_index();
527 static void xhtml_do_file(xhtmlfile
*file
);
528 static void xhtml_do_top_file(xhtmlfile
*file
, paragraph
*sourceform
);
/* Writes a list of paragraphs, closing off any open lists at the end. */
529 static void xhtml_do_paras(FILE *fp
, paragraph
*p
);
/* Contents-list writers. The *_limit variants stop at the given
 * contents depth; xhtml_add_contents_entry() reports whether the
 * section was within that limit. */
530 static int xhtml_do_contents_limit(FILE *fp
, xhtmlfile
*file
, int limit
);
531 static int xhtml_do_contents_section_limit(FILE *fp
, xhtmlsection
*section
, int limit
);
532 static int xhtml_add_contents_entry(FILE *fp
, xhtmlsection
*section
, int limit
);
/* xhtml_do_contents() covers the file and its subfiles;
 * xhtml_do_naked_contents() skips the sections of the file itself. */
533 static int xhtml_do_contents(FILE *fp
, xhtmlfile
*file
);
534 static int xhtml_do_naked_contents(FILE *fp
, xhtmlfile
*file
);
/* Writes every section in a file, recursing into child sections. */
535 static void xhtml_do_sections(FILE *fp
, xhtmlsection
*sections
);
538 * Do all the files in this structure.
540 static void xhtml_do_files(xhtmlfile
*file
)
544 xhtml_do_files(file
->child
);
546 xhtml_do_files(file
->next
);
550 * Free up all memory used by the file tree from 'xfile' downwards
552 static void xhtml_free_file(xhtmlfile
* xfile
)
558 if (xfile
->filename
) {
559 sfree(xfile
->filename
);
561 xhtml_free_file(xfile
->child
);
562 xhtml_free_file(xfile
->next
);
569 void xhtml_backend(paragraph
*sourceform
, keywordlist
*in_keywords
,
577 sourceparas
= sourceform
;
578 conf
= xhtml_configure(sourceform
);
579 keywords
= in_keywords
;
582 /* Clear up the index entries backend data pointers */
583 for (ti
=0; (ientry
= (indexentry
*)index234(idx
->entries
, ti
))!=NULL
; ti
++) {
584 ientry
->backend_data
=NULL
;
587 xhtml_ponder_layout(sourceform
);
589 /* old system ... (writes to *.alt, but gets some stuff wrong and is ugly) */
590 /* xhtml_level_0(sourceform);
591 for (i=1; i<=conf.leaf_level; i++)
593 xhtml_level(sourceform, i);
596 /* new system ... (writes to *.html, but isn't fully trusted) */
597 xhtml_do_top_file(topfile
, sourceform
);
598 assert(!topfile
->next
); /* shouldn't have a sibling at all */
599 xhtml_do_files(topfile
->child
);
602 /* release file, section, index data structures */
605 xhtmlsection
*tmp
= xsect
->chain
;
606 if (xsect
->fragment
) {
607 sfree(xsect
->fragment
);
612 xhtml_free_file(topfile
);
613 for (ti
= 0; (ientry
=(indexentry
*)index234(idx
->entries
, ti
))!=NULL
; ti
++) {
614 if (ientry
->backend_data
!=NULL
) {
615 xhtmlindex
*xi
= (xhtmlindex
*) ientry
->backend_data
;
616 if (xi
->sections
!=NULL
) {
621 ientry
->backend_data
= NULL
;
625 static int xhtml_para_level(paragraph
*p
)
629 case para_UnnumberedChapter
:
634 /* case para_BiblioCited:
/* File name of the generated index page: opened for writing by
 * xhtml_do_index() and used by xhtml_donavlinks() as the "Next" target
 * when a file has no following file. */
647 static char* xhtml_index_filename
= "IndexPage.html";
649 /* Output the nav links for the current file.
650 * file == NULL means we're doing the index
652 static void xhtml_donavlinks(FILE *fp
, xhtmlfile
*file
)
654 xhtmlfile
*xhtml_next_file
= NULL
;
656 if (conf
.nav_attrs
!=NULL
) {
657 fprintf(fp
, " %ls>", conf
.nav_attrs
);
661 if (xhtml_last_file
==NULL
) {
662 fprintf(fp
, "Previous | ");
664 fprintf(fp
, "<a href='%s'>Previous</a> | ", xhtml_last_file
->filename
);
666 fprintf(fp
, "<a href='Contents.html'>Contents</a> | ");
667 if (file
!= NULL
) { /* otherwise we're doing nav links for the index */
668 if (xhtml_next_file
==NULL
)
669 xhtml_next_file
= file
->child
;
670 if (xhtml_next_file
==NULL
)
671 xhtml_next_file
= file
->next
;
672 if (xhtml_next_file
==NULL
)
673 xhtml_next_file
= file
->parent
->next
;
675 if (xhtml_next_file
==NULL
) {
676 if (file
==NULL
) { /* index, so no next file */
677 fprintf(fp
, "Next ");
679 fprintf(fp
, "<a href='%s'>Next</a>", xhtml_index_filename
);
682 fprintf(fp
, "<a href='%s'>Next</a>", xhtml_next_file
->filename
);
684 fprintf(fp
, "</p>\n");
687 /* Write out the index file */
688 static void xhtml_do_index()
690 word temp_word
= { NULL
, NULL
, word_Normal
, 0, 0, L
"Index", { NULL
, 0, 0} };
693 FILE *fp
= fopen(xhtml_index_filename
, "w");
696 fatal(err_cantopenw
, xhtml_index_filename
);
697 xhtml_doheader(fp
, &temp_word
);
698 xhtml_donavlinks(fp
, NULL
);
700 fprintf(fp
, "<dl>\n");
701 /* iterate over idx->entries using the tree functions and display everything */
702 for (ti
= 0; (y
= (indexentry
*)index234(idx
->entries
, ti
)) != NULL
; ti
++) {
703 if (y
->backend_data
) {
708 xhtml_para(fp
, y
->text
);
709 fprintf(fp
, "</dt>\n<dd>");
711 xi
= (xhtmlindex
*) y
->backend_data
;
712 for (i
=0; i
<xi
->nsection
; i
++) {
713 xhtmlsection
*sect
= xi
->sections
[i
];
715 fprintf(fp
, "<a href='%s#%s'>", sect
->file
->filename
, sect
->fragment
);
716 if (sect
->para
->kwtext
) {
717 xhtml_para(fp
, sect
->para
->kwtext
);
718 } else if (sect
->para
->words
) {
719 xhtml_para(fp
, sect
->para
->words
);
722 if (i
+1<xi
->nsection
) {
727 fprintf(fp
, "</dd>\n");
730 fprintf(fp
, "</dl>\n");
732 xhtml_donavlinks(fp
, NULL
);
737 /* Output the given file: header, navigation links at top and bottom, any contents listings, and its sections. */
738 static void xhtml_do_file(xhtmlfile
*file
)
740 FILE *fp
= fopen(file
->filename
, "w");
742 fatal(err_cantopenw
, file
->filename
);
744 if (file
->sections
->para
->words
) {
745 xhtml_doheader(fp
, file
->sections
->para
->words
);
746 } else if (file
->sections
->para
->kwtext
) {
747 xhtml_doheader(fp
, file
->sections
->para
->kwtext
);
749 xhtml_doheader(fp
, NULL
);
752 xhtml_donavlinks(fp
, file
);
754 if (file
->is_leaf
&& conf
.leaf_contains_contents
&& xhtml_do_contents(NULL
, file
)>=conf
.leaf_smallest_contents
)
755 xhtml_do_contents(fp
, file
);
756 xhtml_do_sections(fp
, file
->sections
);
758 xhtml_do_naked_contents(fp
, file
);
760 xhtml_donavlinks(fp
, file
);
765 xhtml_last_file
= file
;
768 /* Output the top-level file. */
769 static void xhtml_do_top_file(xhtmlfile
*file
, paragraph
*sourceform
)
773 FILE *fp
= fopen(file
->filename
, "w");
775 fatal(err_cantopenw
, file
->filename
);
777 /* Do the title -- only one allowed */
778 for (p
= sourceform
; p
&& !done
; p
= p
->next
)
780 if (p
->type
== para_Title
)
782 xhtml_doheader(fp
, p
->words
);
787 xhtml_doheader(fp
, NULL
/* Eek! */);
789 /* Do the preamble and copyright */
790 for (p
= sourceform
; p
; p
= p
->next
)
792 if (p
->type
== para_Preamble
)
795 xhtml_para(fp
, p
->words
);
796 fprintf(fp
, "</p>\n");
799 for (p
= sourceform
; p
; p
= p
->next
)
801 if (p
->type
== para_Copyright
)
804 xhtml_para(fp
, p
->words
);
805 fprintf(fp
, "</p>\n");
809 xhtml_do_contents(fp
, file
);
810 xhtml_do_sections(fp
, file
->sections
);
815 /* Convert a Unicode string to an ASCII one. '?' is
816 * used for unmappable characters.
818 static void xhtml_utostr(wchar_t *in
, char **out
)
825 if (in
[i
]>=32 && in
[i
]<=126)
826 (*out
)[i
]=(char)in
[i
];
834 * Write contents for the given file, and subfiles, down to
835 * the appropriate contents depth. Returns the number of
838 static int xhtml_do_contents(FILE *fp
, xhtmlfile
*file
)
840 int level
, limit
, start_level
, count
= 0;
844 level
= (file
->sections
)?
(file
->sections
->level
):(0);
845 limit
= conf
.contents_depth
[(level
>5)?
(5):(level
)];
846 start_level
= (file
->is_leaf
) ?
(level
-1) : (level
);
847 last_level
= start_level
;
849 count
+= xhtml_do_contents_section_limit(fp
, file
->sections
, limit
);
850 count
+= xhtml_do_contents_limit(fp
, file
->child
, limit
);
852 while (last_level
> start_level
) {
854 fprintf(fp
, "</ul>\n");
860 /* As above, but doesn't do anything in the current file */
861 static int xhtml_do_naked_contents(FILE *fp
, xhtmlfile
*file
)
863 int level
, limit
, start_level
, count
= 0;
867 level
= (file
->sections
)?
(file
->sections
->level
):(0);
868 limit
= conf
.contents_depth
[(level
>5)?
(5):(level
)];
869 start_level
= (file
->is_leaf
) ?
(level
-1) : (level
);
870 last_level
= start_level
;
872 count
= xhtml_do_contents_limit(fp
, file
->child
, limit
);
874 while (last_level
> start_level
) {
876 fprintf(fp
, "</ul>\n");
883 * Write contents for the given file, children, and siblings, down to
884 * given limit contents depth.
886 static int xhtml_do_contents_limit(FILE *fp
, xhtmlfile
*file
, int limit
)
890 count
+= xhtml_do_contents_section_limit(fp
, file
->sections
, limit
);
891 count
+= xhtml_do_contents_limit(fp
, file
->child
, limit
);
898 * Write contents entries for the given section tree, down to the
899 * limit contents depth.
901 static int xhtml_do_contents_section_deep_limit(FILE *fp
, xhtmlsection
*section
, int limit
)
905 if (!xhtml_add_contents_entry(fp
, section
, limit
))
909 count
+= xhtml_do_contents_section_deep_limit(fp
, section
->child
, limit
);
910 section
= section
->next
;
916 * Write contents entries for the given section tree, down to the
917 * limit contents depth.
919 static int xhtml_do_contents_section_limit(FILE *fp
, xhtmlsection
*section
, int limit
)
924 xhtml_add_contents_entry(fp
, section
, limit
);
926 count
+= xhtml_do_contents_section_deep_limit(fp
, section
->child
, limit
);
927 /* section=section->child;
928 while (section && xhtml_add_contents_entry(fp, section, limit)) {
929 section = section->next;
935 * Add a section entry, unless we're exceeding the limit, in which
936 * case return FALSE (otherwise return TRUE).
938 static int xhtml_add_contents_entry(FILE *fp
, xhtmlsection
*section
, int limit
)
940 if (!section
|| section
->level
> limit
)
944 while (last_level
> section
->level
) {
946 fprintf(fp
, "</ul>\n");
948 while (last_level
< section
->level
) {
950 fprintf(fp
, "<ul>\n");
952 fprintf(fp
, "<li><a href=\"%s#%s\">", section
->file
->filename
, section
->fragment
);
953 if (section
->para
->kwtext
) {
954 xhtml_para(fp
, section
->para
->kwtext
);
955 if (section
->para
->words
) {
959 if (section
->para
->words
) {
960 xhtml_para(fp
, section
->para
->words
);
962 fprintf(fp
, "</a></li>\n");
967 * Write all the sections in this file. Do all paragraphs in this section, then all
968 * children (recursively), then go on to the next one (tail recursively).
970 static void xhtml_do_sections(FILE *fp
, xhtmlsection
*sections
)
973 currentsection
= sections
;
974 xhtml_do_paras(fp
, sections
->para
);
975 xhtml_do_sections(fp
, sections
->child
);
976 sections
= sections
->next
;
980 /* Write this list of paragraphs. Close off all lists at the end. */
981 static void xhtml_do_paras(FILE *fp
, paragraph
*p
)
983 int last_type
= -1, first
=TRUE
;
987 /* for (; p && (xhtml_para_level(p)>limit || xhtml_para_level(p)==-1 || first); p=p->next) {*/
988 for (; p
&& (xhtml_para_level(p
)==-1 || first
); p
=p
->next
) {
993 * Things we ignore because we've already processed them or
994 * aren't going to touch them in this pass.
998 case para_Biblio
: /* only touch BiblioCited */
1000 case para_Copyright
:
1011 case para_UnnumberedChapter
:
1012 xhtml_heading(fp
, p
);
1017 xhtml_heading(fp
, p
);
1021 fprintf(fp
, "\n<hr />\n");
1025 fprintf(fp
, "\n<p>");
1026 xhtml_para(fp
, p
->words
);
1027 fprintf(fp
, "</p>\n");
1031 case para_NumberedList
:
1032 case para_BiblioCited
:
1033 if (last_type
!=p
->type
) {
1034 /* start up list if necessary */
1035 if (p
->type
== para_Bullet
) {
1036 fprintf(fp
, "<ul>\n");
1037 } else if (p
->type
== para_NumberedList
) {
1038 fprintf(fp
, "<ol>\n");
1039 } else if (p
->type
== para_BiblioCited
) {
1040 fprintf(fp
, "<dl>\n");
1043 if (p
->type
== para_Bullet
|| p
->type
== para_NumberedList
)
1044 fprintf(fp
, "<li>");
1045 else if (p
->type
== para_BiblioCited
) {
1046 fprintf(fp
, "<dt>");
1047 xhtml_para(fp
, p
->kwtext
);
1048 fprintf(fp
, "</dt>\n<dd>");
1050 xhtml_para(fp
, p
->words
);
1051 if (p
->type
== para_BiblioCited
) {
1052 fprintf(fp
, "</dd>\n");
1053 } else if (p
->type
== para_Bullet
|| p
->type
== para_NumberedList
) {
1054 fprintf(fp
, "</li>");
1056 if (p
->type
== para_Bullet
|| p
->type
== para_NumberedList
|| p
->type
== para_BiblioCited
)
1057 /* close off list if necessary */
1059 paragraph
*p2
= p
->next
;
1060 int close_off
=FALSE
;
1061 /* if (p2 && (xhtml_para_level(p2)>limit || xhtml_para_level(p2)==-1)) {*/
1062 if (p2
&& xhtml_para_level(p2
)==-1) {
1063 if (p2
->type
!= p
->type
)
1069 if (p
->type
== para_Bullet
) {
1070 fprintf(fp
, "</ul>\n");
1071 } else if (p
->type
== para_NumberedList
) {
1072 fprintf(fp
, "</ol>\n");
1073 } else if (p
->type
== para_BiblioCited
) {
1074 fprintf(fp
, "</dl>\n");
1081 xhtml_codepara(fp
, p
->words
);
1084 last_type
= p
->type
;
1089 * Output a header for this XHTML file.
1091 static void xhtml_doheader(FILE *fp
, word
*title
)
1093 fprintf(fp
, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n");
1094 fprintf(fp
, "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n");
1095 fprintf(fp
, "<html xmlns='http://www.w3.org/1999/xhtml'>\n\n<head>\n<title>");
1097 fprintf(fp
, "The thing with no name!");
1099 xhtml_para(fp
, title
);
1100 fprintf(fp
, "</title>\n");
1101 fprintf(fp
, "<meta name=\"generator\" content=\"Halibut %s xhtml-backend\" />\n", version
);
1103 fprintf(fp
, "<meta name=\"author\" content=\"%ls\" />\n", conf
.author
);
1104 if (conf
.description
)
1105 fprintf(fp
, "<meta name=\"description\" content=\"%ls\" />\n", conf
.description
);
1107 fprintf(fp
, "%ls\n", conf
.head_end
);
1108 fprintf(fp
, "</head>\n\n");
1110 fprintf(fp
, "%ls\n", conf
.body
);
1112 fprintf(fp
, "<body>\n");
1113 if (conf
.body_start
)
1114 fprintf(fp
, "%ls\n", conf
.body_start
);
1118 * Output a footer for this XHTML file.
1120 static void xhtml_dofooter(FILE *fp
)
1122 fprintf(fp
, "\n<hr />\n\n");
1124 fprintf(fp
, "%ls\n", conf
.body_end
);
1125 if (!conf
.suppress_address
) {
1126 fprintf(fp
,"<address>\n");
1127 if (conf
.address_start
)
1128 fprintf(fp
, "%ls\n", conf
.address_start
);
1129 /* Do the version ID */
1130 if (conf
.include_version_id
) {
1133 for (p
= sourceparas
; p
; p
= p
->next
)
1134 if (p
->type
== para_VersionID
) {
1135 xhtml_versionid(fp
, p
->words
, started
);
1139 if (conf
.address_end
)
1140 fprintf(fp
, "%ls\n", conf
.address_end
);
1141 fprintf(fp
, "</address>\n");
1143 fprintf(fp
, "</body>\n\n</html>\n");
1147 * Output the versionid paragraph. Typically this is a version control
1148 * ID string (such as $Id...$ in RCS).
1150 static void xhtml_versionid(FILE *fp
, word
*text
, int started
)
1152 rdstringc t
= { 0, 0, NULL
};
1154 rdaddc(&t
, '['); /* FIXME: configurability */
1155 xhtml_rdaddwc(&t
, text
, NULL
);
1156 rdaddc(&t
, ']'); /* FIXME: configurability */
1159 fprintf(fp
, "<br>\n");
1160 fprintf(fp
, "%s\n", t
.text
);
1164 /* Is this an XHTML reserved character? */
1165 static int xhtml_reservedchar(int c
)
1167 if (c
=='&' || c
=='<' || c
=='>' || c
=='"')
1174 * Convert a wide string into valid XHTML: Anything outside ASCII will
1175 * be fixed up as an entity. Currently we don't worry about constraining the
1176 * encoded character set, which we should probably do at some point (we can
1177 * still fix up and return FALSE - see the last comment here). We also don't
1180 * Because this is only used for words, spaces are HARD spaces (any other
1181 * spaces will be word_Whitespace not word_Normal). So they become &nbsp;.
1182 * Unless hard_spaces is FALSE, of course (code paragraphs break the above
1185 * If `result' is non-NULL, mallocs the resulting string and stores a pointer to
1186 * it in `*result'. If `result' is NULL, merely checks whether all
1187 * characters in the string are feasible.
1189 * Return is nonzero if all characters are OK. If not all
1190 * characters are OK but `result' is non-NULL, a result _will_
1191 * still be generated!
1193 static int xhtml_convert(wchar_t *s
, char **result
, int hard_spaces
) {
1194 int doing
= (result
!= 0);
1197 int plen
= 0, psize
= 0;
1202 #define ensure_size(i) if (i>=psize) { psize = i+256; p = resize(p, psize); }
1204 if (((c
== 32 && !hard_spaces
) || (c
> 32 && c
<= 126 && !xhtml_reservedchar(c
)))) {
1209 p
[plen
++] = (char)c
;
1212 /* Char needs fixing up. */
1213 /* ok = FALSE; -- currently we never return FALSE; we
1214 * might want to when considering a character set for the
1219 if (c
==32) { /* a space in a word is a hard space */
1220 ensure_size(plen
+6); /* includes space for the NUL, which is subsequently stomped on */
1221 sprintf(p
+plen
, " ");
1224 /* FIXME: entity names! */
1225 ensure_size(plen
+8); /* includes space for the NUL, which is subsequently stomped on */
1226 plen
+=sprintf(p
+plen
, "&#%04i;", (int)c
);
1232 p
= resize(p
, plen
+1);
1240 * This formats the given words as XHTML.
1242 static void xhtml_rdaddwc(rdstringc
*rs
, word
*text
, word
*end
) {
1249 for (; text
&& text
!= end
; text
= text
->next
) {
1250 switch (text
->type
) {
1251 case word_HyperLink
:
1252 xhtml_utostr(text
->text
, &c
);
1253 rdaddsc(rs
, "<a href=\"");
1259 case word_UpperXref
:
1260 case word_LowerXref
:
1261 kwl
= kw_lookup(keywords
, text
->text
);
1263 sect
=xhtml_find_section(kwl
->para
);
1265 rdaddsc(rs
, "<a href=\"");
1266 rdaddsc(rs
, sect
->file
->filename
);
1268 rdaddsc(rs
, sect
->fragment
);
1271 rdaddsc(rs
, "<a href=\"Apologies.html\"><!-- probably a bibliography cross reference -->");
1272 error(err_whatever
, "Couldn't locate cross-reference! (Probably a bibliography entry.)");
1275 rdaddsc(rs
, "<a href=\"Apologies.html\"><!-- unknown cross-reference -->");
1276 error(err_whatever
, "Couldn't locate cross-reference! (Wasn't in source file.)");
1280 case word_IndexRef
: /* in theory we could make an index target here */
1281 /* rdaddsc(rs, "<a name=\"idx-");
1282 xhtml_utostr(text->text, &c);
1285 rdaddsc(rs, "\"></a>");*/
1286 /* what we _do_ need to do is to fix up the backend data
1287 * for any indexentry this points to.
1289 for (ti
=0; (itag
= (indextag
*)index234(idx
->tags
, ti
))!=NULL
; ti
++) {
1290 /* FIXME: really ustricmp() and not ustrcmp()? */
1291 if (ustricmp(itag
->name
, text
->text
)==0) {
1296 if (itag
->refs
!=NULL
) {
1298 for (i
=0; i
<itag
->nrefs
; i
++) {
1299 xhtmlindex
*idx_ref
;
1302 ientry
= itag
->refs
[i
];
1303 if (ientry
->backend_data
==NULL
) {
1304 idx_ref
= (xhtmlindex
*) smalloc(sizeof(xhtmlindex
));
1306 fatal(err_nomemory
);
1307 idx_ref
->nsection
= 0;
1309 idx_ref
->sections
= (xhtmlsection
**) smalloc(idx_ref
->size
* sizeof(xhtmlsection
*));
1310 if (idx_ref
->sections
==NULL
)
1311 fatal(err_nomemory
);
1312 ientry
->backend_data
= idx_ref
;
1314 idx_ref
= ientry
->backend_data
;
1315 if (idx_ref
->nsection
+1 > idx_ref
->size
) {
1316 int new_size
= idx_ref
->size
* 2;
1317 idx_ref
->sections
= srealloc(idx_ref
->sections
, new_size
* sizeof(xhtmlsection
));
1318 if (idx_ref
->sections
==NULL
) {
1319 fatal(err_nomemory
);
1321 idx_ref
->size
= new_size
;
1324 idx_ref
->sections
[idx_ref
->nsection
++] = currentsection
;
1329 fatal(err_whatever
, "Index tag had no entries!");
1332 fprintf(stderr
, "Looking for index entry '%ls'\n", text
->text
);
1333 fatal(err_whatever
, "Couldn't locate index entry! (Wasn't in index.)");
1339 rdaddsc(rs
, "</a>");
1346 case word_WhiteSpace
:
1347 case word_EmphSpace
:
1348 case word_CodeSpace
:
1349 case word_WkCodeSpace
:
1351 case word_EmphQuote
:
1352 case word_CodeQuote
:
1353 case word_WkCodeQuote
:
1354 assert(text
->type
!= word_CodeQuote
&&
1355 text
->type
!= word_WkCodeQuote
);
1356 if (towordstyle(text
->type
) == word_Emph
&&
1357 (attraux(text
->aux
) == attr_First
||
1358 attraux(text
->aux
) == attr_Only
))
1359 rdaddsc(rs
, "<em>");
1360 else if ((towordstyle(text
->type
) == word_Code
|| towordstyle(text
->type
) == word_WeakCode
) &&
1361 (attraux(text
->aux
) == attr_First
||
1362 attraux(text
->aux
) == attr_Only
))
1363 rdaddsc(rs
, "<code>");
1365 if (removeattr(text
->type
) == word_Normal
) {
1366 if (xhtml_convert(text
->text
, &c
, TRUE
)) /* spaces in the word are hard */
1369 xhtml_rdaddwc(rs
, text
->alt
, NULL
);
1371 } else if (removeattr(text
->type
) == word_WhiteSpace
) {
1373 } else if (removeattr(text
->type
) == word_Quote
) {
1374 rdaddsc(rs
, """);
1377 if (towordstyle(text
->type
) == word_Emph
&&
1378 (attraux(text
->aux
) == attr_Last
||
1379 attraux(text
->aux
) == attr_Only
))
1380 rdaddsc(rs
, "</em>");
1381 else if ((towordstyle(text
->type
) == word_Code
|| towordstyle(text
->type
) == word_WeakCode
) &&
1382 (attraux(text
->aux
) == attr_Last
||
1383 attraux(text
->aux
) == attr_Only
))
1384 rdaddsc(rs
, "</code>");
1390 /* Output a heading, formatted as XHTML.
1392 static void xhtml_heading(FILE *fp
, paragraph
*p
)
1394 rdstringc t
= { 0, 0, NULL
};
1395 word
*tprefix
= p
->kwtext
;
1396 word
*nprefix
= p
->kwtext2
;
1397 word
*text
= p
->words
;
1398 int level
= xhtml_para_level(p
);
1399 xhtmlsection
*sect
= xhtml_find_section(p
);
1402 fragment
= sect
->fragment
;
1404 fragment
= ""; /* FIXME: what else can we do? */
1405 error(err_whatever
, "Couldn't locate heading cross-reference!");
1408 if (level
>2 && nprefix
) { /* FIXME: configurability on the level thing */
1409 xhtml_rdaddwc(&t
, nprefix
, NULL
);
1410 rdaddc(&t
, ' '); /* FIXME: as below */
1411 } else if (tprefix
) {
1412 xhtml_rdaddwc(&t
, tprefix
, NULL
);
1413 rdaddsc(&t
, ": "); /* FIXME: configurability */
1415 xhtml_rdaddwc(&t
, text
, NULL
);
1416 fprintf(fp
, "<a name=\"%s\"></a><h%i>%s</h%i>\n", fragment
, level
, t
.text
, level
);
1420 /* Output a paragraph. Styles are handled by xhtml_rdaddwc().
1421 * This looks pretty simple; I may have missed something ...
1423 static void xhtml_para(FILE *fp
, word
*text
)
1425 rdstringc out
= { 0, 0, NULL
};
1426 xhtml_rdaddwc(&out
, text
, NULL
);
1427 fprintf(fp
, "%s", out
.text
);
1431 /* Output a code paragraph. I'm treating this as preformatted, which
1432 * may not be entirely correct. See xhtml_para() for my worries about
1433 * this being overly-simple; however I think that most of the complexity
1434 * of the text backend came entirely out of word wrapping anyway.
1436 static void xhtml_codepara(FILE *fp
, word
*text
)
1438 fprintf(fp
, "<pre>");
1439 for (; text
; text
= text
->next
) if (text
->type
== word_WeakCode
) {
1441 xhtml_convert(text
->text
, &c
, FALSE
);
1442 fprintf(fp
, "%s\n", c
);
1445 fprintf(fp
, "</pre>\n");