Arrange a mechanism whereby each backend can be passed a filename
[sgt/halibut] / bk_xhtml.c
CommitLineData
d7482997 1/*
2 * xhtml backend for Halibut
3 * (initial implementation by James Aylett)
4 *
5 * Still to do:
6 *
7 * +++ doesn't handle non-breaking hyphens. Not sure how to yet.
8 * +++ entity names (from a file -- ideally supply normal SGML files)
9 * +++ configuration directive to file split where the current layout
10 * code wouldn't. Needs changes to _ponder_layout() and _do_paras(),
11 * perhaps others.
12 *
13 * Limitations:
14 *
15 * +++ biblio/index references target the nearest section marker, rather
16 * than having a dedicated target themselves. In large bibliographies
17 * this will cause problems. (The solution is to fake up a response
18 * from xhtml_find_section(), probably linking it into the sections
19 * chain just in case we need it again, and to make freeing it up
20 * easier.) docsrc.pl used to work as we do, however, and SGT agrees that
21 * this is acceptable for now.
22 * +++ can't cope with leaf-level == 0. It's all to do with the
23 * top-level file not being normal, probably not even having a valid
24 * section level, and stuff like that. I question whether this is an
25 * issue, frankly; small manuals that fit on one page should probably
26 * not be written in halibut at all.
27 */
28
29#include <stdio.h>
30#include <stdlib.h>
677e18a2 31#include <string.h>
d7482997 32#include <assert.h>
33#include "halibut.h"
34
50d6b4bd 35/*
36 * FILENAME_TEMPLATE (overridable in config of course) allows you
37 * to choose the general form for your HTML file names. It is
38 * slightly printf-styled (% followed by a single character is a
39 * formatting directive, %% is a literal %). Formatting directives
40 * are:
41 *
ba9c1487 42 * - %n is the section type-plus-number, minus whitespace (`Chapter1.2').
50d6b4bd 43 * - %b is the section number on its own (`1.2').
44 * - %k is the section's _internal_ keyword.
45 * - %N is the section's visible title in the output, again minus
46 * whitespace.
47 *
48 * %n, %b and %k will all default to %N if the section is
49 * unnumbered (`Bibliography' is often a good example).
50 */
51
/* Default output file names; each can be overridden by an xhtml-*-filename
 * configuration directive (see xhtml_configure()). */
#define FILENAME_SINGLE   "Manual.html"    /* default for xhtml-single-filename */
#define FILENAME_CONTENTS "Contents.html"  /* default for xhtml-contents-filename */
#define FILENAME_INDEX    "IndexPage.html" /* default for xhtml-index-filename */
#define FILENAME_TEMPLATE "%n.html"        /* default for xhtml-template-filename */
56
d7482997 57struct xhtmlsection_Struct {
58 struct xhtmlsection_Struct *next; /* next sibling (NULL if split across files) */
59 struct xhtmlsection_Struct *child; /* NULL if split across files */
60 struct xhtmlsection_Struct *parent; /* NULL if split across files */
61 struct xhtmlsection_Struct *chain; /* single structure independent of weird trees */
62 paragraph *para;
63 struct xhtmlfile_Struct *file; /* which file is this a part of? */
64 char *fragment; /* fragment id within the file */
65 int level;
66};
67
/*
 * One output file. Files form a tree (next/child/parent) that
 * mirrors the section structure down to the configured leaf level.
 */
struct xhtmlfile_Struct {
    struct xhtmlfile_Struct *next;        /* next sibling file */
    struct xhtmlfile_Struct *child;       /* first file nested beneath this one */
    struct xhtmlfile_Struct *parent;      /* file this one is nested beneath */
    char *filename;                       /* name the file is written to; freed with the file */
    struct xhtmlsection_Struct *sections; /* sections within this file (only one for non-leaf) */
    int is_leaf;                          /* is this file a leaf file, ie does it not have any children? */
};
76
/* Short names for the structures used throughout this backend. */
typedef struct xhtmlsection_Struct xhtmlsection;
typedef struct xhtmlfile_Struct    xhtmlfile;
typedef struct xhtmlindex_Struct   xhtmlindex;
80
81struct xhtmlindex_Struct {
82 int nsection;
83 int size;
84 xhtmlsection **sections;
85};
86
/*
 * Heading format for one level of heading, set from the
 * xhtml-{chapter,section}-{numeric,suffix} directives.
 */
typedef struct {
    int just_numbers;       /* boolean from the `-numeric' directive */
    wchar_t *number_suffix; /* string from the `-suffix' directive (default L": " or L" ") */
} xhtmlheadfmt;
91
92typedef struct {
d7482997 93 int contents_depth[6];
94 int leaf_contains_contents;
95 int leaf_level;
96 int leaf_smallest_contents;
97 int include_version_id;
98 wchar_t *author, *description;
99 wchar_t *head_end, *body, *body_start, *body_end, *address_start, *address_end, *nav_attrs;
100 int suppress_address;
5d9cc07b 101 xhtmlheadfmt fchapter, *fsect;
102 int nfsect;
50d6b4bd 103 char *contents_filename, *index_filename;
104 char *single_filename, *template_filename;
d7482997 105} xhtmlconfig;
106
107/*static void xhtml_level(paragraph *, int);
108static void xhtml_level_0(paragraph *);
109static void xhtml_docontents(FILE *, paragraph *, int);
110static void xhtml_dosections(FILE *, paragraph *, int);
111static void xhtml_dobody(FILE *, paragraph *, int);*/
112
113static void xhtml_doheader(FILE *, word *);
114static void xhtml_dofooter(FILE *);
115static void xhtml_versionid(FILE *, word *, int);
116
117static void xhtml_utostr(wchar_t *, char **);
118static int xhtml_para_level(paragraph *);
119static int xhtml_reservedchar(int);
120
4b3c5afb 121static int xhtml_convert(wchar_t *, int, char **, int);
ce9921d6 122static void xhtml_rdaddwc(rdstringc *, word *, word *, int);
123static void xhtml_para(FILE *, word *, int);
d7482997 124static void xhtml_codepara(FILE *, word *);
ce9921d6 125static void xhtml_heading(FILE *, paragraph *, int);
d7482997 126
127/* File-global variables are much easier than passing these things
128 * all over the place. Evil, but easier. We can replace this with a single
129 * structure at some point.
130 */
131static xhtmlconfig conf;
132static keywordlist *keywords;
133static indexdata *idx;
134static xhtmlfile *topfile;
135static xhtmlsection *topsection;
136static paragraph *sourceparas;
137static xhtmlfile *lastfile;
138static xhtmlfile *xhtml_last_file = NULL;
c8c7926b 139static int last_level=-1, start_level;
d7482997 140static xhtmlsection *currentsection;
141
142static xhtmlconfig xhtml_configure(paragraph *source)
143{
144 xhtmlconfig ret;
145
146 /*
147 * Defaults.
148 */
149 ret.contents_depth[0] = 2;
150 ret.contents_depth[1] = 3;
151 ret.contents_depth[2] = 4;
152 ret.contents_depth[3] = 5;
153 ret.contents_depth[4] = 6;
154 ret.contents_depth[5] = 7;
155 ret.leaf_level = 2;
156 ret.leaf_smallest_contents = 4;
157 ret.leaf_contains_contents = FALSE;
158 ret.include_version_id = TRUE;
159 ret.author = NULL;
160 ret.description = NULL;
161 ret.head_end = NULL;
162 ret.body = NULL;
163 ret.body_start = NULL;
164 ret.body_end = NULL;
165 ret.address_start = NULL;
166 ret.address_end = NULL;
167 ret.nav_attrs = NULL;
168 ret.suppress_address = FALSE;
169
5d9cc07b 170 ret.fchapter.just_numbers = FALSE;
e5e6bf9d 171 ret.fchapter.number_suffix = L": ";
5d9cc07b 172 ret.nfsect = 2;
173 ret.fsect = mknewa(xhtmlheadfmt, ret.nfsect);
174 ret.fsect[0].just_numbers = FALSE;
e5e6bf9d 175 ret.fsect[0].number_suffix = L": ";
5d9cc07b 176 ret.fsect[1].just_numbers = TRUE;
e5e6bf9d 177 ret.fsect[1].number_suffix = L" ";
50d6b4bd 178 ret.contents_filename = strdup(FILENAME_CONTENTS);
179 ret.single_filename = strdup(FILENAME_SINGLE);
180 ret.index_filename = strdup(FILENAME_INDEX);
181 ret.template_filename = strdup(FILENAME_TEMPLATE);
5d9cc07b 182
d7482997 183 for (; source; source = source->next)
184 {
185 if (source->type == para_Config)
186 {
50d6b4bd 187 if (!ustricmp(source->keyword, L"xhtml-contents-filename")) {
188 sfree(ret.contents_filename);
189 ret.contents_filename = utoa_dup(uadv(source->keyword));
190 } else if (!ustricmp(source->keyword, L"xhtml-single-filename")) {
191 sfree(ret.single_filename);
192 ret.single_filename = utoa_dup(uadv(source->keyword));
193 } else if (!ustricmp(source->keyword, L"xhtml-index-filename")) {
194 sfree(ret.index_filename);
195 ret.index_filename = utoa_dup(uadv(source->keyword));
196 } else if (!ustricmp(source->keyword, L"xhtml-template-filename")) {
197 sfree(ret.template_filename);
198 ret.template_filename = utoa_dup(uadv(source->keyword));
199 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-0")) {
d7482997 200 ret.contents_depth[0] = utoi(uadv(source->keyword));
201 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-1")) {
202 ret.contents_depth[1] = utoi(uadv(source->keyword));
203 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-2")) {
204 ret.contents_depth[2] = utoi(uadv(source->keyword));
205 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-3")) {
206 ret.contents_depth[3] = utoi(uadv(source->keyword));
207 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-4")) {
208 ret.contents_depth[4] = utoi(uadv(source->keyword));
209 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-5")) {
210 ret.contents_depth[5] = utoi(uadv(source->keyword));
211 } else if (!ustricmp(source->keyword, L"xhtml-leaf-level")) {
212 ret.leaf_level = utoi(uadv(source->keyword));
d7482997 213 } else if (!ustricmp(source->keyword, L"xhtml-leaf-smallest-contents")) {
214 ret.leaf_smallest_contents = utoi(uadv(source->keyword));
215 } else if (!ustricmp(source->keyword, L"xhtml-versionid")) {
216 ret.include_version_id = utob(uadv(source->keyword));
217 } else if (!ustricmp(source->keyword, L"xhtml-leaf-contains-contents")) {
218 ret.leaf_contains_contents = utob(uadv(source->keyword));
219 } else if (!ustricmp(source->keyword, L"xhtml-suppress-address")) {
220 ret.suppress_address = utob(uadv(source->keyword));
221 } else if (!ustricmp(source->keyword, L"xhtml-author")) {
222 ret.author = uadv(source->keyword);
223 } else if (!ustricmp(source->keyword, L"xhtml-description")) {
224 ret.description = uadv(source->keyword);
225 } else if (!ustricmp(source->keyword, L"xhtml-head-end")) {
226 ret.head_end = uadv(source->keyword);
227 } else if (!ustricmp(source->keyword, L"xhtml-body-start")) {
228 ret.body_start = uadv(source->keyword);
229 } else if (!ustricmp(source->keyword, L"xhtml-body-tag")) {
230 ret.body = uadv(source->keyword);
231 } else if (!ustricmp(source->keyword, L"xhtml-body-end")) {
232 ret.body_end = uadv(source->keyword);
233 } else if (!ustricmp(source->keyword, L"xhtml-address-start")) {
234 ret.address_start = uadv(source->keyword);
235 } else if (!ustricmp(source->keyword, L"xhtml-address-end")) {
236 ret.address_end = uadv(source->keyword);
237 } else if (!ustricmp(source->keyword, L"xhtml-navigation-attributes")) {
238 ret.nav_attrs = uadv(source->keyword);
5d9cc07b 239 } else if (!ustricmp(source->keyword, L"xhtml-chapter-numeric")) {
240 ret.fchapter.just_numbers = utob(uadv(source->keyword));
241 } else if (!ustricmp(source->keyword, L"xhtml-chapter-suffix")) {
e5e6bf9d 242 ret.fchapter.number_suffix = uadv(source->keyword);
5d9cc07b 243 } else if (!ustricmp(source->keyword, L"xhtml-section-numeric")) {
244 wchar_t *p = uadv(source->keyword);
245 int n = 0;
246 if (uisdigit(*p)) {
247 n = utoi(p);
248 p = uadv(p);
249 }
250 if (n >= ret.nfsect) {
251 int i;
252 ret.fsect = resize(ret.fsect, n+1);
253 for (i = ret.nfsect; i <= n; i++)
254 ret.fsect[i] = ret.fsect[ret.nfsect-1];
255 ret.nfsect = n+1;
256 }
257 ret.fsect[n].just_numbers = utob(p);
258 } else if (!ustricmp(source->keyword, L"xhtml-section-suffix")) {
259 wchar_t *p = uadv(source->keyword);
260 int n = 0;
261 if (uisdigit(*p)) {
262 n = utoi(p);
263 p = uadv(p);
264 }
265 if (n >= ret.nfsect) {
266 int i;
267 ret.fsect = resize(ret.fsect, n+1);
268 for (i = ret.nfsect; i <= n; i++)
269 ret.fsect[i] = ret.fsect[ret.nfsect-1];
270 ret.nfsect = n+1;
271 }
e5e6bf9d 272 ret.fsect[n].number_suffix = p;
d7482997 273 }
274 }
275 }
276
277 /* printf(" !!! leaf_level = %i\n", ret.leaf_level);
278 printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]);
279 printf(" !!! contentdepth-1 = %i\n", ret.contents_depth[1]);
280 printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]);
281 printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]);
282 printf(" !!! contentdepth-4 = %i\n", ret.contents_depth[4]);
283 printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]);
284 printf(" !!! leaf_contains_contents = %i\n", ret.leaf_contains_contents);*/
285 return ret;
286}
287
ba9c1487 288paragraph *xhtml_config_filename(char *filename)
289{
290 /*
291 * If the user passes in a single filename as a parameter to
292 * the `--html' command-line option, then we should assume it
293 * to imply _two_ config directives:
294 * \cfg{xhtml-single-filename}{whatever} and
295 * \cfg{xhtml-leaf-level}{0}; the rationale being that the user
296 * wants their output _in that file_.
297 */
298
299 paragraph *p[2];
300 int i, len;
301 wchar_t *ufilename, *up;
302
303 for (i = 0; i < 2; i++) {
304 p[i] = mknew(paragraph);
305 memset(p[i], 0, sizeof(*p[i]));
306 p[i]->type = para_Config;
307 p[i]->next = NULL;
308 p[i]->fpos.filename = "<command line>";
309 p[i]->fpos.line = p[i]->fpos.col = -1;
310 }
311
312 ufilename = ufroma_dup(filename);
313 len = ustrlen(ufilename) + 2 + lenof(L"xhtml-single-filename");
314 p[0]->keyword = mknewa(wchar_t, len);
315 up = p[0]->keyword;
316 ustrcpy(up, L"xhtml-single-filename");
317 up = uadv(up);
318 ustrcpy(up, ufilename);
319 up = uadv(up);
320 *up = L'\0';
321 assert(up - p[0]->keyword < len);
322 sfree(ufilename);
323
324 len = lenof(L"xhtml-leaf-level") + lenof(L"0") + 1;
325 p[1]->keyword = mknewa(wchar_t, len);
326 up = p[1]->keyword;
327 ustrcpy(up, L"xhtml-leaf-level");
328 up = uadv(up);
329 ustrcpy(up, L"0");
330 up = uadv(up);
331 *up = L'\0';
332 assert(up - p[1]->keyword < len);
333
334 p[0]->next = p[1];
335
336 return p[0];
337}
338
d7482997 339static xhtmlsection *xhtml_new_section(xhtmlsection *last)
340{
341 xhtmlsection *ret = mknew(xhtmlsection);
342 ret->next=NULL;
343 ret->child=NULL;
344 ret->parent=NULL;
345 ret->chain=last;
346 ret->para=NULL;
347 ret->file=NULL;
348 ret->fragment=NULL;
349 ret->level=-1; /* marker: end of chain */
350 return ret;
351}
352
353/* Returns NULL or the section that marks that paragraph */
354static xhtmlsection *xhtml_find_section(paragraph *p)
355{
356 xhtmlsection *ret = topsection;
357 if (xhtml_para_level(p)==-1) { /* first, we back-track to a section paragraph */
358 paragraph *p2 = sourceparas;
359 paragraph *p3 = NULL;
360 while (p2 && p2!=p) {
361 if (xhtml_para_level(p2)!=-1) {
362 p3 = p2;
363 }
364 p2=p2->next;
365 }
366 if (p3==NULL) { /* for some reason, we couldn't find a section before this paragraph ... ? */
367 /* Note that this can happen, if you have a cross-reference to before the first chapter starts.
368 * So don't do that, then.
369 */
370 return NULL;
371 }
372 p=p3;
373 }
374 while (ret && ret->para != p) {
375/* printf(" xhtml_find_section(): checking %s for para @ %p\n", ret->fragment, p);*/
376 ret=ret->chain;
377 }
378 return ret;
379}
380
381static xhtmlfile *xhtml_new_file(xhtmlsection *sect)
382{
383 xhtmlfile *ret = mknew(xhtmlfile);
384
385 ret->next=NULL;
386 ret->child=NULL;
387 ret->parent=NULL;
388 ret->filename=NULL;
389 ret->sections=sect;
390 ret->is_leaf=(sect!=NULL && sect->level==conf.leaf_level);
391 if (sect==NULL) {
392 if (conf.leaf_level==0) { /* currently unused */
50d6b4bd 393 ret->filename = smalloc(strlen(conf.single_filename)+1);
394 sprintf(ret->filename, conf.single_filename);
d7482997 395 } else {
50d6b4bd 396 ret->filename = smalloc(strlen(conf.contents_filename)+1);
397 sprintf(ret->filename, conf.contents_filename);
d7482997 398 }
399 } else {
400 paragraph *p = sect->para;
401 rdstringc fname_c = { 0, 0, NULL };
50d6b4bd 402 char *c, *t;
d7482997 403 word *w;
50d6b4bd 404 wchar_t *ws;
405
406 t = conf.template_filename;
407 while (*t) {
408 if (*t == '%' && t[1]) {
409 int fmt;
410
411 t++;
412 fmt = *t++;
413
414 if (fmt == '%') {
415 rdaddc(&fname_c, fmt);
416 continue;
417 }
418
419 w = NULL;
420 ws = NULL;
421
422 if (p->kwtext && fmt == 'n')
423 w = p->kwtext;
424 else if (p->kwtext2 && fmt == 'b')
425 w = p->kwtext2;
426 else if (p->keyword && *p->keyword && fmt == 'k')
427 ws = p->keyword;
428 else
429 w = p->words;
430
431 while (w) {
432 switch (removeattr(w->type))
433 {
434 case word_Normal:
435 /*case word_Emph:
436 case word_Code:
437 case word_WeakCode:*/
438 xhtml_utostr(w->text, &c);
439 rdaddsc(&fname_c,c);
440 sfree(c);
441 break;
442 }
443 w = w->next;
444 }
445 if (ws) {
446 xhtml_utostr(ws, &c);
447 rdaddsc(&fname_c,c);
448 sfree(c);
449 }
450 } else {
451 rdaddc(&fname_c, *t++);
d7482997 452 }
453 }
50d6b4bd 454
d7482997 455 ret->filename = rdtrimc(&fname_c);
456 }
457 /* printf(" ! new file '%s', is_leaf == %s\n", ret->filename, (ret->is_leaf)?("true"):("false"));*/
458 return ret;
459}
460
461/*
462 * Walk the tree fixing up files which are actually leaf (ie
463 * have no children) but aren't at leaf level, so they have the
464 * leaf flag set.
465 */
466void xhtml_fixup_layout(xhtmlfile* file)
467{
468 if (file->child==NULL) {
469 file->is_leaf = TRUE;
470 } else {
471 xhtml_fixup_layout(file->child);
472 }
473 if (file->next)
474 xhtml_fixup_layout(file->next);
475}
476
/*
 * Create the tree structure so we know where everything goes.
 * Method:
 *
 * Ignoring file splitting, we have three choices with each new section:
 *
 * +-----------------+-----------------+
 * |                 |                 |
 * X            +----X----+           (1)
 *              |         |
 *              Y        (2)
 *              |
 *             (3)
 *
 * Y is the last section we added (currentsect).
 * If sect is the section we want to add, then:
 *
 * (1) if sect->level < currentsect->level
 * (2) if sect->level == currentsect->level
 * (3) if sect->level > currentsect->level
 *
 * This requires the constraint that you never skip section numbers
 * (so you can't have a.b.c.d without all of a, a.b and a.b.c existing).
 *
 * Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change
 * more than one level at a time. Lots of asserts, and probably part of
 * the algorithm here, rely on this being true. (It currently isn't
 * enforced by halibut, however.)
 *
 * File splitting makes this harder. For instance, say we added at (3)
 * above and now need to add another section. We are splitting at level
 * 2, ie the level of Y. Z is the last section we added:
 *
 * +-----------------+-----------------+
 * |                 |                 |
 * X            +----X----+           (1)
 *              |         |
 *         +----Y----+   (1)
 *         |         |
 *         Z        (2)
 *         |
 *        (3)
 *
 * The (1) case is now split; we need to search upwards to find where
 * to actually link in. The other two cases remain the same (and will
 * always be like this).
 *
 * Although file splitting makes the linking harder, the decision of
 * whether to split to a new file is always made on the same condition:
 * whether or not the level of this section is higher than the
 * leaf_level configuration value.
 *
 * Treating the cases backwards:
 *
 * (3) same file if sect->level > conf.leaf_level, otherwise new file
 *
 *     if in the same file, currentsect->child points to sect
 *     otherwise the linking is done through the file tree (which works
 *     in more or less the same way, ie currentfile->child points to
 *     the new file)
 *
 * (2) same file if sect->level > conf.leaf_level, otherwise new file
 *
 *     if in the same file, currentsect->next points to sect
 *     otherwise file linking and currentfile->next points to the new
 *     file (we know that Z must have caused a new file to be created)
 *
 * (1) same file if sect->level > conf.leaf_level, otherwise new file
 *
 *     this is actually effectively the same case as (2) here,
 *     except that we first have to travel up the sections to figure
 *     out which section this new one will be a sibling of. In doing
 *     so, we may disappear off the top of a file and have to go up
 *     to its parent in the file tree.
 *
 */
553static void xhtml_ponder_layout(paragraph *p)
554{
555 xhtmlsection *lastsection;
556 xhtmlsection *currentsect;
557 xhtmlfile *currentfile;
558
559 lastfile = NULL;
560 topsection = xhtml_new_section(NULL);
561 topfile = xhtml_new_file(NULL);
562 lastsection = topsection;
563 currentfile = topfile;
564 currentsect = topsection;
565
d2e74722 566 if (conf.leaf_level == 0) {
567 topfile->is_leaf = 1;
568 topfile->sections = topsection;
569 topsection->file = topfile;
570 }
571
d7482997 572 for (; p; p=p->next)
573 {
574 int level = xhtml_para_level(p);
575 if (level>0) /* actually a section */
576 {
577 xhtmlsection *sect;
578 word *w;
579 char *c;
580 rdstringc fname_c = { 0, 0, NULL };
581
582 sect = xhtml_new_section(lastsection);
583 lastsection = sect;
584 sect->para = p;
585 for (w=(p->kwtext2)?(p->kwtext2):(p->words); w; w=w->next) /* kwtext2 because we want numbers only! */
586 {
587 switch (removeattr(w->type))
588 {
589 case word_Normal:
590 /*case word_Emph:
591 case word_Code:
592 case word_WeakCode:*/
593 xhtml_utostr(w->text, &c);
594 rdaddsc(&fname_c,c);
595 sfree(c);
596 break;
597 }
598 }
599/* rdaddsc(&fname_c, ".html");*/
600 sect->fragment = rdtrimc(&fname_c);
601 sect->level = level;
602 /* printf(" ! adding para @ %p as sect %s, level %i\n", sect->para, sect->fragment, level);*/
603
604 if (level>currentsect->level) { /* case (3) */
605 if (level>conf.leaf_level) { /* same file */
606 assert(currentfile->is_leaf);
607 currentsect->child = sect;
608 sect->parent=currentsect;
609 sect->file=currentfile;
610 /* printf("connected '%s' to existing file '%s' [I]\n", sect->fragment, currentfile->filename);*/
611 currentsect=sect;
612 } else { /* new file */
613 xhtmlfile *file = xhtml_new_file(sect);
614 assert(!currentfile->is_leaf);
615 currentfile->child=file;
616 sect->file=file;
617 file->parent=currentfile;
618 /* printf("connected '%s' to new file '%s' [I]\n", sect->fragment, file->filename);*/
619 currentfile=file;
620 currentsect=sect;
621 }
622 } else if (level >= currentsect->file->sections->level) {
623 /* Case (1) or (2) *AND* still under the section that starts
624 * the current file.
625 *
626 * I'm not convinced that this couldn't be rolled in with the
627 * final else {} leg further down. It seems a lot of effort
628 * this way.
629 */
630 if (level>conf.leaf_level) { /* stick within the same file */
631 assert(currentfile->is_leaf);
632 sect->file = currentfile;
633 while (currentsect && currentsect->level > level &&
634 currentsect->file==currentsect->parent->file) {
635 currentsect = currentsect->parent;
636 }
637 assert(currentsect);
638 currentsect->next = sect;
639 assert(currentsect->level == sect->level);
640 sect->parent = currentsect->parent;
641 currentsect = sect;
642 /* printf("connected '%s' to existing file '%s' [II]\n", sect->fragment, currentfile->filename);*/
643 } else { /* new file */
644 xhtmlfile *file = xhtml_new_file(sect);
645 sect->file=file;
646 currentfile->next=file;
647 file->parent=currentfile->parent;
648 file->is_leaf=(level==conf.leaf_level);
649 file->sections=sect;
650 /* printf("connected '%s' to new file '%s' [II]\n", sect->fragment, file->filename);*/
651 currentfile=file;
652 currentsect=sect;
653 }
654 } else { /* Case (1) or (2) and we must move up the file tree first */
655 /* this loop is now probably irrelevant - we know we can't connect
656 * to anything in the current file */
657 while (currentsect && level<currentsect->level) {
658 currentsect=currentsect->parent;
659 if (currentsect) {
660 /* printf(" * up one level to '%s'\n", currentsect->fragment);*/
661 } else {
662 /* printf(" * up one level (off top of current file)\n");*/
663 }
664 }
665 if (currentsect) {
666 /* I'm pretty sure this can now never fire */
667 assert(currentfile->is_leaf);
668 /* printf("connected '%s' to existing file '%s' [III]\n", sect->fragment, currentfile->filename);*/
669 sect->file = currentfile;
670 currentsect->next=sect;
671 currentsect=sect;
672 } else { /* find a file we can attach to */
673 while (currentfile && currentfile->sections && level<currentfile->sections->level) {
674 currentfile=currentfile->parent;
675 if (currentfile) {
676 /* printf(" * up one file level to '%s'\n", currentfile->filename);*/
677 } else {
678 /* printf(" * up one file level (off top of tree)\n");*/
679 }
680 }
681 if (currentfile) { /* new file (we had to skip up a file to
682 get here, so we must be dealing with a
683 level no lower than the configured
684 leaf_level */
685 xhtmlfile *file = xhtml_new_file(sect);
686 currentfile->next=file;
687 sect->file=file;
688 file->parent=currentfile->parent;
689 file->is_leaf=(level==conf.leaf_level);
690 file->sections=sect;
691 /* printf("connected '%s' to new file '%s' [III]\n", sect->fragment, file->filename);*/
692 currentfile=file;
693 currentsect=sect;
694 } else {
695 fatal(err_whatever, "Ran off the top trying to connect sibling: strange document.");
696 }
697 }
698 }
699 }
700 }
701 topsection = lastsection; /* get correct end of the chain */
702 xhtml_fixup_layout(topfile); /* leaf files not at leaf level marked as such */
703}
704
705static void xhtml_do_index();
706static void xhtml_do_file(xhtmlfile *file);
707static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform);
ce9921d6 708static void xhtml_do_paras(FILE *fp, paragraph *p, paragraph *end, int indexable);
d7482997 709static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit);
710static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit);
711static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit);
712static int xhtml_do_contents(FILE *fp, xhtmlfile *file);
713static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file);
714static void xhtml_do_sections(FILE *fp, xhtmlsection *sections);
715
716/*
717 * Do all the files in this structure.
718 */
719static void xhtml_do_files(xhtmlfile *file)
720{
721 xhtml_do_file(file);
722 if (file->child)
723 xhtml_do_files(file->child);
724 if (file->next)
725 xhtml_do_files(file->next);
726}
727
728/*
729 * Free up all memory used by the file tree from 'xfile' downwards
730 */
731static void xhtml_free_file(xhtmlfile* xfile)
732{
733 if (xfile==NULL) {
734 return;
735 }
736
737 if (xfile->filename) {
738 sfree(xfile->filename);
739 }
740 xhtml_free_file(xfile->child);
741 xhtml_free_file(xfile->next);
742 sfree(xfile);
743}
744
745/*
746 * Main function.
747 */
748void xhtml_backend(paragraph *sourceform, keywordlist *in_keywords,
749 indexdata *in_idx)
750{
751/* int i;*/
752 indexentry *ientry;
753 int ti;
754 xhtmlsection *xsect;
755
756 sourceparas = sourceform;
757 conf = xhtml_configure(sourceform);
758 keywords = in_keywords;
759 idx = in_idx;
760
761 /* Clear up the index entries backend data pointers */
762 for (ti=0; (ientry = (indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
763 ientry->backend_data=NULL;
764 }
765
766 xhtml_ponder_layout(sourceform);
767
768 /* old system ... (writes to *.alt, but gets some stuff wrong and is ugly) */
769/* xhtml_level_0(sourceform);
770 for (i=1; i<=conf.leaf_level; i++)
771 {
772 xhtml_level(sourceform, i);
773 }*/
774
775 /* new system ... (writes to *.html, but isn't fully trusted) */
776 xhtml_do_top_file(topfile, sourceform);
777 assert(!topfile->next); /* shouldn't have a sibling at all */
d2e74722 778 if (topfile->child) {
779 xhtml_do_files(topfile->child);
780 xhtml_do_index();
781 }
d7482997 782
783 /* release file, section, index data structures */
784 xsect = topsection;
785 while (xsect) {
786 xhtmlsection *tmp = xsect->chain;
787 if (xsect->fragment) {
788 sfree(xsect->fragment);
789 }
790 sfree(xsect);
791 xsect = tmp;
792 }
793 xhtml_free_file(topfile);
794 for (ti = 0; (ientry=(indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
795 if (ientry->backend_data!=NULL) {
796 xhtmlindex *xi = (xhtmlindex*) ientry->backend_data;
797 if (xi->sections!=NULL) {
798 sfree(xi->sections);
799 }
800 sfree(xi);
801 }
802 ientry->backend_data = NULL;
803 }
e5e6bf9d 804 sfree(conf.fsect);
d7482997 805}
806
807static int xhtml_para_level(paragraph *p)
808{
809 switch (p->type)
810 {
d9d3dd95 811 case para_Title:
812 return 0;
813 break;
d7482997 814 case para_UnnumberedChapter:
815 case para_Chapter:
816 case para_Appendix:
817 return 1;
818 break;
819/* case para_BiblioCited:
820 return 2;
821 break;*/
822 case para_Heading:
823 case para_Subsect:
824 return p->aux+2;
825 break;
826 default:
827 return -1;
828 break;
829 }
830}
831
d7482997 832/* Output the nav links for the current file.
833 * file == NULL means we're doing the index
834 */
835static void xhtml_donavlinks(FILE *fp, xhtmlfile *file)
836{
837 xhtmlfile *xhtml_next_file = NULL;
838 fprintf(fp, "<p");
839 if (conf.nav_attrs!=NULL) {
840 fprintf(fp, " %ls>", conf.nav_attrs);
841 } else {
842 fprintf(fp, ">");
843 }
844 if (xhtml_last_file==NULL) {
845 fprintf(fp, "Previous | ");
846 } else {
847 fprintf(fp, "<a href='%s'>Previous</a> | ", xhtml_last_file->filename);
848 }
50d6b4bd 849 fprintf(fp, "<a href='%s'>Contents</a> | ", conf.contents_filename);
25acf71d 850 if (file == NULL) {
851 fprintf(fp, "Index | ");
852 } else {
50d6b4bd 853 fprintf(fp, "<a href='%s'>Index</a> | ", conf.index_filename);
25acf71d 854 }
d7482997 855 if (file != NULL) { /* otherwise we're doing nav links for the index */
856 if (xhtml_next_file==NULL)
857 xhtml_next_file = file->child;
858 if (xhtml_next_file==NULL)
859 xhtml_next_file = file->next;
860 if (xhtml_next_file==NULL)
861 xhtml_next_file = file->parent->next;
862 }
863 if (xhtml_next_file==NULL) {
864 if (file==NULL) { /* index, so no next file */
865 fprintf(fp, "Next ");
866 } else {
50d6b4bd 867 fprintf(fp, "<a href='%s'>Next</a>", conf.index_filename);
d7482997 868 }
869 } else {
870 fprintf(fp, "<a href='%s'>Next</a>", xhtml_next_file->filename);
871 }
872 fprintf(fp, "</p>\n");
873}
874
875/* Write out the index file */
d2e74722 876static void xhtml_do_index_body(FILE *fp)
d7482997 877{
d7482997 878 indexentry *y;
879 int ti;
d7482997 880
d2e74722 881 if (count234(idx->entries) == 0)
882 return; /* don't write anything at all */
d7482997 883
884 fprintf(fp, "<dl>\n");
885 /* iterate over idx->entries using the tree functions and display everything */
886 for (ti = 0; (y = (indexentry *)index234(idx->entries, ti)) != NULL; ti++) {
887 if (y->backend_data) {
888 int i;
889 xhtmlindex *xi;
890
891 fprintf(fp, "<dt>");
ce9921d6 892 xhtml_para(fp, y->text, FALSE);
d7482997 893 fprintf(fp, "</dt>\n<dd>");
894
895 xi = (xhtmlindex*) y->backend_data;
896 for (i=0; i<xi->nsection; i++) {
897 xhtmlsection *sect = xi->sections[i];
898 if (sect) {
899 fprintf(fp, "<a href='%s#%s'>", sect->file->filename, sect->fragment);
900 if (sect->para->kwtext) {
ce9921d6 901 xhtml_para(fp, sect->para->kwtext, FALSE);
d7482997 902 } else if (sect->para->words) {
ce9921d6 903 xhtml_para(fp, sect->para->words, FALSE);
d7482997 904 }
905 fprintf(fp, "</a>");
906 if (i+1<xi->nsection) {
907 fprintf(fp, ", ");
908 }
909 }
910 }
911 fprintf(fp, "</dd>\n");
912 }
913 }
914 fprintf(fp, "</dl>\n");
d2e74722 915}
916static void xhtml_do_index()
917{
918 word temp_word = { NULL, NULL, word_Normal, 0, 0, L"Index", { NULL, 0, 0} };
50d6b4bd 919 FILE *fp = fopen(conf.index_filename, "w");
d2e74722 920
921 if (fp==NULL)
50d6b4bd 922 fatal(err_cantopenw, conf.index_filename);
d2e74722 923 xhtml_doheader(fp, &temp_word);
924 xhtml_donavlinks(fp, NULL);
925
926 xhtml_do_index_body(fp);
d7482997 927
928 xhtml_donavlinks(fp, NULL);
929 xhtml_dofooter(fp);
930 fclose(fp);
931}
932
933/* Output the given file. This includes whatever contents at beginning and end, etc. etc. */
934static void xhtml_do_file(xhtmlfile *file)
935{
936 FILE *fp = fopen(file->filename, "w");
937 if (fp==NULL)
938 fatal(err_cantopenw, file->filename);
939
940 if (file->sections->para->words) {
941 xhtml_doheader(fp, file->sections->para->words);
942 } else if (file->sections->para->kwtext) {
943 xhtml_doheader(fp, file->sections->para->kwtext);
944 } else {
945 xhtml_doheader(fp, NULL);
946 }
947
948 xhtml_donavlinks(fp, file);
949
d2e74722 950 if (file->is_leaf && conf.leaf_contains_contents &&
951 xhtml_do_contents(NULL, file)>=conf.leaf_smallest_contents)
d7482997 952 xhtml_do_contents(fp, file);
953 xhtml_do_sections(fp, file->sections);
954 if (!file->is_leaf)
955 xhtml_do_naked_contents(fp, file);
956
957 xhtml_donavlinks(fp, file);
958
959 xhtml_dofooter(fp);
960 fclose(fp);
961
962 xhtml_last_file = file;
963}
964
965/* Output the top-level file. */
966static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform)
967{
968 paragraph *p;
969 int done=FALSE;
970 FILE *fp = fopen(file->filename, "w");
971 if (fp==NULL)
972 fatal(err_cantopenw, file->filename);
973
974 /* Do the title -- only one allowed */
975 for (p = sourceform; p && !done; p = p->next)
976 {
977 if (p->type == para_Title)
978 {
979 xhtml_doheader(fp, p->words);
980 done=TRUE;
981 }
982 }
983 if (!done)
984 xhtml_doheader(fp, NULL /* Eek! */);
985
d2e74722 986 /*
987 * Display the title.
988 */
989 for (p = sourceform; p; p = p->next)
990 {
991 if (p->type == para_Title) {
ce9921d6 992 xhtml_heading(fp, p, FALSE);
d2e74722 993 break;
994 }
995 }
996
9057a0a8 997 /* Do the preamble */
d7482997 998 for (p = sourceform; p; p = p->next)
999 {
8902e0ed 1000 if (p->type == para_Chapter || p->type == para_Heading ||
1001 p->type == para_Subsect || p->type == para_Appendix ||
1002 p->type == para_UnnumberedChapter) {
1003 /*
1004 * We've found the end of the preamble. Do every normal
1005 * paragraph up to there.
1006 */
ce9921d6 1007 xhtml_do_paras(fp, sourceform, p, FALSE);
8902e0ed 1008 break;
d7482997 1009 }
1010 }
d7482997 1011
1012 xhtml_do_contents(fp, file);
1013 xhtml_do_sections(fp, file->sections);
d2e74722 1014
5d9cc07b 1015 /*
1016 * Put the index in the top file if we're in single-file mode
1017 * (leaf-level 0).
1018 */
1019 if (conf.leaf_level == 0 && count234(idx->entries) > 0) {
d2e74722 1020 fprintf(fp, "<a name=\"index\"></a><h1>Index</h1>\n");
1021 xhtml_do_index_body(fp);
1022 }
1023
d7482997 1024 xhtml_dofooter(fp);
1025 fclose(fp);
1026}
1027
/*
 * Convert a Unicode string to a freshly allocated ASCII one, stored
 * in `*out'. Characters outside the printable ASCII range 32..126
 * are replaced by '?'.
 */
static void xhtml_utostr(wchar_t *in, char **out)
{
    int len = ustrlen(in);
    char *buf = smalloc(len + 1);
    int k = 0;

    while (k < len) {
        wchar_t wc = in[k];
        buf[k] = (wc >= 32 && wc <= 126) ? (char)wc : '?';
        k++;
    }
    buf[k] = 0;
    *out = buf;
}
1045
1046/*
1047 * Write contents for the given file, and subfiles, down to
1048 * the appropriate contents depth. Returns the number of
1049 * entries written.
1050 */
1051static int xhtml_do_contents(FILE *fp, xhtmlfile *file)
1052{
c8c7926b 1053 int level, limit, count = 0;
d7482997 1054 if (!file)
1055 return 0;
1056
1057 level = (file->sections)?(file->sections->level):(0);
1058 limit = conf.contents_depth[(level>5)?(5):(level)];
1059 start_level = (file->is_leaf) ? (level-1) : (level);
1060 last_level = start_level;
1061
1062 count += xhtml_do_contents_section_limit(fp, file->sections, limit);
1063 count += xhtml_do_contents_limit(fp, file->child, limit);
1064 if (fp!=NULL) {
1065 while (last_level > start_level) {
1066 last_level--;
c8c7926b 1067 fprintf(fp, "</li></ul>\n");
d7482997 1068 }
1069 }
1070 return count;
1071}
1072
1073/* As above, but doesn't do anything in the current file */
1074static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file)
1075{
1076 int level, limit, start_level, count = 0;
1077 if (!file)
1078 return 0;
1079
1080 level = (file->sections)?(file->sections->level):(0);
1081 limit = conf.contents_depth[(level>5)?(5):(level)];
1082 start_level = (file->is_leaf) ? (level-1) : (level);
1083 last_level = start_level;
1084
1085 count = xhtml_do_contents_limit(fp, file->child, limit);
1086 if (fp!=NULL) {
1087 while (last_level > start_level) {
1088 last_level--;
c8c7926b 1089 fprintf(fp, "</li></ul>\n");
d7482997 1090 }
1091 }
1092 return count;
1093}
1094
1095/*
1096 * Write contents for the given file, children, and siblings, down to
1097 * given limit contents depth.
1098 */
1099static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit)
1100{
1101 int count = 0;
1102 while (file) {
1103 count += xhtml_do_contents_section_limit(fp, file->sections, limit);
1104 count += xhtml_do_contents_limit(fp, file->child, limit);
1105 file = file->next;
1106 }
1107 return count;
1108}
1109
1110/*
1111 * Write contents entries for the given section tree, down to the
1112 * limit contents depth.
1113 */
1114static int xhtml_do_contents_section_deep_limit(FILE *fp, xhtmlsection *section, int limit)
1115{
1116 int count = 0;
1117 while (section) {
1118 if (!xhtml_add_contents_entry(fp, section, limit))
1119 return 0;
1120 else
1121 count++;
1122 count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
1123 section = section->next;
1124 }
1125 return count;
1126}
1127
1128/*
1129 * Write contents entries for the given section tree, down to the
1130 * limit contents depth.
1131 */
1132static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit)
1133{
1134 int count = 0;
1135 if (!section)
1136 return 0;
1137 xhtml_add_contents_entry(fp, section, limit);
1138 count=1;
1139 count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
1140 /* section=section->child;
1141 while (section && xhtml_add_contents_entry(fp, section, limit)) {
1142 section = section->next;
1143 }*/
1144 return count;
1145}
1146
1147/*
1148 * Add a section entry, unless we're exceeding the limit, in which
1149 * case return FALSE (otherwise return TRUE).
1150 */
1151static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit)
1152{
1153 if (!section || section->level > limit)
1154 return FALSE;
5d9cc07b 1155 if (fp==NULL || section->level < 0)
d7482997 1156 return TRUE;
c8c7926b 1157 if (last_level > section->level) {
1158 while (last_level > section->level) {
1159 last_level--;
1160 fprintf(fp, "</li></ul>\n");
1161 }
1162 fprintf(fp, "</li>\n");
1163 } else if (last_level < section->level) {
1164 assert(last_level == section->level - 1);
d7482997 1165 last_level++;
1166 fprintf(fp, "<ul>\n");
c8c7926b 1167 } else {
1168 fprintf(fp, "</li>\n");
d7482997 1169 }
1170 fprintf(fp, "<li><a href=\"%s#%s\">", section->file->filename, section->fragment);
1171 if (section->para->kwtext) {
ce9921d6 1172 xhtml_para(fp, section->para->kwtext, FALSE);
d7482997 1173 if (section->para->words) {
1174 fprintf(fp, ": ");
1175 }
1176 }
1177 if (section->para->words) {
ce9921d6 1178 xhtml_para(fp, section->para->words, FALSE);
d7482997 1179 }
c8c7926b 1180 fprintf(fp, "</a>\n");
d7482997 1181 return TRUE;
1182}
1183
1184/*
1185 * Write all the sections in this file. Do all paragraphs in this section, then all
1186 * children (recursively), then go on to the next one (tail recursively).
1187 */
1188static void xhtml_do_sections(FILE *fp, xhtmlsection *sections)
1189{
1190 while (sections) {
1191 currentsection = sections;
ce9921d6 1192 xhtml_do_paras(fp, sections->para, NULL, TRUE);
d7482997 1193 xhtml_do_sections(fp, sections->child);
1194 sections = sections->next;
1195 }
1196}
1197
1198/* Write this list of paragraphs. Close off all lists at the end. */
ce9921d6 1199static void xhtml_do_paras(FILE *fp, paragraph *p, paragraph *end,
1200 int indexable)
d7482997 1201{
7136a6c7 1202 int last_type = -1, ptype, first=TRUE;
1203 stack lcont_stack = stk_new();
d7482997 1204 if (!p)
1205 return;
1206
1207/* for (; p && (xhtml_para_level(p)>limit || xhtml_para_level(p)==-1 || first); p=p->next) {*/
8902e0ed 1208 for (; p && p != end && (xhtml_para_level(p)==-1 || first); p=p->next) {
d7482997 1209 first=FALSE;
7136a6c7 1210 switch (ptype = p->type)
d7482997 1211 {
1212 /*
1213 * Things we ignore because we've already processed them or
1214 * aren't going to touch them in this pass.
1215 */
1216 case para_IM:
1217 case para_BR:
1218 case para_Biblio: /* only touch BiblioCited */
1219 case para_VersionID:
d7482997 1220 case para_NoCite:
1221 case para_Title:
1222 break;
1223
1224 /*
1225 * Chapter titles.
1226 */
1227 case para_Chapter:
1228 case para_Appendix:
1229 case para_UnnumberedChapter:
ce9921d6 1230 xhtml_heading(fp, p, indexable);
d7482997 1231 break;
1232
1233 case para_Heading:
1234 case para_Subsect:
ce9921d6 1235 xhtml_heading(fp, p, indexable);
d7482997 1236 break;
1237
1238 case para_Rule:
1239 fprintf(fp, "\n<hr />\n");
1240 break;
1241
1242 case para_Normal:
9057a0a8 1243 case para_Copyright:
d7482997 1244 fprintf(fp, "\n<p>");
ce9921d6 1245 xhtml_para(fp, p->words, indexable);
d7482997 1246 fprintf(fp, "</p>\n");
1247 break;
1248
7136a6c7 1249 case para_LcontPush:
1250 {
1251 int *p;
1252 p = mknew(int);
1253 *p = last_type;
1254 stk_push(lcont_stack, p);
1255 last_type = para_Normal;
1256 }
1257 break;
1258 case para_LcontPop:
1259 {
1260 int *p = stk_pop(lcont_stack);
1261 assert(p);
1262 ptype = last_type = *p;
1263 sfree(p);
1264 goto closeofflist; /* ick */
1265 }
1266 break;
2614b01d 1267 case para_QuotePush:
1268 fprintf(fp, "<blockquote>\n");
1269 break;
1270 case para_QuotePop:
1271 fprintf(fp, "</blockquote>\n");
1272 break;
7136a6c7 1273
d7482997 1274 case para_Bullet:
1275 case para_NumberedList:
7136a6c7 1276 case para_Description:
1277 case para_DescribedThing:
d7482997 1278 case para_BiblioCited:
c8c7926b 1279 if (last_type!=p->type &&
1280 !(last_type==para_DescribedThing && p->type==para_Description) &&
1281 !(last_type==para_Description && p->type==para_DescribedThing)) {
d7482997 1282 /* start up list if necessary */
1283 if (p->type == para_Bullet) {
1284 fprintf(fp, "<ul>\n");
1285 } else if (p->type == para_NumberedList) {
1286 fprintf(fp, "<ol>\n");
7136a6c7 1287 } else if (p->type == para_BiblioCited ||
1288 p->type == para_DescribedThing ||
1289 p->type == para_Description) {
d7482997 1290 fprintf(fp, "<dl>\n");
1291 }
1292 }
7136a6c7 1293 if (p->type == para_Bullet || p->type == para_NumberedList) {
d7482997 1294 fprintf(fp, "<li>");
7136a6c7 1295 } else if (p->type == para_DescribedThing) {
1296 fprintf(fp, "<dt>");
1297 } else if (p->type == para_Description) {
1298 fprintf(fp, "<dd>");
1299 } else if (p->type == para_BiblioCited) {
d7482997 1300 fprintf(fp, "<dt>");
ce9921d6 1301 xhtml_para(fp, p->kwtext, indexable);
d7482997 1302 fprintf(fp, "</dt>\n<dd>");
1303 }
ce9921d6 1304 xhtml_para(fp, p->words, indexable);
7136a6c7 1305 {
1306 paragraph *p2 = p->next;
1307 if (p2 && xhtml_para_level(p2)==-1 && p2->type == para_LcontPush)
1308 break;
1309 }
1310
1311 closeofflist:
1312 if (ptype == para_BiblioCited) {
d7482997 1313 fprintf(fp, "</dd>\n");
c8c7926b 1314 } else if (ptype == para_DescribedThing) {
7136a6c7 1315 fprintf(fp, "</dt>");
c8c7926b 1316 } else if (ptype == para_Description) {
7136a6c7 1317 fprintf(fp, "</dd>");
1318 } else if (ptype == para_Bullet || ptype == para_NumberedList) {
d7482997 1319 fprintf(fp, "</li>");
1320 }
7136a6c7 1321 if (ptype == para_Bullet || ptype == para_NumberedList ||
1322 ptype == para_BiblioCited || ptype == para_Description ||
1323 ptype == para_DescribedThing)
d7482997 1324 /* close off list if necessary */
1325 {
1326 paragraph *p2 = p->next;
1327 int close_off=FALSE;
1328/* if (p2 && (xhtml_para_level(p2)>limit || xhtml_para_level(p2)==-1)) {*/
1329 if (p2 && xhtml_para_level(p2)==-1) {
c8c7926b 1330 if (p2->type != ptype &&
1331 !(p2->type==para_DescribedThing && ptype==para_Description) &&
1332 !(p2->type==para_Description && ptype==para_DescribedThing) &&
1333 p2->type != para_LcontPush)
d7482997 1334 close_off=TRUE;
1335 } else {
1336 close_off=TRUE;
1337 }
1338 if (close_off) {
7136a6c7 1339 if (ptype == para_Bullet) {
d7482997 1340 fprintf(fp, "</ul>\n");
7136a6c7 1341 } else if (ptype == para_NumberedList) {
d7482997 1342 fprintf(fp, "</ol>\n");
7136a6c7 1343 } else if (ptype == para_BiblioCited ||
1344 ptype == para_Description ||
1345 ptype == para_DescribedThing) {
d7482997 1346 fprintf(fp, "</dl>\n");
1347 }
1348 }
1349 }
1350 break;
1351
1352 case para_Code:
1353 xhtml_codepara(fp, p->words);
1354 break;
1355 }
7136a6c7 1356 last_type = ptype;
d7482997 1357 }
7136a6c7 1358
1359 stk_free(lcont_stack);
d7482997 1360}
1361
1362/*
1363 * Output a header for this XHTML file.
1364 */
1365static void xhtml_doheader(FILE *fp, word *title)
1366{
1367 fprintf(fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n");
1368 fprintf(fp, "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n");
1369 fprintf(fp, "<html xmlns='http://www.w3.org/1999/xhtml'>\n\n<head>\n<title>");
1370 if (title==NULL)
1371 fprintf(fp, "The thing with no name!");
1372 else
ce9921d6 1373 xhtml_para(fp, title, FALSE);
d7482997 1374 fprintf(fp, "</title>\n");
1375 fprintf(fp, "<meta name=\"generator\" content=\"Halibut %s xhtml-backend\" />\n", version);
1376 if (conf.author)
1377 fprintf(fp, "<meta name=\"author\" content=\"%ls\" />\n", conf.author);
1378 if (conf.description)
1379 fprintf(fp, "<meta name=\"description\" content=\"%ls\" />\n", conf.description);
1380 if (conf.head_end)
1381 fprintf(fp, "%ls\n", conf.head_end);
1382 fprintf(fp, "</head>\n\n");
1383 if (conf.body)
1384 fprintf(fp, "%ls\n", conf.body);
1385 else
1386 fprintf(fp, "<body>\n");
1387 if (conf.body_start)
1388 fprintf(fp, "%ls\n", conf.body_start);
1389}
1390
1391/*
1392 * Output a footer for this XHTML file.
1393 */
1394static void xhtml_dofooter(FILE *fp)
1395{
1396 fprintf(fp, "\n<hr />\n\n");
1397 if (conf.body_end)
1398 fprintf(fp, "%ls\n", conf.body_end);
1399 if (!conf.suppress_address) {
1400 fprintf(fp,"<address>\n");
1401 if (conf.address_start)
1402 fprintf(fp, "%ls\n", conf.address_start);
1403 /* Do the version ID */
1404 if (conf.include_version_id) {
1405 paragraph *p;
1406 int started = 0;
1407 for (p = sourceparas; p; p = p->next)
1408 if (p->type == para_VersionID) {
1409 xhtml_versionid(fp, p->words, started);
1410 started = 1;
1411 }
1412 }
1413 if (conf.address_end)
1414 fprintf(fp, "%ls\n", conf.address_end);
1415 fprintf(fp, "</address>\n");
1416 }
1417 fprintf(fp, "</body>\n\n</html>\n");
1418}
1419
1420/*
1421 * Output the versionid paragraph. Typically this is a version control
1422 * ID string (such as $Id...$ in RCS).
1423 */
1424static void xhtml_versionid(FILE *fp, word *text, int started)
1425{
1426 rdstringc t = { 0, 0, NULL };
1427
1428 rdaddc(&t, '['); /* FIXME: configurability */
ce9921d6 1429 xhtml_rdaddwc(&t, text, NULL, FALSE);
d7482997 1430 rdaddc(&t, ']'); /* FIXME: configurability */
1431
1432 if (started)
c8c7926b 1433 fprintf(fp, "<br />\n");
d7482997 1434 fprintf(fp, "%s\n", t.text);
1435 sfree(t.text);
1436}
1437
1438/* Is this an XHTML reserved character? */
1439static int xhtml_reservedchar(int c)
1440{
1441 if (c=='&' || c=='<' || c=='>' || c=='"')
1442 return TRUE;
1443 else
1444 return FALSE;
1445}
1446
1447/*
1448 * Convert a wide string into valid XHTML: Anything outside ASCII will
1449 * be fixed up as an entity. Currently we don't worry about constraining the
1450 * encoded character set, which we should probably do at some point (we can
1451 * still fix up and return FALSE - see the last comment here). We also don't
1452 * currently
1453 *
1454 * Because this is only used for words, spaces are HARD spaces (any other
1455 * spaces will be word_Whitespace not word_Normal). So they become &nbsp;
1456 * Unless hard_spaces is FALSE, of course (code paragraphs break the above
1457 * rule).
1458 *
1459 * If `result' is non-NULL, mallocs the resulting string and stores a pointer to
1460 * it in `*result'. If `result' is NULL, merely checks whether all
1461 * characters in the string are feasible.
1462 *
1463 * Return is nonzero if all characters are OK. If not all
1464 * characters are OK but `result' is non-NULL, a result _will_
1465 * still be generated!
1466 */
4b3c5afb 1467static int xhtml_convert(wchar_t *s, int maxlen, char **result,
1468 int hard_spaces) {
d7482997 1469 int doing = (result != 0);
1470 int ok = TRUE;
1471 char *p = NULL;
1472 int plen = 0, psize = 0;
1473
4b3c5afb 1474 if (maxlen <= 0)
1475 maxlen = -1;
1476
1477 for (; *s && maxlen != 0; s++, maxlen--) {
d7482997 1478 wchar_t c = *s;
1479
1480#define ensure_size(i) if (i>=psize) { psize = i+256; p = resize(p, psize); }
1481
1482 if (((c == 32 && !hard_spaces) || (c > 32 && c <= 126 && !xhtml_reservedchar(c)))) {
1483 /* Char is OK. */
1484 if (doing)
1485 {
1486 ensure_size(plen);
1487 p[plen++] = (char)c;
1488 }
1489 } else {
1490 /* Char needs fixing up. */
1491 /* ok = FALSE; -- currently we never return FALSE; we
1492 * might want to when considering a character set for the
1493 * encoded document.
1494 */
1495 if (doing)
1496 {
1497 if (c==32) { /* a space in a word is a hard space */
1498 ensure_size(plen+6); /* includes space for the NUL, which is subsequently stomped on */
1499 sprintf(p+plen, "&nbsp;");
1500 plen+=6;
1501 } else {
1502 /* FIXME: entity names! */
1503 ensure_size(plen+8); /* includes space for the NUL, which is subsequently stomped on */
1504 plen+=sprintf(p+plen, "&#%04i;", (int)c);
1505 }
1506 }
1507 }
1508 }
1509 if (doing) {
1510 p = resize(p, plen+1);
1511 p[plen] = '\0';
1512 *result = p;
1513 }
1514 return ok;
1515}
1516
1517/*
1518 * This formats the given words as XHTML.
ce9921d6 1519 *
1520 * `indexable', if FALSE, prohibits adding any index references.
1521 * You might use this, for example, if an index reference occurred
1522 * in a section title, to prevent phony index references when the
1523 * section title is processed in strange places such as contents
1524 * sections.
d7482997 1525 */
ce9921d6 1526static void xhtml_rdaddwc(rdstringc *rs, word *text, word *end, int indexable) {
d7482997 1527 char *c;
1528 keyword *kwl;
1529 xhtmlsection *sect;
1530 indextag *itag;
1531 int ti;
1532
1533 for (; text && text != end; text = text->next) {
1534 switch (text->type) {
1535 case word_HyperLink:
1536 xhtml_utostr(text->text, &c);
1537 rdaddsc(rs, "<a href=\"");
1538 rdaddsc(rs, c);
1539 rdaddsc(rs, "\">");
1540 sfree(c);
1541 break;
1542
1543 case word_UpperXref:
1544 case word_LowerXref:
1545 kwl = kw_lookup(keywords, text->text);
1546 if (kwl) {
1547 sect=xhtml_find_section(kwl->para);
1548 if (sect) {
1549 rdaddsc(rs, "<a href=\"");
1550 rdaddsc(rs, sect->file->filename);
1551 rdaddc(rs, '#');
1552 rdaddsc(rs, sect->fragment);
1553 rdaddsc(rs, "\">");
1554 } else {
1555 rdaddsc(rs, "<a href=\"Apologies.html\"><!-- probably a bibliography cross reference -->");
1556 error(err_whatever, "Couldn't locate cross-reference! (Probably a bibliography entry.)");
1557 }
1558 } else {
1559 rdaddsc(rs, "<a href=\"Apologies.html\"><!-- unknown cross-reference -->");
1560 error(err_whatever, "Couldn't locate cross-reference! (Wasn't in source file.)");
1561 }
1562 break;
1563
1564 case word_IndexRef: /* in theory we could make an index target here */
1565/* rdaddsc(rs, "<a name=\"idx-");
1566 xhtml_utostr(text->text, &c);
1567 rdaddsc(rs, c);
1568 sfree(c);
1569 rdaddsc(rs, "\"></a>");*/
1570 /* what we _do_ need to do is to fix up the backend data
1571 * for any indexentry this points to.
1572 */
ce9921d6 1573 if (!indexable)
1574 break;
1575
d7482997 1576 for (ti=0; (itag = (indextag *)index234(idx->tags, ti))!=NULL; ti++) {
1577 /* FIXME: really ustricmp() and not ustrcmp()? */
1578 if (ustricmp(itag->name, text->text)==0) {
1579 break;
1580 }
1581 }
1582 if (itag!=NULL) {
1583 if (itag->refs!=NULL) {
1584 int i;
1585 for (i=0; i<itag->nrefs; i++) {
1586 xhtmlindex *idx_ref;
1587 indexentry *ientry;
1588
1589 ientry = itag->refs[i];
1590 if (ientry->backend_data==NULL) {
1591 idx_ref = (xhtmlindex*) smalloc(sizeof(xhtmlindex));
1592 if (idx_ref==NULL)
1593 fatal(err_nomemory);
1594 idx_ref->nsection = 0;
1595 idx_ref->size = 4;
1596 idx_ref->sections = (xhtmlsection**) smalloc(idx_ref->size * sizeof(xhtmlsection*));
1597 if (idx_ref->sections==NULL)
1598 fatal(err_nomemory);
1599 ientry->backend_data = idx_ref;
1600 } else {
1601 idx_ref = ientry->backend_data;
1602 if (idx_ref->nsection+1 > idx_ref->size) {
1603 int new_size = idx_ref->size * 2;
1604 idx_ref->sections = srealloc(idx_ref->sections, new_size * sizeof(xhtmlsection));
1605 if (idx_ref->sections==NULL) {
1606 fatal(err_nomemory);
1607 }
1608 idx_ref->size = new_size;
1609 }
1610 }
1611 idx_ref->sections[idx_ref->nsection++] = currentsection;
1612#if 0
1613#endif
1614 }
1615 } else {
1616 fatal(err_whatever, "Index tag had no entries!");
1617 }
1618 } else {
1619 fprintf(stderr, "Looking for index entry '%ls'\n", text->text);
1620 fatal(err_whatever, "Couldn't locate index entry! (Wasn't in index.)");
1621 }
1622 break;
1623
1624 case word_HyperEnd:
1625 case word_XrefEnd:
1626 rdaddsc(rs, "</a>");
1627 break;
1628
1629 case word_Normal:
1630 case word_Emph:
1631 case word_Code:
1632 case word_WeakCode:
1633 case word_WhiteSpace:
1634 case word_EmphSpace:
1635 case word_CodeSpace:
1636 case word_WkCodeSpace:
1637 case word_Quote:
1638 case word_EmphQuote:
1639 case word_CodeQuote:
1640 case word_WkCodeQuote:
1641 assert(text->type != word_CodeQuote &&
1642 text->type != word_WkCodeQuote);
1643 if (towordstyle(text->type) == word_Emph &&
1644 (attraux(text->aux) == attr_First ||
1645 attraux(text->aux) == attr_Only))
1646 rdaddsc(rs, "<em>");
1647 else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) &&
1648 (attraux(text->aux) == attr_First ||
1649 attraux(text->aux) == attr_Only))
1650 rdaddsc(rs, "<code>");
1651
1652 if (removeattr(text->type) == word_Normal) {
4b3c5afb 1653 if (xhtml_convert(text->text, 0, &c, TRUE)) /* spaces in the word are hard */
d7482997 1654 rdaddsc(rs, c);
1655 else
ce9921d6 1656 xhtml_rdaddwc(rs, text->alt, NULL, indexable);
d7482997 1657 sfree(c);
1658 } else if (removeattr(text->type) == word_WhiteSpace) {
1659 rdaddc(rs, ' ');
1660 } else if (removeattr(text->type) == word_Quote) {
1661 rdaddsc(rs, "&quot;");
1662 }
1663
1664 if (towordstyle(text->type) == word_Emph &&
1665 (attraux(text->aux) == attr_Last ||
1666 attraux(text->aux) == attr_Only))
1667 rdaddsc(rs, "</em>");
1668 else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) &&
1669 (attraux(text->aux) == attr_Last ||
1670 attraux(text->aux) == attr_Only))
1671 rdaddsc(rs, "</code>");
1672 break;
1673 }
1674 }
1675}
1676
1677/* Output a heading, formatted as XHTML.
1678 */
ce9921d6 1679static void xhtml_heading(FILE *fp, paragraph *p, int indexable)
d7482997 1680{
1681 rdstringc t = { 0, 0, NULL };
1682 word *tprefix = p->kwtext;
1683 word *nprefix = p->kwtext2;
1684 word *text = p->words;
1685 int level = xhtml_para_level(p);
1686 xhtmlsection *sect = xhtml_find_section(p);
5d9cc07b 1687 xhtmlheadfmt *fmt;
d7482997 1688 char *fragment;
1689 if (sect) {
1690 fragment = sect->fragment;
1691 } else {
d2e74722 1692 if (p->type == para_Title)
1693 fragment = "title";
1694 else {
1695 fragment = ""; /* FIXME: what else can we do? */
1696 error(err_whatever, "Couldn't locate heading cross-reference!");
1697 }
d7482997 1698 }
1699
5d9cc07b 1700 if (p->type == para_Title)
1701 fmt = NULL;
1702 else if (level == 1)
1703 fmt = &conf.fchapter;
1704 else if (level-1 < conf.nfsect)
1705 fmt = &conf.fsect[level-1];
1706 else
1707 fmt = &conf.fsect[conf.nfsect-1];
1708
1709 if (fmt && fmt->just_numbers && nprefix) {
ce9921d6 1710 xhtml_rdaddwc(&t, nprefix, NULL, indexable);
5d9cc07b 1711 if (fmt) {
1712 char *c;
4b3c5afb 1713 if (xhtml_convert(fmt->number_suffix, 0, &c, FALSE)) {
5d9cc07b 1714 rdaddsc(&t, c);
1715 sfree(c);
1716 }
1717 }
1718 } else if (fmt && !fmt->just_numbers && tprefix) {
ce9921d6 1719 xhtml_rdaddwc(&t, tprefix, NULL, indexable);
5d9cc07b 1720 if (fmt) {
1721 char *c;
4b3c5afb 1722 if (xhtml_convert(fmt->number_suffix, 0, &c, FALSE)) {
5d9cc07b 1723 rdaddsc(&t, c);
1724 sfree(c);
1725 }
1726 }
d7482997 1727 }
ce9921d6 1728 xhtml_rdaddwc(&t, text, NULL, indexable);
d9d3dd95 1729 /*
1730 * If we're outputting in single-file mode, we need to lower
1731 * the level of each heading by one, because the overall
1732 * document title will be sitting right at the top as an <h1>
1733 * and so chapters and sections should start at <h2>.
1734 *
1735 * Even if not, the document title will come back from
1736 * xhtml_para_level() as level zero, so we must increment that
1737 * no matter what leaf_level is set to.
1738 */
1739 if (conf.leaf_level == 0 || level == 0)
1740 level++;
d7482997 1741 fprintf(fp, "<a name=\"%s\"></a><h%i>%s</h%i>\n", fragment, level, t.text, level);
1742 sfree(t.text);
1743}
1744
1745/* Output a paragraph. Styles are handled by xhtml_rdaddwc().
1746 * This looks pretty simple; I may have missed something ...
1747 */
ce9921d6 1748static void xhtml_para(FILE *fp, word *text, int indexable)
d7482997 1749{
1750 rdstringc out = { 0, 0, NULL };
ce9921d6 1751 xhtml_rdaddwc(&out, text, NULL, indexable);
d7482997 1752 fprintf(fp, "%s", out.text);
1753 sfree(out.text);
1754}
1755
1756/* Output a code paragraph. I'm treating this as preformatted, which
1757 * may not be entirely correct. See xhtml_para() for my worries about
1758 * this being overly-simple; however I think that most of the complexity
1759 * of the text backend came entirely out of word wrapping anyway.
1760 */
1761static void xhtml_codepara(FILE *fp, word *text)
1762{
1763 fprintf(fp, "<pre>");
1764 for (; text; text = text->next) if (text->type == word_WeakCode) {
4b3c5afb 1765 word *here, *next;
d7482997 1766 char *c;
4b3c5afb 1767
1768 /*
1769 * See if this WeakCode is followed by an Emph to indicate
1770 * emphasis.
1771 */
1772 here = text;
1773 if (text->next && text->next->type == word_Emph) {
1774 next = text = text->next;
1775 } else
1776 next = NULL;
1777
1778 if (next) {
1779 wchar_t *t, *e;
1780 int n;
1781
1782 t = here->text;
1783 e = next->text;
1784
1785 while (*e) {
1786 int ec = *e;
1787
1788 for (n = 0; t[n] && e[n] && e[n] == ec; n++);
1789 xhtml_convert(t, n, &c, FALSE);
1790 fprintf(fp, "%s%s%s",
1791 (ec == 'i' ? "<em>" : ec == 'b' ? "<b>" : ""),
1792 c,
1793 (ec == 'i' ? "</em>" : ec == 'b' ? "</b>" : ""));
1794 sfree(c);
1795
1796 t += n;
1797 e += n;
1798 }
1799
1800 xhtml_convert(t, 0, &c, FALSE);
1801 fprintf(fp, "%s\n", c);
1802 sfree(c);
1803 } else {
1804 xhtml_convert(here->text, 0, &c, FALSE);
1805 fprintf(fp, "%s\n", c);
1806 sfree(c);
1807 }
d7482997 1808 }
1809 fprintf(fp, "</pre>\n");
1810}