Rewrite ustrftime(), so that (a) it uses wcsftime() where available,
[sgt/halibut] / bk_xhtml.c
CommitLineData
d7482997 1/*
2 * xhtml backend for Halibut
3 * (initial implementation by James Aylett)
4 *
5 * Still to do:
6 *
7 * +++ doesn't handle non-breaking hyphens. Not sure how to yet.
8 * +++ entity names (from a file -- ideally supply normal SGML files)
9 * +++ configuration directive to file split where the current layout
10 * code wouldn't. Needs changes to _ponder_layout() and _do_paras(),
11 * perhaps others.
12 *
13 * Limitations:
14 *
15 * +++ biblio/index references target the nearest section marker, rather
16 * than having a dedicated target themselves. In large bibliographies
17 * this will cause problems. (The solution is to fake up a response
18 * from xhtml_find_section(), probably linking it into the sections
19 * chain just in case we need it again, and to make freeing it up
20 * easier.) docsrc.pl used to work as we do, however, and SGT agrees that
21 * this is acceptable for now.
22 * +++ can't cope with leaf-level == 0. It's all to do with the
23 * top-level file not being normal, probably not even having a valid
24 * section level, and stuff like that. I question whether this is an
25 * issue, frankly; small manuals that fit on one page should probably
26 * not be written in halibut at all.
27 */
28
29#include <stdio.h>
30#include <stdlib.h>
677e18a2 31#include <string.h>
d7482997 32#include <assert.h>
33#include "halibut.h"
34
50d6b4bd 35/*
36 * FILENAME_TEMPLATE (overridable in config of course) allows you
37 * to choose the general form for your HTML file names. It is
38 * slightly printf-styled (% followed by a single character is a
39 * formatting directive, %% is a literal %). Formatting directives
40 * are:
41 *
ba9c1487 42 * - %n is the section type-plus-number, minus whitespace (`Chapter1.2').
50d6b4bd 43 * - %b is the section number on its own (`1.2').
44 * - %k is the section's _internal_ keyword.
45 * - %N is the section's visible title in the output, again minus
46 * whitespace.
47 *
48 * %n, %b and %k will all default to %N if the section is
49 * unnumbered (`Bibliography' is often a good example).
6d6d850c 50 *
51 * FRAGMENT_TEMPLATE is the same, but defines the <a name="foo">
52 * markers used to cross-reference to particular subsections of a
53 * file.
50d6b4bd 54 */
55
/* Built-in defaults for the output file names and the two templates. */
#define FILENAME_SINGLE    "Manual.html"
#define FILENAME_CONTENTS  "Contents.html"
#define FILENAME_INDEX     "IndexPage.html"
#define FILENAME_TEMPLATE  "%n.html"
#define FRAGMENT_TEMPLATE  "%b"
50d6b4bd 61
d7482997 62struct xhtmlsection_Struct {
63 struct xhtmlsection_Struct *next; /* next sibling (NULL if split across files) */
64 struct xhtmlsection_Struct *child; /* NULL if split across files */
65 struct xhtmlsection_Struct *parent; /* NULL if split across files */
66 struct xhtmlsection_Struct *chain; /* single structure independent of weird trees */
67 paragraph *para;
68 struct xhtmlfile_Struct *file; /* which file is this a part of? */
69 char *fragment; /* fragment id within the file */
70 int level;
71};
72
/*
 * One output file, linked into a next/child/parent tree of files.
 */
struct xhtmlfile_Struct {
    struct xhtmlfile_Struct *next;
    struct xhtmlfile_Struct *child;
    struct xhtmlfile_Struct *parent;
    /* name the file is written under */
    char *filename;
    /* sections within this file (only one for non-leaf) */
    struct xhtmlsection_Struct *sections;
    /* is this file a leaf file, ie does it not have any children? */
    int is_leaf;
};
81
/* Short names for the layout structures. */
typedef struct xhtmlsection_Struct xhtmlsection;
typedef struct xhtmlfile_Struct    xhtmlfile;
typedef struct xhtmlindex_Struct   xhtmlindex;

/*
 * Per-index-entry backend data: the list of sections the entry
 * links to.
 */
struct xhtmlindex_Struct {
    /* number of entries of `sections' that the index output walks */
    int nsection;
    /* NOTE(review): presumably the allocated capacity of `sections' -- confirm */
    int size;
    xhtmlsection **sections;
};
91
/*
 * Heading format for one level, set by the xhtml-chapter-* and
 * xhtml-section-* configuration directives.
 */
typedef struct {
    /* boolean taken from the `-numeric' directive */
    int just_numbers;
    /* string taken from the `-suffix' directive */
    wchar_t *number_suffix;
} xhtmlheadfmt;
96
97typedef struct {
d7482997 98 int contents_depth[6];
99 int leaf_contains_contents;
100 int leaf_level;
101 int leaf_smallest_contents;
102 int include_version_id;
103 wchar_t *author, *description;
104 wchar_t *head_end, *body, *body_start, *body_end, *address_start, *address_end, *nav_attrs;
105 int suppress_address;
5d9cc07b 106 xhtmlheadfmt fchapter, *fsect;
107 int nfsect;
50d6b4bd 108 char *contents_filename, *index_filename;
6d6d850c 109 char *single_filename, *template_filename, *template_fragment;
d7482997 110} xhtmlconfig;
111
112/*static void xhtml_level(paragraph *, int);
113static void xhtml_level_0(paragraph *);
114static void xhtml_docontents(FILE *, paragraph *, int);
115static void xhtml_dosections(FILE *, paragraph *, int);
116static void xhtml_dobody(FILE *, paragraph *, int);*/
117
118static void xhtml_doheader(FILE *, word *);
119static void xhtml_dofooter(FILE *);
120static void xhtml_versionid(FILE *, word *, int);
121
122static void xhtml_utostr(wchar_t *, char **);
123static int xhtml_para_level(paragraph *);
124static int xhtml_reservedchar(int);
125
4b3c5afb 126static int xhtml_convert(wchar_t *, int, char **, int);
ce9921d6 127static void xhtml_rdaddwc(rdstringc *, word *, word *, int);
128static void xhtml_para(FILE *, word *, int);
d7482997 129static void xhtml_codepara(FILE *, word *);
ce9921d6 130static void xhtml_heading(FILE *, paragraph *, int);
d7482997 131
132/* File-global variables are much easier than passing these things
133 * all over the place. Evil, but easier. We can replace this with a single
134 * structure at some point.
135 */
136static xhtmlconfig conf;
137static keywordlist *keywords;
138static indexdata *idx;
139static xhtmlfile *topfile;
140static xhtmlsection *topsection;
141static paragraph *sourceparas;
142static xhtmlfile *lastfile;
143static xhtmlfile *xhtml_last_file = NULL;
c8c7926b 144static int last_level=-1, start_level;
d7482997 145static xhtmlsection *currentsection;
146
147static xhtmlconfig xhtml_configure(paragraph *source)
148{
149 xhtmlconfig ret;
150
151 /*
152 * Defaults.
153 */
154 ret.contents_depth[0] = 2;
155 ret.contents_depth[1] = 3;
156 ret.contents_depth[2] = 4;
157 ret.contents_depth[3] = 5;
158 ret.contents_depth[4] = 6;
159 ret.contents_depth[5] = 7;
160 ret.leaf_level = 2;
161 ret.leaf_smallest_contents = 4;
162 ret.leaf_contains_contents = FALSE;
163 ret.include_version_id = TRUE;
164 ret.author = NULL;
165 ret.description = NULL;
166 ret.head_end = NULL;
167 ret.body = NULL;
168 ret.body_start = NULL;
169 ret.body_end = NULL;
170 ret.address_start = NULL;
171 ret.address_end = NULL;
172 ret.nav_attrs = NULL;
173 ret.suppress_address = FALSE;
174
5d9cc07b 175 ret.fchapter.just_numbers = FALSE;
e5e6bf9d 176 ret.fchapter.number_suffix = L": ";
5d9cc07b 177 ret.nfsect = 2;
178 ret.fsect = mknewa(xhtmlheadfmt, ret.nfsect);
179 ret.fsect[0].just_numbers = FALSE;
e5e6bf9d 180 ret.fsect[0].number_suffix = L": ";
5d9cc07b 181 ret.fsect[1].just_numbers = TRUE;
e5e6bf9d 182 ret.fsect[1].number_suffix = L" ";
50d6b4bd 183 ret.contents_filename = strdup(FILENAME_CONTENTS);
184 ret.single_filename = strdup(FILENAME_SINGLE);
185 ret.index_filename = strdup(FILENAME_INDEX);
186 ret.template_filename = strdup(FILENAME_TEMPLATE);
6d6d850c 187 ret.template_fragment = strdup(FRAGMENT_TEMPLATE);
5d9cc07b 188
d7482997 189 for (; source; source = source->next)
190 {
191 if (source->type == para_Config)
192 {
50d6b4bd 193 if (!ustricmp(source->keyword, L"xhtml-contents-filename")) {
194 sfree(ret.contents_filename);
e4ea58f8 195 ret.contents_filename = dupstr(adv(source->origkeyword));
50d6b4bd 196 } else if (!ustricmp(source->keyword, L"xhtml-single-filename")) {
197 sfree(ret.single_filename);
e4ea58f8 198 ret.single_filename = dupstr(adv(source->origkeyword));
50d6b4bd 199 } else if (!ustricmp(source->keyword, L"xhtml-index-filename")) {
200 sfree(ret.index_filename);
e4ea58f8 201 ret.index_filename = dupstr(adv(source->origkeyword));
50d6b4bd 202 } else if (!ustricmp(source->keyword, L"xhtml-template-filename")) {
203 sfree(ret.template_filename);
e4ea58f8 204 ret.template_filename = dupstr(adv(source->origkeyword));
6d6d850c 205 } else if (!ustricmp(source->keyword, L"xhtml-template-fragment")) {
206 sfree(ret.template_fragment);
e4ea58f8 207 ret.template_fragment = utoa_dup(uadv(source->keyword), CS_ASCII);
50d6b4bd 208 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-0")) {
d7482997 209 ret.contents_depth[0] = utoi(uadv(source->keyword));
210 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-1")) {
211 ret.contents_depth[1] = utoi(uadv(source->keyword));
212 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-2")) {
213 ret.contents_depth[2] = utoi(uadv(source->keyword));
214 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-3")) {
215 ret.contents_depth[3] = utoi(uadv(source->keyword));
216 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-4")) {
217 ret.contents_depth[4] = utoi(uadv(source->keyword));
218 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-5")) {
219 ret.contents_depth[5] = utoi(uadv(source->keyword));
220 } else if (!ustricmp(source->keyword, L"xhtml-leaf-level")) {
221 ret.leaf_level = utoi(uadv(source->keyword));
d7482997 222 } else if (!ustricmp(source->keyword, L"xhtml-leaf-smallest-contents")) {
223 ret.leaf_smallest_contents = utoi(uadv(source->keyword));
224 } else if (!ustricmp(source->keyword, L"xhtml-versionid")) {
225 ret.include_version_id = utob(uadv(source->keyword));
226 } else if (!ustricmp(source->keyword, L"xhtml-leaf-contains-contents")) {
227 ret.leaf_contains_contents = utob(uadv(source->keyword));
228 } else if (!ustricmp(source->keyword, L"xhtml-suppress-address")) {
229 ret.suppress_address = utob(uadv(source->keyword));
230 } else if (!ustricmp(source->keyword, L"xhtml-author")) {
231 ret.author = uadv(source->keyword);
232 } else if (!ustricmp(source->keyword, L"xhtml-description")) {
233 ret.description = uadv(source->keyword);
234 } else if (!ustricmp(source->keyword, L"xhtml-head-end")) {
235 ret.head_end = uadv(source->keyword);
236 } else if (!ustricmp(source->keyword, L"xhtml-body-start")) {
237 ret.body_start = uadv(source->keyword);
238 } else if (!ustricmp(source->keyword, L"xhtml-body-tag")) {
239 ret.body = uadv(source->keyword);
240 } else if (!ustricmp(source->keyword, L"xhtml-body-end")) {
241 ret.body_end = uadv(source->keyword);
242 } else if (!ustricmp(source->keyword, L"xhtml-address-start")) {
243 ret.address_start = uadv(source->keyword);
244 } else if (!ustricmp(source->keyword, L"xhtml-address-end")) {
245 ret.address_end = uadv(source->keyword);
246 } else if (!ustricmp(source->keyword, L"xhtml-navigation-attributes")) {
247 ret.nav_attrs = uadv(source->keyword);
5d9cc07b 248 } else if (!ustricmp(source->keyword, L"xhtml-chapter-numeric")) {
249 ret.fchapter.just_numbers = utob(uadv(source->keyword));
250 } else if (!ustricmp(source->keyword, L"xhtml-chapter-suffix")) {
e5e6bf9d 251 ret.fchapter.number_suffix = uadv(source->keyword);
5d9cc07b 252 } else if (!ustricmp(source->keyword, L"xhtml-section-numeric")) {
253 wchar_t *p = uadv(source->keyword);
254 int n = 0;
255 if (uisdigit(*p)) {
256 n = utoi(p);
257 p = uadv(p);
258 }
259 if (n >= ret.nfsect) {
260 int i;
261 ret.fsect = resize(ret.fsect, n+1);
262 for (i = ret.nfsect; i <= n; i++)
263 ret.fsect[i] = ret.fsect[ret.nfsect-1];
264 ret.nfsect = n+1;
265 }
266 ret.fsect[n].just_numbers = utob(p);
267 } else if (!ustricmp(source->keyword, L"xhtml-section-suffix")) {
268 wchar_t *p = uadv(source->keyword);
269 int n = 0;
270 if (uisdigit(*p)) {
271 n = utoi(p);
272 p = uadv(p);
273 }
274 if (n >= ret.nfsect) {
275 int i;
276 ret.fsect = resize(ret.fsect, n+1);
277 for (i = ret.nfsect; i <= n; i++)
278 ret.fsect[i] = ret.fsect[ret.nfsect-1];
279 ret.nfsect = n+1;
280 }
e5e6bf9d 281 ret.fsect[n].number_suffix = p;
d7482997 282 }
283 }
284 }
285
286 /* printf(" !!! leaf_level = %i\n", ret.leaf_level);
287 printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]);
288 printf(" !!! contentdepth-1 = %i\n", ret.contents_depth[1]);
289 printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]);
290 printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]);
291 printf(" !!! contentdepth-4 = %i\n", ret.contents_depth[4]);
292 printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]);
293 printf(" !!! leaf_contains_contents = %i\n", ret.leaf_contains_contents);*/
294 return ret;
295}
296
ba9c1487 297paragraph *xhtml_config_filename(char *filename)
298{
299 /*
300 * If the user passes in a single filename as a parameter to
301 * the `--html' command-line option, then we should assume it
302 * to imply _two_ config directives:
303 * \cfg{xhtml-single-filename}{whatever} and
304 * \cfg{xhtml-leaf-level}{0}; the rationale being that the user
305 * wants their output _in that file_.
306 */
e4ea58f8 307 paragraph *p, *q;
ba9c1487 308
e4ea58f8 309 p = cmdline_cfg_simple("xhtml-single-filename", filename, NULL);
310 q = cmdline_cfg_simple("xhtml-leaf-level", "0", NULL);
311 p->next = q;
312 return p;
ba9c1487 313}
314
d7482997 315static xhtmlsection *xhtml_new_section(xhtmlsection *last)
316{
317 xhtmlsection *ret = mknew(xhtmlsection);
318 ret->next=NULL;
319 ret->child=NULL;
320 ret->parent=NULL;
321 ret->chain=last;
322 ret->para=NULL;
323 ret->file=NULL;
324 ret->fragment=NULL;
325 ret->level=-1; /* marker: end of chain */
326 return ret;
327}
328
329/* Returns NULL or the section that marks that paragraph */
330static xhtmlsection *xhtml_find_section(paragraph *p)
331{
332 xhtmlsection *ret = topsection;
333 if (xhtml_para_level(p)==-1) { /* first, we back-track to a section paragraph */
334 paragraph *p2 = sourceparas;
335 paragraph *p3 = NULL;
336 while (p2 && p2!=p) {
337 if (xhtml_para_level(p2)!=-1) {
338 p3 = p2;
339 }
340 p2=p2->next;
341 }
342 if (p3==NULL) { /* for some reason, we couldn't find a section before this paragraph ... ? */
343 /* Note that this can happen, if you have a cross-reference to before the first chapter starts.
344 * So don't do that, then.
345 */
346 return NULL;
347 }
348 p=p3;
349 }
350 while (ret && ret->para != p) {
351/* printf(" xhtml_find_section(): checking %s for para @ %p\n", ret->fragment, p);*/
352 ret=ret->chain;
353 }
354 return ret;
355}
356
6d6d850c 357static void xhtml_format(paragraph *p, char *template_string, rdstringc *r)
358{
359 char *c, *t;
360 word *w;
361 wchar_t *ws;
362
363 t = template_string;
364 while (*t) {
365 if (*t == '%' && t[1]) {
366 int fmt;
367
368 t++;
369 fmt = *t++;
370
371 if (fmt == '%') {
372 rdaddc(r, fmt);
373 continue;
374 }
375
376 w = NULL;
377 ws = NULL;
378
379 if (p->kwtext && fmt == 'n')
380 w = p->kwtext;
381 else if (p->kwtext2 && fmt == 'b')
382 w = p->kwtext2;
383 else if (p->keyword && *p->keyword && fmt == 'k')
384 ws = p->keyword;
385 else
386 w = p->words;
387
388 while (w) {
389 switch (removeattr(w->type))
390 {
391 case word_Normal:
392 /*case word_Emph:
393 case word_Code:
394 case word_WeakCode:*/
395 xhtml_utostr(w->text, &c);
396 rdaddsc(r,c);
397 sfree(c);
398 break;
399 }
400 w = w->next;
401 }
402 if (ws) {
403 xhtml_utostr(ws, &c);
404 rdaddsc(r,c);
405 sfree(c);
406 }
407 } else {
408 rdaddc(r, *t++);
409 }
410 }
411}
412
d7482997 413static xhtmlfile *xhtml_new_file(xhtmlsection *sect)
414{
415 xhtmlfile *ret = mknew(xhtmlfile);
416
417 ret->next=NULL;
418 ret->child=NULL;
419 ret->parent=NULL;
420 ret->filename=NULL;
421 ret->sections=sect;
422 ret->is_leaf=(sect!=NULL && sect->level==conf.leaf_level);
423 if (sect==NULL) {
424 if (conf.leaf_level==0) { /* currently unused */
50d6b4bd 425 ret->filename = smalloc(strlen(conf.single_filename)+1);
426 sprintf(ret->filename, conf.single_filename);
d7482997 427 } else {
50d6b4bd 428 ret->filename = smalloc(strlen(conf.contents_filename)+1);
429 sprintf(ret->filename, conf.contents_filename);
d7482997 430 }
431 } else {
432 paragraph *p = sect->para;
433 rdstringc fname_c = { 0, 0, NULL };
6d6d850c 434 xhtml_format(p, conf.template_filename, &fname_c);
d7482997 435 ret->filename = rdtrimc(&fname_c);
436 }
437 /* printf(" ! new file '%s', is_leaf == %s\n", ret->filename, (ret->is_leaf)?("true"):("false"));*/
438 return ret;
439}
440
441/*
442 * Walk the tree fixing up files which are actually leaf (ie
443 * have no children) but aren't at leaf level, so they have the
444 * leaf flag set.
445 */
446void xhtml_fixup_layout(xhtmlfile* file)
447{
448 if (file->child==NULL) {
449 file->is_leaf = TRUE;
450 } else {
451 xhtml_fixup_layout(file->child);
452 }
453 if (file->next)
454 xhtml_fixup_layout(file->next);
455}
456
457/*
458 * Create the tree structure so we know where everything goes.
459 * Method:
460 *
461 * Ignoring file splitting, we have three choices with each new section:
462 *
463 * +-----------------+-----------------+
464 * | | |
465 * X +----X----+ (1)
466 * | |
5d9cc07b 467 * Y (2)
d7482997 468 * |
469 * (3)
470 *
471 * Y is the last section we added (currentsect).
472 * If sect is the section we want to add, then:
473 *
474 * (1) if sect->level < currentsect->level
475 * (2) if sect->level == currentsect->level
476 * (3) if sect->level > currentsect->level
477 *
478 * This requires the constraint that you never skip section numbers
479 * (so you can't have a.b.c.d without all of a, a.b and a.b.c existing).
480 *
481 * Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change
482 * more than one level at a time. Lots of asserts, and probably part of
483 * the algorithm here, rely on this being true. (It currently isn't
484 * enforced by halibut, however.)
485 *
486 * File splitting makes this harder. For instance, say we added at (3)
487 * above and now need to add another section. We are splitting at level
488 * 2, ie the level of Y. Z is the last section we added:
489 *
490 * +-----------------+-----------------+
491 * | | |
492 * X +----X----+ (1)
493 * | |
494 * +----Y----+ (1)
495 * | |
496 * Z (2)
497 * |
498 * (3)
499 *
500 * The (1) case is now split; we need to search upwards to find where
501 * to actually link in. The other two cases remain the same (and will
502 * always be like this).
503 *
 * File splitting makes this harder. The decision of whether to split
 * to a new file is always made on the same condition, though: is the
 * level of this section higher than the leaf_level configuration
 * value or not?
508 *
509 * Treating the cases backwards:
510 *
511 * (3) same file if sect->level > conf.leaf_level, otherwise new file
512 *
513 * if in the same file, currentsect->child points to sect
514 * otherwise the linking is done through the file tree (which works
515 * in more or less the same way, ie currentfile->child points to
516 * the new file)
517 *
518 * (2) same file if sect->level > conf.leaf_level, otherwise new file
519 *
520 * if in the same file, currentsect->next points to sect
521 * otherwise file linking and currentfile->next points to the new
522 * file (we know that Z must have caused a new file to be created)
523 *
524 * (1) same file if sect->level > conf.leaf_level, otherwise new file
525 *
526 * this is actually effectively the same case as (2) here,
527 * except that we first have to travel up the sections to figure
528 * out which section this new one will be a sibling of. In doing
529 * so, we may disappear off the top of a file and have to go up
530 * to its parent in the file tree.
531 *
532 */
533static void xhtml_ponder_layout(paragraph *p)
534{
535 xhtmlsection *lastsection;
536 xhtmlsection *currentsect;
537 xhtmlfile *currentfile;
538
539 lastfile = NULL;
540 topsection = xhtml_new_section(NULL);
541 topfile = xhtml_new_file(NULL);
542 lastsection = topsection;
543 currentfile = topfile;
544 currentsect = topsection;
545
d2e74722 546 if (conf.leaf_level == 0) {
547 topfile->is_leaf = 1;
548 topfile->sections = topsection;
549 topsection->file = topfile;
550 }
551
d7482997 552 for (; p; p=p->next)
553 {
554 int level = xhtml_para_level(p);
555 if (level>0) /* actually a section */
556 {
557 xhtmlsection *sect;
6d6d850c 558 rdstringc frag_c = { 0, 0, NULL };
d7482997 559
560 sect = xhtml_new_section(lastsection);
561 lastsection = sect;
562 sect->para = p;
6d6d850c 563
564 xhtml_format(p, conf.template_fragment, &frag_c);
565 sect->fragment = rdtrimc(&frag_c);
d7482997 566 sect->level = level;
567 /* printf(" ! adding para @ %p as sect %s, level %i\n", sect->para, sect->fragment, level);*/
568
569 if (level>currentsect->level) { /* case (3) */
570 if (level>conf.leaf_level) { /* same file */
571 assert(currentfile->is_leaf);
572 currentsect->child = sect;
573 sect->parent=currentsect;
574 sect->file=currentfile;
575 /* printf("connected '%s' to existing file '%s' [I]\n", sect->fragment, currentfile->filename);*/
576 currentsect=sect;
577 } else { /* new file */
578 xhtmlfile *file = xhtml_new_file(sect);
579 assert(!currentfile->is_leaf);
580 currentfile->child=file;
581 sect->file=file;
582 file->parent=currentfile;
583 /* printf("connected '%s' to new file '%s' [I]\n", sect->fragment, file->filename);*/
584 currentfile=file;
585 currentsect=sect;
586 }
587 } else if (level >= currentsect->file->sections->level) {
588 /* Case (1) or (2) *AND* still under the section that starts
589 * the current file.
590 *
591 * I'm not convinced that this couldn't be rolled in with the
592 * final else {} leg further down. It seems a lot of effort
593 * this way.
594 */
595 if (level>conf.leaf_level) { /* stick within the same file */
596 assert(currentfile->is_leaf);
597 sect->file = currentfile;
598 while (currentsect && currentsect->level > level &&
599 currentsect->file==currentsect->parent->file) {
600 currentsect = currentsect->parent;
601 }
602 assert(currentsect);
603 currentsect->next = sect;
604 assert(currentsect->level == sect->level);
605 sect->parent = currentsect->parent;
606 currentsect = sect;
607 /* printf("connected '%s' to existing file '%s' [II]\n", sect->fragment, currentfile->filename);*/
608 } else { /* new file */
609 xhtmlfile *file = xhtml_new_file(sect);
610 sect->file=file;
611 currentfile->next=file;
612 file->parent=currentfile->parent;
613 file->is_leaf=(level==conf.leaf_level);
614 file->sections=sect;
615 /* printf("connected '%s' to new file '%s' [II]\n", sect->fragment, file->filename);*/
616 currentfile=file;
617 currentsect=sect;
618 }
619 } else { /* Case (1) or (2) and we must move up the file tree first */
620 /* this loop is now probably irrelevant - we know we can't connect
621 * to anything in the current file */
622 while (currentsect && level<currentsect->level) {
623 currentsect=currentsect->parent;
624 if (currentsect) {
625 /* printf(" * up one level to '%s'\n", currentsect->fragment);*/
626 } else {
627 /* printf(" * up one level (off top of current file)\n");*/
628 }
629 }
630 if (currentsect) {
631 /* I'm pretty sure this can now never fire */
632 assert(currentfile->is_leaf);
633 /* printf("connected '%s' to existing file '%s' [III]\n", sect->fragment, currentfile->filename);*/
634 sect->file = currentfile;
635 currentsect->next=sect;
636 currentsect=sect;
637 } else { /* find a file we can attach to */
638 while (currentfile && currentfile->sections && level<currentfile->sections->level) {
639 currentfile=currentfile->parent;
640 if (currentfile) {
641 /* printf(" * up one file level to '%s'\n", currentfile->filename);*/
642 } else {
643 /* printf(" * up one file level (off top of tree)\n");*/
644 }
645 }
646 if (currentfile) { /* new file (we had to skip up a file to
647 get here, so we must be dealing with a
648 level no lower than the configured
649 leaf_level */
650 xhtmlfile *file = xhtml_new_file(sect);
651 currentfile->next=file;
652 sect->file=file;
653 file->parent=currentfile->parent;
654 file->is_leaf=(level==conf.leaf_level);
655 file->sections=sect;
656 /* printf("connected '%s' to new file '%s' [III]\n", sect->fragment, file->filename);*/
657 currentfile=file;
658 currentsect=sect;
659 } else {
660 fatal(err_whatever, "Ran off the top trying to connect sibling: strange document.");
661 }
662 }
663 }
664 }
665 }
666 topsection = lastsection; /* get correct end of the chain */
667 xhtml_fixup_layout(topfile); /* leaf files not at leaf level marked as such */
668}
669
670static void xhtml_do_index();
671static void xhtml_do_file(xhtmlfile *file);
672static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform);
ce9921d6 673static void xhtml_do_paras(FILE *fp, paragraph *p, paragraph *end, int indexable);
d7482997 674static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit);
675static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit);
676static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit);
677static int xhtml_do_contents(FILE *fp, xhtmlfile *file);
678static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file);
679static void xhtml_do_sections(FILE *fp, xhtmlsection *sections);
680
681/*
682 * Do all the files in this structure.
683 */
684static void xhtml_do_files(xhtmlfile *file)
685{
686 xhtml_do_file(file);
687 if (file->child)
688 xhtml_do_files(file->child);
689 if (file->next)
690 xhtml_do_files(file->next);
691}
692
693/*
694 * Free up all memory used by the file tree from 'xfile' downwards
695 */
696static void xhtml_free_file(xhtmlfile* xfile)
697{
698 if (xfile==NULL) {
699 return;
700 }
701
702 if (xfile->filename) {
703 sfree(xfile->filename);
704 }
705 xhtml_free_file(xfile->child);
706 xhtml_free_file(xfile->next);
707 sfree(xfile);
708}
709
710/*
711 * Main function.
712 */
713void xhtml_backend(paragraph *sourceform, keywordlist *in_keywords,
43341922 714 indexdata *in_idx, void *unused)
d7482997 715{
716/* int i;*/
717 indexentry *ientry;
718 int ti;
719 xhtmlsection *xsect;
720
43341922 721 IGNORE(unused);
722
d7482997 723 sourceparas = sourceform;
724 conf = xhtml_configure(sourceform);
725 keywords = in_keywords;
726 idx = in_idx;
727
728 /* Clear up the index entries backend data pointers */
729 for (ti=0; (ientry = (indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
730 ientry->backend_data=NULL;
731 }
732
733 xhtml_ponder_layout(sourceform);
734
735 /* old system ... (writes to *.alt, but gets some stuff wrong and is ugly) */
736/* xhtml_level_0(sourceform);
737 for (i=1; i<=conf.leaf_level; i++)
738 {
739 xhtml_level(sourceform, i);
740 }*/
741
742 /* new system ... (writes to *.html, but isn't fully trusted) */
743 xhtml_do_top_file(topfile, sourceform);
744 assert(!topfile->next); /* shouldn't have a sibling at all */
d2e74722 745 if (topfile->child) {
746 xhtml_do_files(topfile->child);
747 xhtml_do_index();
748 }
d7482997 749
750 /* release file, section, index data structures */
751 xsect = topsection;
752 while (xsect) {
753 xhtmlsection *tmp = xsect->chain;
754 if (xsect->fragment) {
755 sfree(xsect->fragment);
756 }
757 sfree(xsect);
758 xsect = tmp;
759 }
760 xhtml_free_file(topfile);
761 for (ti = 0; (ientry=(indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
762 if (ientry->backend_data!=NULL) {
763 xhtmlindex *xi = (xhtmlindex*) ientry->backend_data;
764 if (xi->sections!=NULL) {
765 sfree(xi->sections);
766 }
767 sfree(xi);
768 }
769 ientry->backend_data = NULL;
770 }
e5e6bf9d 771 sfree(conf.fsect);
d7482997 772}
773
774static int xhtml_para_level(paragraph *p)
775{
776 switch (p->type)
777 {
d9d3dd95 778 case para_Title:
779 return 0;
780 break;
d7482997 781 case para_UnnumberedChapter:
782 case para_Chapter:
783 case para_Appendix:
784 return 1;
785 break;
786/* case para_BiblioCited:
787 return 2;
788 break;*/
789 case para_Heading:
790 case para_Subsect:
791 return p->aux+2;
792 break;
793 default:
794 return -1;
795 break;
796 }
797}
798
d7482997 799/* Output the nav links for the current file.
800 * file == NULL means we're doing the index
801 */
802static void xhtml_donavlinks(FILE *fp, xhtmlfile *file)
803{
804 xhtmlfile *xhtml_next_file = NULL;
805 fprintf(fp, "<p");
806 if (conf.nav_attrs!=NULL) {
807 fprintf(fp, " %ls>", conf.nav_attrs);
808 } else {
809 fprintf(fp, ">");
810 }
811 if (xhtml_last_file==NULL) {
812 fprintf(fp, "Previous | ");
813 } else {
503e4ab0 814 fprintf(fp, "<a href=\"%s\">Previous</a> | ", xhtml_last_file->filename);
d7482997 815 }
503e4ab0 816 fprintf(fp, "<a href=\"%s\">Contents</a> | ", conf.contents_filename);
25acf71d 817 if (file == NULL) {
818 fprintf(fp, "Index | ");
819 } else {
503e4ab0 820 fprintf(fp, "<a href=\"%s\">Index</a> | ", conf.index_filename);
25acf71d 821 }
d7482997 822 if (file != NULL) { /* otherwise we're doing nav links for the index */
823 if (xhtml_next_file==NULL)
824 xhtml_next_file = file->child;
825 if (xhtml_next_file==NULL)
826 xhtml_next_file = file->next;
827 if (xhtml_next_file==NULL)
828 xhtml_next_file = file->parent->next;
829 }
830 if (xhtml_next_file==NULL) {
831 if (file==NULL) { /* index, so no next file */
832 fprintf(fp, "Next ");
833 } else {
503e4ab0 834 fprintf(fp, "<a href=\"%s\">Next</a>", conf.index_filename);
d7482997 835 }
836 } else {
503e4ab0 837 fprintf(fp, "<a href=\"%s\">Next</a>", xhtml_next_file->filename);
d7482997 838 }
839 fprintf(fp, "</p>\n");
840}
841
842/* Write out the index file */
d2e74722 843static void xhtml_do_index_body(FILE *fp)
d7482997 844{
d7482997 845 indexentry *y;
846 int ti;
d7482997 847
d2e74722 848 if (count234(idx->entries) == 0)
849 return; /* don't write anything at all */
d7482997 850
851 fprintf(fp, "<dl>\n");
852 /* iterate over idx->entries using the tree functions and display everything */
853 for (ti = 0; (y = (indexentry *)index234(idx->entries, ti)) != NULL; ti++) {
854 if (y->backend_data) {
855 int i;
856 xhtmlindex *xi;
857
858 fprintf(fp, "<dt>");
ce9921d6 859 xhtml_para(fp, y->text, FALSE);
d7482997 860 fprintf(fp, "</dt>\n<dd>");
861
862 xi = (xhtmlindex*) y->backend_data;
863 for (i=0; i<xi->nsection; i++) {
864 xhtmlsection *sect = xi->sections[i];
865 if (sect) {
503e4ab0 866 fprintf(fp, "<a href=\"%s#%s\">", sect->file->filename, sect->fragment);
d7482997 867 if (sect->para->kwtext) {
ce9921d6 868 xhtml_para(fp, sect->para->kwtext, FALSE);
d7482997 869 } else if (sect->para->words) {
ce9921d6 870 xhtml_para(fp, sect->para->words, FALSE);
d7482997 871 }
872 fprintf(fp, "</a>");
873 if (i+1<xi->nsection) {
874 fprintf(fp, ", ");
875 }
876 }
877 }
878 fprintf(fp, "</dd>\n");
879 }
880 }
881 fprintf(fp, "</dl>\n");
d2e74722 882}
883static void xhtml_do_index()
884{
5dd44dce 885 word temp_word = { NULL, NULL, word_Normal, 0, 0, L"Index",
886 { NULL, 0, 0}, NULL };
50d6b4bd 887 FILE *fp = fopen(conf.index_filename, "w");
d2e74722 888
889 if (fp==NULL)
50d6b4bd 890 fatal(err_cantopenw, conf.index_filename);
d2e74722 891 xhtml_doheader(fp, &temp_word);
892 xhtml_donavlinks(fp, NULL);
893
894 xhtml_do_index_body(fp);
d7482997 895
896 xhtml_donavlinks(fp, NULL);
897 xhtml_dofooter(fp);
898 fclose(fp);
899}
900
901/* Output the given file. This includes whatever contents at beginning and end, etc. etc. */
902static void xhtml_do_file(xhtmlfile *file)
903{
904 FILE *fp = fopen(file->filename, "w");
905 if (fp==NULL)
906 fatal(err_cantopenw, file->filename);
907
908 if (file->sections->para->words) {
909 xhtml_doheader(fp, file->sections->para->words);
910 } else if (file->sections->para->kwtext) {
911 xhtml_doheader(fp, file->sections->para->kwtext);
912 } else {
913 xhtml_doheader(fp, NULL);
914 }
915
916 xhtml_donavlinks(fp, file);
917
d2e74722 918 if (file->is_leaf && conf.leaf_contains_contents &&
919 xhtml_do_contents(NULL, file)>=conf.leaf_smallest_contents)
d7482997 920 xhtml_do_contents(fp, file);
921 xhtml_do_sections(fp, file->sections);
922 if (!file->is_leaf)
923 xhtml_do_naked_contents(fp, file);
924
925 xhtml_donavlinks(fp, file);
926
927 xhtml_dofooter(fp);
928 fclose(fp);
929
930 xhtml_last_file = file;
931}
932
933/* Output the top-level file. */
934static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform)
935{
936 paragraph *p;
937 int done=FALSE;
938 FILE *fp = fopen(file->filename, "w");
939 if (fp==NULL)
940 fatal(err_cantopenw, file->filename);
941
942 /* Do the title -- only one allowed */
943 for (p = sourceform; p && !done; p = p->next)
944 {
945 if (p->type == para_Title)
946 {
947 xhtml_doheader(fp, p->words);
948 done=TRUE;
949 }
950 }
951 if (!done)
952 xhtml_doheader(fp, NULL /* Eek! */);
953
d2e74722 954 /*
955 * Display the title.
956 */
957 for (p = sourceform; p; p = p->next)
958 {
959 if (p->type == para_Title) {
ce9921d6 960 xhtml_heading(fp, p, FALSE);
d2e74722 961 break;
962 }
963 }
964
9057a0a8 965 /* Do the preamble */
d7482997 966 for (p = sourceform; p; p = p->next)
967 {
8902e0ed 968 if (p->type == para_Chapter || p->type == para_Heading ||
969 p->type == para_Subsect || p->type == para_Appendix ||
970 p->type == para_UnnumberedChapter) {
971 /*
972 * We've found the end of the preamble. Do every normal
973 * paragraph up to there.
974 */
ce9921d6 975 xhtml_do_paras(fp, sourceform, p, FALSE);
8902e0ed 976 break;
d7482997 977 }
978 }
d7482997 979
980 xhtml_do_contents(fp, file);
981 xhtml_do_sections(fp, file->sections);
d2e74722 982
5d9cc07b 983 /*
984 * Put the index in the top file if we're in single-file mode
985 * (leaf-level 0).
986 */
987 if (conf.leaf_level == 0 && count234(idx->entries) > 0) {
d2e74722 988 fprintf(fp, "<a name=\"index\"></a><h1>Index</h1>\n");
989 xhtml_do_index_body(fp);
990 }
991
d7482997 992 xhtml_dofooter(fp);
993 fclose(fp);
994}
995
/*
 * Convert a wide string to a freshly allocated ASCII string, stored
 * in `*out'. Characters outside the printable ASCII range 32..126
 * are replaced by '?'.
 */
static void xhtml_utostr(wchar_t *in, char **out)
{
    int len = ustrlen(in);
    char *buf;
    int k;

    buf = smalloc(len+1);
    *out = buf;

    for (k = 0; k < len; k++) {
        wchar_t wc = in[k];

        if (wc < 32 || wc > 126)
            buf[k] = '?';
        else
            buf[k] = (char)wc;
    }
    buf[k] = 0;
}
1013
1014/*
1015 * Write contents for the given file, and subfiles, down to
1016 * the appropriate contents depth. Returns the number of
1017 * entries written.
1018 */
1019static int xhtml_do_contents(FILE *fp, xhtmlfile *file)
1020{
c8c7926b 1021 int level, limit, count = 0;
d7482997 1022 if (!file)
1023 return 0;
1024
1025 level = (file->sections)?(file->sections->level):(0);
1026 limit = conf.contents_depth[(level>5)?(5):(level)];
1027 start_level = (file->is_leaf) ? (level-1) : (level);
1028 last_level = start_level;
1029
1030 count += xhtml_do_contents_section_limit(fp, file->sections, limit);
1031 count += xhtml_do_contents_limit(fp, file->child, limit);
1032 if (fp!=NULL) {
1033 while (last_level > start_level) {
1034 last_level--;
c8c7926b 1035 fprintf(fp, "</li></ul>\n");
d7482997 1036 }
1037 }
1038 return count;
1039}
1040
1041/* As above, but doesn't do anything in the current file */
1042static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file)
1043{
1044 int level, limit, start_level, count = 0;
1045 if (!file)
1046 return 0;
1047
1048 level = (file->sections)?(file->sections->level):(0);
1049 limit = conf.contents_depth[(level>5)?(5):(level)];
1050 start_level = (file->is_leaf) ? (level-1) : (level);
1051 last_level = start_level;
1052
1053 count = xhtml_do_contents_limit(fp, file->child, limit);
1054 if (fp!=NULL) {
1055 while (last_level > start_level) {
1056 last_level--;
c8c7926b 1057 fprintf(fp, "</li></ul>\n");
d7482997 1058 }
1059 }
1060 return count;
1061}
1062
1063/*
1064 * Write contents for the given file, children, and siblings, down to
1065 * given limit contents depth.
1066 */
1067static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit)
1068{
1069 int count = 0;
1070 while (file) {
1071 count += xhtml_do_contents_section_limit(fp, file->sections, limit);
1072 count += xhtml_do_contents_limit(fp, file->child, limit);
1073 file = file->next;
1074 }
1075 return count;
1076}
1077
1078/*
1079 * Write contents entries for the given section tree, down to the
1080 * limit contents depth.
1081 */
1082static int xhtml_do_contents_section_deep_limit(FILE *fp, xhtmlsection *section, int limit)
1083{
1084 int count = 0;
1085 while (section) {
1086 if (!xhtml_add_contents_entry(fp, section, limit))
1087 return 0;
1088 else
1089 count++;
1090 count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
1091 section = section->next;
1092 }
1093 return count;
1094}
1095
1096/*
1097 * Write contents entries for the given section tree, down to the
1098 * limit contents depth.
1099 */
1100static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit)
1101{
1102 int count = 0;
1103 if (!section)
1104 return 0;
1105 xhtml_add_contents_entry(fp, section, limit);
1106 count=1;
1107 count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
1108 /* section=section->child;
1109 while (section && xhtml_add_contents_entry(fp, section, limit)) {
1110 section = section->next;
1111 }*/
1112 return count;
1113}
1114
1115/*
1116 * Add a section entry, unless we're exceeding the limit, in which
1117 * case return FALSE (otherwise return TRUE).
1118 */
1119static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit)
1120{
1121 if (!section || section->level > limit)
1122 return FALSE;
5d9cc07b 1123 if (fp==NULL || section->level < 0)
d7482997 1124 return TRUE;
c8c7926b 1125 if (last_level > section->level) {
1126 while (last_level > section->level) {
1127 last_level--;
1128 fprintf(fp, "</li></ul>\n");
1129 }
1130 fprintf(fp, "</li>\n");
1131 } else if (last_level < section->level) {
1132 assert(last_level == section->level - 1);
d7482997 1133 last_level++;
1134 fprintf(fp, "<ul>\n");
c8c7926b 1135 } else {
1136 fprintf(fp, "</li>\n");
d7482997 1137 }
1138 fprintf(fp, "<li><a href=\"%s#%s\">", section->file->filename, section->fragment);
1139 if (section->para->kwtext) {
ce9921d6 1140 xhtml_para(fp, section->para->kwtext, FALSE);
d7482997 1141 if (section->para->words) {
1142 fprintf(fp, ": ");
1143 }
1144 }
1145 if (section->para->words) {
ce9921d6 1146 xhtml_para(fp, section->para->words, FALSE);
d7482997 1147 }
c8c7926b 1148 fprintf(fp, "</a>\n");
d7482997 1149 return TRUE;
1150}
1151
1152/*
1153 * Write all the sections in this file. Do all paragraphs in this section, then all
1154 * children (recursively), then go on to the next one (tail recursively).
1155 */
1156static void xhtml_do_sections(FILE *fp, xhtmlsection *sections)
1157{
1158 while (sections) {
1159 currentsection = sections;
ce9921d6 1160 xhtml_do_paras(fp, sections->para, NULL, TRUE);
d7482997 1161 xhtml_do_sections(fp, sections->child);
1162 sections = sections->next;
1163 }
1164}
1165
1166/* Write this list of paragraphs. Close off all lists at the end. */
ce9921d6 1167static void xhtml_do_paras(FILE *fp, paragraph *p, paragraph *end,
1168 int indexable)
d7482997 1169{
7136a6c7 1170 int last_type = -1, ptype, first=TRUE;
1171 stack lcont_stack = stk_new();
d7482997 1172 if (!p)
1173 return;
1174
1175/* for (; p && (xhtml_para_level(p)>limit || xhtml_para_level(p)==-1 || first); p=p->next) {*/
8902e0ed 1176 for (; p && p != end && (xhtml_para_level(p)==-1 || first); p=p->next) {
d7482997 1177 first=FALSE;
7136a6c7 1178 switch (ptype = p->type)
d7482997 1179 {
1180 /*
1181 * Things we ignore because we've already processed them or
1182 * aren't going to touch them in this pass.
1183 */
1184 case para_IM:
1185 case para_BR:
1186 case para_Biblio: /* only touch BiblioCited */
1187 case para_VersionID:
d7482997 1188 case para_NoCite:
1189 case para_Title:
1190 break;
1191
1192 /*
1193 * Chapter titles.
1194 */
1195 case para_Chapter:
1196 case para_Appendix:
1197 case para_UnnumberedChapter:
ce9921d6 1198 xhtml_heading(fp, p, indexable);
d7482997 1199 break;
1200
1201 case para_Heading:
1202 case para_Subsect:
ce9921d6 1203 xhtml_heading(fp, p, indexable);
d7482997 1204 break;
1205
1206 case para_Rule:
1207 fprintf(fp, "\n<hr />\n");
1208 break;
1209
1210 case para_Normal:
9057a0a8 1211 case para_Copyright:
d7482997 1212 fprintf(fp, "\n<p>");
ce9921d6 1213 xhtml_para(fp, p->words, indexable);
d7482997 1214 fprintf(fp, "</p>\n");
1215 break;
1216
7136a6c7 1217 case para_LcontPush:
1218 {
1219 int *p;
1220 p = mknew(int);
1221 *p = last_type;
1222 stk_push(lcont_stack, p);
1223 last_type = para_Normal;
1224 }
1225 break;
1226 case para_LcontPop:
1227 {
1228 int *p = stk_pop(lcont_stack);
1229 assert(p);
1230 ptype = last_type = *p;
1231 sfree(p);
1232 goto closeofflist; /* ick */
1233 }
1234 break;
2614b01d 1235 case para_QuotePush:
1236 fprintf(fp, "<blockquote>\n");
1237 break;
1238 case para_QuotePop:
1239 fprintf(fp, "</blockquote>\n");
1240 break;
7136a6c7 1241
d7482997 1242 case para_Bullet:
1243 case para_NumberedList:
7136a6c7 1244 case para_Description:
1245 case para_DescribedThing:
d7482997 1246 case para_BiblioCited:
c8c7926b 1247 if (last_type!=p->type &&
1248 !(last_type==para_DescribedThing && p->type==para_Description) &&
1249 !(last_type==para_Description && p->type==para_DescribedThing)) {
d7482997 1250 /* start up list if necessary */
1251 if (p->type == para_Bullet) {
1252 fprintf(fp, "<ul>\n");
1253 } else if (p->type == para_NumberedList) {
1254 fprintf(fp, "<ol>\n");
7136a6c7 1255 } else if (p->type == para_BiblioCited ||
1256 p->type == para_DescribedThing ||
1257 p->type == para_Description) {
d7482997 1258 fprintf(fp, "<dl>\n");
1259 }
1260 }
7136a6c7 1261 if (p->type == para_Bullet || p->type == para_NumberedList) {
d7482997 1262 fprintf(fp, "<li>");
7136a6c7 1263 } else if (p->type == para_DescribedThing) {
1264 fprintf(fp, "<dt>");
1265 } else if (p->type == para_Description) {
1266 fprintf(fp, "<dd>");
1267 } else if (p->type == para_BiblioCited) {
d7482997 1268 fprintf(fp, "<dt>");
ce9921d6 1269 xhtml_para(fp, p->kwtext, indexable);
d7482997 1270 fprintf(fp, "</dt>\n<dd>");
1271 }
ce9921d6 1272 xhtml_para(fp, p->words, indexable);
7136a6c7 1273 {
1274 paragraph *p2 = p->next;
1275 if (p2 && xhtml_para_level(p2)==-1 && p2->type == para_LcontPush)
1276 break;
1277 }
1278
1279 closeofflist:
1280 if (ptype == para_BiblioCited) {
d7482997 1281 fprintf(fp, "</dd>\n");
c8c7926b 1282 } else if (ptype == para_DescribedThing) {
7136a6c7 1283 fprintf(fp, "</dt>");
c8c7926b 1284 } else if (ptype == para_Description) {
7136a6c7 1285 fprintf(fp, "</dd>");
1286 } else if (ptype == para_Bullet || ptype == para_NumberedList) {
d7482997 1287 fprintf(fp, "</li>");
1288 }
7136a6c7 1289 if (ptype == para_Bullet || ptype == para_NumberedList ||
1290 ptype == para_BiblioCited || ptype == para_Description ||
1291 ptype == para_DescribedThing)
d7482997 1292 /* close off list if necessary */
1293 {
1294 paragraph *p2 = p->next;
1295 int close_off=FALSE;
1296/* if (p2 && (xhtml_para_level(p2)>limit || xhtml_para_level(p2)==-1)) {*/
1297 if (p2 && xhtml_para_level(p2)==-1) {
c8c7926b 1298 if (p2->type != ptype &&
1299 !(p2->type==para_DescribedThing && ptype==para_Description) &&
1300 !(p2->type==para_Description && ptype==para_DescribedThing) &&
1301 p2->type != para_LcontPush)
d7482997 1302 close_off=TRUE;
1303 } else {
1304 close_off=TRUE;
1305 }
1306 if (close_off) {
7136a6c7 1307 if (ptype == para_Bullet) {
d7482997 1308 fprintf(fp, "</ul>\n");
7136a6c7 1309 } else if (ptype == para_NumberedList) {
d7482997 1310 fprintf(fp, "</ol>\n");
7136a6c7 1311 } else if (ptype == para_BiblioCited ||
1312 ptype == para_Description ||
1313 ptype == para_DescribedThing) {
d7482997 1314 fprintf(fp, "</dl>\n");
1315 }
1316 }
1317 }
1318 break;
1319
1320 case para_Code:
1321 xhtml_codepara(fp, p->words);
1322 break;
1323 }
7136a6c7 1324 last_type = ptype;
d7482997 1325 }
7136a6c7 1326
1327 stk_free(lcont_stack);
d7482997 1328}
1329
1330/*
1331 * Output a header for this XHTML file.
1332 */
1333static void xhtml_doheader(FILE *fp, word *title)
1334{
1335 fprintf(fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n");
1336 fprintf(fp, "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n");
503e4ab0 1337 fprintf(fp, "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n\n<head>\n<title>");
d7482997 1338 if (title==NULL)
1339 fprintf(fp, "The thing with no name!");
1340 else
ce9921d6 1341 xhtml_para(fp, title, FALSE);
d7482997 1342 fprintf(fp, "</title>\n");
1343 fprintf(fp, "<meta name=\"generator\" content=\"Halibut %s xhtml-backend\" />\n", version);
1344 if (conf.author)
1345 fprintf(fp, "<meta name=\"author\" content=\"%ls\" />\n", conf.author);
1346 if (conf.description)
1347 fprintf(fp, "<meta name=\"description\" content=\"%ls\" />\n", conf.description);
1348 if (conf.head_end)
1349 fprintf(fp, "%ls\n", conf.head_end);
1350 fprintf(fp, "</head>\n\n");
1351 if (conf.body)
1352 fprintf(fp, "%ls\n", conf.body);
1353 else
1354 fprintf(fp, "<body>\n");
1355 if (conf.body_start)
1356 fprintf(fp, "%ls\n", conf.body_start);
1357}
1358
1359/*
1360 * Output a footer for this XHTML file.
1361 */
1362static void xhtml_dofooter(FILE *fp)
1363{
1364 fprintf(fp, "\n<hr />\n\n");
1365 if (conf.body_end)
1366 fprintf(fp, "%ls\n", conf.body_end);
1367 if (!conf.suppress_address) {
1368 fprintf(fp,"<address>\n");
1369 if (conf.address_start)
1370 fprintf(fp, "%ls\n", conf.address_start);
1371 /* Do the version ID */
1372 if (conf.include_version_id) {
1373 paragraph *p;
1374 int started = 0;
1375 for (p = sourceparas; p; p = p->next)
1376 if (p->type == para_VersionID) {
1377 xhtml_versionid(fp, p->words, started);
1378 started = 1;
1379 }
1380 }
1381 if (conf.address_end)
1382 fprintf(fp, "%ls\n", conf.address_end);
1383 fprintf(fp, "</address>\n");
1384 }
1385 fprintf(fp, "</body>\n\n</html>\n");
1386}
1387
1388/*
1389 * Output the versionid paragraph. Typically this is a version control
1390 * ID string (such as $Id...$ in RCS).
1391 */
1392static void xhtml_versionid(FILE *fp, word *text, int started)
1393{
1394 rdstringc t = { 0, 0, NULL };
1395
1396 rdaddc(&t, '['); /* FIXME: configurability */
ce9921d6 1397 xhtml_rdaddwc(&t, text, NULL, FALSE);
d7482997 1398 rdaddc(&t, ']'); /* FIXME: configurability */
1399
1400 if (started)
c8c7926b 1401 fprintf(fp, "<br />\n");
d7482997 1402 fprintf(fp, "%s\n", t.text);
1403 sfree(t.text);
1404}
1405
1406/* Is this an XHTML reserved character? */
1407static int xhtml_reservedchar(int c)
1408{
1409 if (c=='&' || c=='<' || c=='>' || c=='"')
1410 return TRUE;
1411 else
1412 return FALSE;
1413}
1414
1415/*
1416 * Convert a wide string into valid XHTML: Anything outside ASCII will
1417 * be fixed up as an entity. Currently we don't worry about constraining the
1418 * encoded character set, which we should probably do at some point (we can
1419 * still fix up and return FALSE - see the last comment here). We also don't
1420 * currently
1421 *
1422 * Because this is only used for words, spaces are HARD spaces (any other
1423 * spaces will be word_Whitespace not word_Normal). So they become &nbsp;
1424 * Unless hard_spaces is FALSE, of course (code paragraphs break the above
1425 * rule).
1426 *
1427 * If `result' is non-NULL, mallocs the resulting string and stores a pointer to
1428 * it in `*result'. If `result' is NULL, merely checks whether all
1429 * characters in the string are feasible.
1430 *
1431 * Return is nonzero if all characters are OK. If not all
1432 * characters are OK but `result' is non-NULL, a result _will_
1433 * still be generated!
1434 */
4b3c5afb 1435static int xhtml_convert(wchar_t *s, int maxlen, char **result,
1436 int hard_spaces) {
d7482997 1437 int doing = (result != 0);
1438 int ok = TRUE;
1439 char *p = NULL;
1440 int plen = 0, psize = 0;
1441
4b3c5afb 1442 if (maxlen <= 0)
1443 maxlen = -1;
1444
1445 for (; *s && maxlen != 0; s++, maxlen--) {
d7482997 1446 wchar_t c = *s;
1447
1448#define ensure_size(i) if (i>=psize) { psize = i+256; p = resize(p, psize); }
1449
1450 if (((c == 32 && !hard_spaces) || (c > 32 && c <= 126 && !xhtml_reservedchar(c)))) {
1451 /* Char is OK. */
1452 if (doing)
1453 {
1454 ensure_size(plen);
1455 p[plen++] = (char)c;
1456 }
1457 } else {
1458 /* Char needs fixing up. */
1459 /* ok = FALSE; -- currently we never return FALSE; we
1460 * might want to when considering a character set for the
1461 * encoded document.
1462 */
1463 if (doing)
1464 {
1465 if (c==32) { /* a space in a word is a hard space */
1466 ensure_size(plen+6); /* includes space for the NUL, which is subsequently stomped on */
1467 sprintf(p+plen, "&nbsp;");
1468 plen+=6;
1469 } else {
1470 /* FIXME: entity names! */
1471 ensure_size(plen+8); /* includes space for the NUL, which is subsequently stomped on */
1472 plen+=sprintf(p+plen, "&#%04i;", (int)c);
1473 }
1474 }
1475 }
1476 }
1477 if (doing) {
1478 p = resize(p, plen+1);
1479 p[plen] = '\0';
1480 *result = p;
1481 }
1482 return ok;
1483}
1484
1485/*
1486 * This formats the given words as XHTML.
ce9921d6 1487 *
1488 * `indexable', if FALSE, prohibits adding any index references.
1489 * You might use this, for example, if an index reference occurred
1490 * in a section title, to prevent phony index references when the
1491 * section title is processed in strange places such as contents
1492 * sections.
d7482997 1493 */
ce9921d6 1494static void xhtml_rdaddwc(rdstringc *rs, word *text, word *end, int indexable) {
d7482997 1495 char *c;
1496 keyword *kwl;
1497 xhtmlsection *sect;
1498 indextag *itag;
1499 int ti;
1500
1501 for (; text && text != end; text = text->next) {
1502 switch (text->type) {
1503 case word_HyperLink:
1504 xhtml_utostr(text->text, &c);
1505 rdaddsc(rs, "<a href=\"");
1506 rdaddsc(rs, c);
1507 rdaddsc(rs, "\">");
1508 sfree(c);
1509 break;
1510
1511 case word_UpperXref:
1512 case word_LowerXref:
1513 kwl = kw_lookup(keywords, text->text);
1514 if (kwl) {
1515 sect=xhtml_find_section(kwl->para);
1516 if (sect) {
1517 rdaddsc(rs, "<a href=\"");
1518 rdaddsc(rs, sect->file->filename);
1519 rdaddc(rs, '#');
1520 rdaddsc(rs, sect->fragment);
1521 rdaddsc(rs, "\">");
1522 } else {
1523 rdaddsc(rs, "<a href=\"Apologies.html\"><!-- probably a bibliography cross reference -->");
1524 error(err_whatever, "Couldn't locate cross-reference! (Probably a bibliography entry.)");
1525 }
1526 } else {
1527 rdaddsc(rs, "<a href=\"Apologies.html\"><!-- unknown cross-reference -->");
1528 error(err_whatever, "Couldn't locate cross-reference! (Wasn't in source file.)");
1529 }
1530 break;
1531
1532 case word_IndexRef: /* in theory we could make an index target here */
1533/* rdaddsc(rs, "<a name=\"idx-");
1534 xhtml_utostr(text->text, &c);
1535 rdaddsc(rs, c);
1536 sfree(c);
1537 rdaddsc(rs, "\"></a>");*/
1538 /* what we _do_ need to do is to fix up the backend data
1539 * for any indexentry this points to.
1540 */
ce9921d6 1541 if (!indexable)
1542 break;
1543
d7482997 1544 for (ti=0; (itag = (indextag *)index234(idx->tags, ti))!=NULL; ti++) {
1545 /* FIXME: really ustricmp() and not ustrcmp()? */
1546 if (ustricmp(itag->name, text->text)==0) {
1547 break;
1548 }
1549 }
1550 if (itag!=NULL) {
1551 if (itag->refs!=NULL) {
1552 int i;
1553 for (i=0; i<itag->nrefs; i++) {
1554 xhtmlindex *idx_ref;
1555 indexentry *ientry;
1556
1557 ientry = itag->refs[i];
1558 if (ientry->backend_data==NULL) {
1559 idx_ref = (xhtmlindex*) smalloc(sizeof(xhtmlindex));
1560 if (idx_ref==NULL)
1561 fatal(err_nomemory);
1562 idx_ref->nsection = 0;
1563 idx_ref->size = 4;
1564 idx_ref->sections = (xhtmlsection**) smalloc(idx_ref->size * sizeof(xhtmlsection*));
1565 if (idx_ref->sections==NULL)
1566 fatal(err_nomemory);
1567 ientry->backend_data = idx_ref;
1568 } else {
1569 idx_ref = ientry->backend_data;
1570 if (idx_ref->nsection+1 > idx_ref->size) {
1571 int new_size = idx_ref->size * 2;
1572 idx_ref->sections = srealloc(idx_ref->sections, new_size * sizeof(xhtmlsection));
1573 if (idx_ref->sections==NULL) {
1574 fatal(err_nomemory);
1575 }
1576 idx_ref->size = new_size;
1577 }
1578 }
1579 idx_ref->sections[idx_ref->nsection++] = currentsection;
1580#if 0
1581#endif
1582 }
1583 } else {
1584 fatal(err_whatever, "Index tag had no entries!");
1585 }
1586 } else {
1587 fprintf(stderr, "Looking for index entry '%ls'\n", text->text);
1588 fatal(err_whatever, "Couldn't locate index entry! (Wasn't in index.)");
1589 }
1590 break;
1591
1592 case word_HyperEnd:
1593 case word_XrefEnd:
1594 rdaddsc(rs, "</a>");
1595 break;
1596
1597 case word_Normal:
1598 case word_Emph:
1599 case word_Code:
1600 case word_WeakCode:
1601 case word_WhiteSpace:
1602 case word_EmphSpace:
1603 case word_CodeSpace:
1604 case word_WkCodeSpace:
1605 case word_Quote:
1606 case word_EmphQuote:
1607 case word_CodeQuote:
1608 case word_WkCodeQuote:
1609 assert(text->type != word_CodeQuote &&
1610 text->type != word_WkCodeQuote);
1611 if (towordstyle(text->type) == word_Emph &&
1612 (attraux(text->aux) == attr_First ||
1613 attraux(text->aux) == attr_Only))
1614 rdaddsc(rs, "<em>");
1615 else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) &&
1616 (attraux(text->aux) == attr_First ||
1617 attraux(text->aux) == attr_Only))
1618 rdaddsc(rs, "<code>");
1619
1620 if (removeattr(text->type) == word_Normal) {
12efc259 1621 if (xhtml_convert(text->text, 0, &c, TRUE) || !text->alt)
1622 /* spaces in the word are hard */
d7482997 1623 rdaddsc(rs, c);
1624 else
ce9921d6 1625 xhtml_rdaddwc(rs, text->alt, NULL, indexable);
d7482997 1626 sfree(c);
1627 } else if (removeattr(text->type) == word_WhiteSpace) {
1628 rdaddc(rs, ' ');
1629 } else if (removeattr(text->type) == word_Quote) {
1630 rdaddsc(rs, "&quot;");
1631 }
1632
1633 if (towordstyle(text->type) == word_Emph &&
1634 (attraux(text->aux) == attr_Last ||
1635 attraux(text->aux) == attr_Only))
1636 rdaddsc(rs, "</em>");
1637 else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) &&
1638 (attraux(text->aux) == attr_Last ||
1639 attraux(text->aux) == attr_Only))
1640 rdaddsc(rs, "</code>");
1641 break;
1642 }
1643 }
1644}
1645
1646/* Output a heading, formatted as XHTML.
1647 */
ce9921d6 1648static void xhtml_heading(FILE *fp, paragraph *p, int indexable)
d7482997 1649{
1650 rdstringc t = { 0, 0, NULL };
1651 word *tprefix = p->kwtext;
1652 word *nprefix = p->kwtext2;
1653 word *text = p->words;
1654 int level = xhtml_para_level(p);
1655 xhtmlsection *sect = xhtml_find_section(p);
5d9cc07b 1656 xhtmlheadfmt *fmt;
d7482997 1657 char *fragment;
1658 if (sect) {
1659 fragment = sect->fragment;
1660 } else {
d2e74722 1661 if (p->type == para_Title)
1662 fragment = "title";
1663 else {
1664 fragment = ""; /* FIXME: what else can we do? */
1665 error(err_whatever, "Couldn't locate heading cross-reference!");
1666 }
d7482997 1667 }
1668
5d9cc07b 1669 if (p->type == para_Title)
1670 fmt = NULL;
1671 else if (level == 1)
1672 fmt = &conf.fchapter;
1673 else if (level-1 < conf.nfsect)
1674 fmt = &conf.fsect[level-1];
1675 else
1676 fmt = &conf.fsect[conf.nfsect-1];
1677
1678 if (fmt && fmt->just_numbers && nprefix) {
ce9921d6 1679 xhtml_rdaddwc(&t, nprefix, NULL, indexable);
5d9cc07b 1680 if (fmt) {
1681 char *c;
4b3c5afb 1682 if (xhtml_convert(fmt->number_suffix, 0, &c, FALSE)) {
5d9cc07b 1683 rdaddsc(&t, c);
1684 sfree(c);
1685 }
1686 }
1687 } else if (fmt && !fmt->just_numbers && tprefix) {
ce9921d6 1688 xhtml_rdaddwc(&t, tprefix, NULL, indexable);
5d9cc07b 1689 if (fmt) {
1690 char *c;
4b3c5afb 1691 if (xhtml_convert(fmt->number_suffix, 0, &c, FALSE)) {
5d9cc07b 1692 rdaddsc(&t, c);
1693 sfree(c);
1694 }
1695 }
d7482997 1696 }
ce9921d6 1697 xhtml_rdaddwc(&t, text, NULL, indexable);
d9d3dd95 1698 /*
1699 * If we're outputting in single-file mode, we need to lower
1700 * the level of each heading by one, because the overall
1701 * document title will be sitting right at the top as an <h1>
1702 * and so chapters and sections should start at <h2>.
1703 *
1704 * Even if not, the document title will come back from
1705 * xhtml_para_level() as level zero, so we must increment that
1706 * no matter what leaf_level is set to.
1707 */
1708 if (conf.leaf_level == 0 || level == 0)
1709 level++;
d7482997 1710 fprintf(fp, "<a name=\"%s\"></a><h%i>%s</h%i>\n", fragment, level, t.text, level);
1711 sfree(t.text);
1712}
1713
1714/* Output a paragraph. Styles are handled by xhtml_rdaddwc().
1715 * This looks pretty simple; I may have missed something ...
1716 */
ce9921d6 1717static void xhtml_para(FILE *fp, word *text, int indexable)
d7482997 1718{
1719 rdstringc out = { 0, 0, NULL };
ce9921d6 1720 xhtml_rdaddwc(&out, text, NULL, indexable);
d7482997 1721 fprintf(fp, "%s", out.text);
1722 sfree(out.text);
1723}
1724
1725/* Output a code paragraph. I'm treating this as preformatted, which
1726 * may not be entirely correct. See xhtml_para() for my worries about
1727 * this being overly-simple; however I think that most of the complexity
1728 * of the text backend came entirely out of word wrapping anyway.
1729 */
1730static void xhtml_codepara(FILE *fp, word *text)
1731{
1732 fprintf(fp, "<pre>");
1733 for (; text; text = text->next) if (text->type == word_WeakCode) {
4b3c5afb 1734 word *here, *next;
d7482997 1735 char *c;
4b3c5afb 1736
1737 /*
1738 * See if this WeakCode is followed by an Emph to indicate
1739 * emphasis.
1740 */
1741 here = text;
1742 if (text->next && text->next->type == word_Emph) {
1743 next = text = text->next;
1744 } else
1745 next = NULL;
1746
1747 if (next) {
1748 wchar_t *t, *e;
1749 int n;
1750
1751 t = here->text;
1752 e = next->text;
1753
1754 while (*e) {
1755 int ec = *e;
1756
1757 for (n = 0; t[n] && e[n] && e[n] == ec; n++);
1758 xhtml_convert(t, n, &c, FALSE);
1759 fprintf(fp, "%s%s%s",
1760 (ec == 'i' ? "<em>" : ec == 'b' ? "<b>" : ""),
1761 c,
1762 (ec == 'i' ? "</em>" : ec == 'b' ? "</b>" : ""));
1763 sfree(c);
1764
1765 t += n;
1766 e += n;
1767 }
1768
1769 xhtml_convert(t, 0, &c, FALSE);
1770 fprintf(fp, "%s\n", c);
1771 sfree(c);
1772 } else {
1773 xhtml_convert(here->text, 0, &c, FALSE);
1774 fprintf(fp, "%s\n", c);
1775 sfree(c);
1776 }
d7482997 1777 }
1778 fprintf(fp, "</pre>\n");
1779}