Limit unicode hex to 4 digits
[sgt/halibut] / bk_xhtml.c
CommitLineData
d7482997 1/*
2 * xhtml backend for Halibut
3 * (initial implementation by James Aylett)
4 *
5 * Still to do:
6 *
7 * +++ doesn't handle non-breaking hyphens. Not sure how to yet.
8 * +++ entity names (from a file -- ideally supply normal SGML files)
9 * +++ configuration directive to file split where the current layout
10 * code wouldn't. Needs changes to _ponder_layout() and _do_paras(),
11 * perhaps others.
12 *
13 * Limitations:
14 *
15 * +++ biblio/index references target the nearest section marker, rather
16 * than having a dedicated target themselves. In large bibliographies
17 * this will cause problems. (The solution is to fake up a response
18 * from xhtml_find_section(), probably linking it into the sections
19 * chain just in case we need it again, and to make freeing it up
20 * easier.) docsrc.pl used to work as we do, however, and SGT agrees that
21 * this is acceptable for now.
22 * +++ can't cope with leaf-level == 0. It's all to do with the
23 * top-level file not being normal, probably not even having a valid
24 * section level, and stuff like that. I question whether this is an
25 * issue, frankly; small manuals that fit on one page should probably
26 * not be written in halibut at all.
27 */
28
29#include <stdio.h>
30#include <stdlib.h>
677e18a2 31#include <string.h>
d7482997 32#include <assert.h>
33#include "halibut.h"
34
50d6b4bd 35/*
36 * FILENAME_TEMPLATE (overridable in config of course) allows you
37 * to choose the general form for your HTML file names. It is
38 * slightly printf-styled (% followed by a single character is a
39 * formatting directive, %% is a literal %). Formatting directives
40 * are:
41 *
ba9c1487 42 * - %n is the section type-plus-number, minus whitespace (`Chapter1.2').
50d6b4bd 43 * - %b is the section number on its own (`1.2').
44 * - %k is the section's _internal_ keyword.
45 * - %N is the section's visible title in the output, again minus
46 * whitespace.
47 *
48 * %n, %b and %k will all default to %N if the section is
49 * unnumbered (`Bibliography' is often a good example).
6d6d850c 50 *
51 * FRAGMENT_TEMPLATE is the same, but defines the <a name="foo">
52 * markers used to cross-reference to particular subsections of a
53 * file.
50d6b4bd 54 */
55
56#define FILENAME_SINGLE "Manual.html"
57#define FILENAME_CONTENTS "Contents.html"
58#define FILENAME_INDEX "IndexPage.html"
59#define FILENAME_TEMPLATE "%n.html"
6d6d850c 60#define FRAGMENT_TEMPLATE "%b"
50d6b4bd 61
/*
 * One section (chapter, heading, subsection...) of the document.
 *
 * Sections are linked in two ways at once: a tree (next/child/parent)
 * that only connects sections living in the same output file, and a
 * flat `chain' that threads every section ever created, newest first,
 * so that a section can be found without walking the trees.
 */
struct xhtmlsection_Struct {
    struct xhtmlsection_Struct *next; /* next sibling (NULL if split across files) */
    struct xhtmlsection_Struct *child; /* NULL if split across files */
    struct xhtmlsection_Struct *parent; /* NULL if split across files */
    struct xhtmlsection_Struct *chain; /* previously created section; single list independent of the trees */
    paragraph *para; /* the heading paragraph that starts this section */
    struct xhtmlfile_Struct *file; /* which file is this a part of? */
    char *fragment; /* fragment id within the file */
    int level; /* section depth; -1 on the placeholder at the end of the chain */
};
72
/*
 * One output file. Files form their own tree (next/child/parent),
 * separate from the per-file section trees.
 */
struct xhtmlfile_Struct {
    struct xhtmlfile_Struct *next; /* next sibling file */
    struct xhtmlfile_Struct *child; /* first file nested below this one */
    struct xhtmlfile_Struct *parent; /* file this one is nested below */
    char *filename; /* heap-allocated name of the file to write */
    struct xhtmlsection_Struct *sections; /* sections within this file (only one for non-leaf) */
    int is_leaf; /* is this file a leaf file, ie does it not have any children? */
};
81
typedef struct xhtmlsection_Struct xhtmlsection;
typedef struct xhtmlfile_Struct xhtmlfile;
typedef struct xhtmlindex_Struct xhtmlindex;

/*
 * Per-index-entry backend data: the list of sections an index entry
 * refers to. Hung off indexentry.backend_data.
 */
struct xhtmlindex_Struct {
    int nsection; /* number of entries of `sections' in use */
    int size; /* presumably the allocated capacity of `sections' -- TODO confirm */
    xhtmlsection **sections; /* referenced sections; individual slots may be NULL */
};
91
/*
 * Heading format for one level of the section hierarchy, set from the
 * xhtml-chapter-* and xhtml-section-* configuration directives.
 */
typedef struct {
    int just_numbers; /* boolean, from the xhtml-*-numeric directives */
    wchar_t *number_suffix; /* from the xhtml-*-suffix directives; not owned by this struct */
} xhtmlheadfmt;
96
/*
 * All user-configurable settings for this backend, as filled in by
 * xhtml_configure() from \cfg{xhtml-...} paragraphs.
 */
typedef struct {
    int contents_depth[6]; /* xhtml-contents-depth-0 .. -5 */
    int leaf_contains_contents; /* xhtml-leaf-contains-contents */
    int leaf_level; /* xhtml-leaf-level; 0 means single-file output */
    int leaf_smallest_contents; /* xhtml-leaf-smallest-contents */
    int include_version_id; /* xhtml-versionid */
    /* The wide strings below point into the source paragraphs' keyword
     * data (or are NULL); they are not owned by this structure. */
    wchar_t *author, *description;
    wchar_t *head_end, *body, *body_start, *body_end, *address_start, *address_end, *nav_attrs;
    int suppress_address; /* xhtml-suppress-address */
    /* Heading formats: one for chapters, plus an array of nfsect
     * entries for section levels. fsect is heap-allocated. */
    xhtmlheadfmt fchapter, *fsect;
    int nfsect;
    /* File names and name templates; each is a heap-allocated string. */
    char *contents_filename, *index_filename;
    char *single_filename, *template_filename, *template_fragment;
} xhtmlconfig;
111
112/*static void xhtml_level(paragraph *, int);
113static void xhtml_level_0(paragraph *);
114static void xhtml_docontents(FILE *, paragraph *, int);
115static void xhtml_dosections(FILE *, paragraph *, int);
116static void xhtml_dobody(FILE *, paragraph *, int);*/
117
/* Per-file boilerplate written at the start and end of each output file. */
static void xhtml_doheader(FILE *, word *);
static void xhtml_dofooter(FILE *);
static void xhtml_versionid(FILE *, word *, int);

/* Small utilities: wide-to-narrow string conversion, and mapping a
 * paragraph to its section level (-1 if it is not a section heading). */
static void xhtml_utostr(wchar_t *, char **);
static int xhtml_para_level(paragraph *);
static int xhtml_reservedchar(int);

/* Text conversion and paragraph/heading output. */
static int xhtml_convert(wchar_t *, int, char **, int);
static void xhtml_rdaddwc(rdstringc *, word *, word *, int);
static void xhtml_para(FILE *, word *, int);
static void xhtml_codepara(FILE *, word *);
static void xhtml_heading(FILE *, paragraph *, int);
d7482997 131
132/* File-global variables are much easier than passing these things
133 * all over the place. Evil, but easier. We can replace this with a single
134 * structure at some point.
135 */
static xhtmlconfig conf; /* active configuration, set by xhtml_backend() */
static keywordlist *keywords; /* keyword list passed to xhtml_backend() */
static indexdata *idx; /* index data passed to xhtml_backend() */
static xhtmlfile *topfile; /* root of the file tree (the contents/single file) */
static xhtmlsection *topsection; /* head of the section chain (last section created) */
static paragraph *sourceparas; /* the whole source document */
static xhtmlfile *lastfile; /* reset to NULL by xhtml_ponder_layout() */
static xhtmlfile *xhtml_last_file = NULL; /* file most recently written; target of "Previous" links */
static int last_level=-1, start_level; /* NOTE(review): not used in the code visible here */
static xhtmlsection *currentsection; /* NOTE(review): not used in the code visible here */
146
147static xhtmlconfig xhtml_configure(paragraph *source)
148{
149 xhtmlconfig ret;
150
151 /*
152 * Defaults.
153 */
154 ret.contents_depth[0] = 2;
155 ret.contents_depth[1] = 3;
156 ret.contents_depth[2] = 4;
157 ret.contents_depth[3] = 5;
158 ret.contents_depth[4] = 6;
159 ret.contents_depth[5] = 7;
160 ret.leaf_level = 2;
161 ret.leaf_smallest_contents = 4;
162 ret.leaf_contains_contents = FALSE;
163 ret.include_version_id = TRUE;
164 ret.author = NULL;
165 ret.description = NULL;
166 ret.head_end = NULL;
167 ret.body = NULL;
168 ret.body_start = NULL;
169 ret.body_end = NULL;
170 ret.address_start = NULL;
171 ret.address_end = NULL;
172 ret.nav_attrs = NULL;
173 ret.suppress_address = FALSE;
174
5d9cc07b 175 ret.fchapter.just_numbers = FALSE;
e5e6bf9d 176 ret.fchapter.number_suffix = L": ";
5d9cc07b 177 ret.nfsect = 2;
178 ret.fsect = mknewa(xhtmlheadfmt, ret.nfsect);
179 ret.fsect[0].just_numbers = FALSE;
e5e6bf9d 180 ret.fsect[0].number_suffix = L": ";
5d9cc07b 181 ret.fsect[1].just_numbers = TRUE;
e5e6bf9d 182 ret.fsect[1].number_suffix = L" ";
50d6b4bd 183 ret.contents_filename = strdup(FILENAME_CONTENTS);
184 ret.single_filename = strdup(FILENAME_SINGLE);
185 ret.index_filename = strdup(FILENAME_INDEX);
186 ret.template_filename = strdup(FILENAME_TEMPLATE);
6d6d850c 187 ret.template_fragment = strdup(FRAGMENT_TEMPLATE);
5d9cc07b 188
d7482997 189 for (; source; source = source->next)
190 {
191 if (source->type == para_Config)
192 {
50d6b4bd 193 if (!ustricmp(source->keyword, L"xhtml-contents-filename")) {
194 sfree(ret.contents_filename);
195 ret.contents_filename = utoa_dup(uadv(source->keyword));
196 } else if (!ustricmp(source->keyword, L"xhtml-single-filename")) {
197 sfree(ret.single_filename);
198 ret.single_filename = utoa_dup(uadv(source->keyword));
199 } else if (!ustricmp(source->keyword, L"xhtml-index-filename")) {
200 sfree(ret.index_filename);
201 ret.index_filename = utoa_dup(uadv(source->keyword));
202 } else if (!ustricmp(source->keyword, L"xhtml-template-filename")) {
203 sfree(ret.template_filename);
204 ret.template_filename = utoa_dup(uadv(source->keyword));
6d6d850c 205 } else if (!ustricmp(source->keyword, L"xhtml-template-fragment")) {
206 sfree(ret.template_fragment);
207 ret.template_fragment = utoa_dup(uadv(source->keyword));
50d6b4bd 208 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-0")) {
d7482997 209 ret.contents_depth[0] = utoi(uadv(source->keyword));
210 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-1")) {
211 ret.contents_depth[1] = utoi(uadv(source->keyword));
212 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-2")) {
213 ret.contents_depth[2] = utoi(uadv(source->keyword));
214 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-3")) {
215 ret.contents_depth[3] = utoi(uadv(source->keyword));
216 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-4")) {
217 ret.contents_depth[4] = utoi(uadv(source->keyword));
218 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-5")) {
219 ret.contents_depth[5] = utoi(uadv(source->keyword));
220 } else if (!ustricmp(source->keyword, L"xhtml-leaf-level")) {
221 ret.leaf_level = utoi(uadv(source->keyword));
d7482997 222 } else if (!ustricmp(source->keyword, L"xhtml-leaf-smallest-contents")) {
223 ret.leaf_smallest_contents = utoi(uadv(source->keyword));
224 } else if (!ustricmp(source->keyword, L"xhtml-versionid")) {
225 ret.include_version_id = utob(uadv(source->keyword));
226 } else if (!ustricmp(source->keyword, L"xhtml-leaf-contains-contents")) {
227 ret.leaf_contains_contents = utob(uadv(source->keyword));
228 } else if (!ustricmp(source->keyword, L"xhtml-suppress-address")) {
229 ret.suppress_address = utob(uadv(source->keyword));
230 } else if (!ustricmp(source->keyword, L"xhtml-author")) {
231 ret.author = uadv(source->keyword);
232 } else if (!ustricmp(source->keyword, L"xhtml-description")) {
233 ret.description = uadv(source->keyword);
234 } else if (!ustricmp(source->keyword, L"xhtml-head-end")) {
235 ret.head_end = uadv(source->keyword);
236 } else if (!ustricmp(source->keyword, L"xhtml-body-start")) {
237 ret.body_start = uadv(source->keyword);
238 } else if (!ustricmp(source->keyword, L"xhtml-body-tag")) {
239 ret.body = uadv(source->keyword);
240 } else if (!ustricmp(source->keyword, L"xhtml-body-end")) {
241 ret.body_end = uadv(source->keyword);
242 } else if (!ustricmp(source->keyword, L"xhtml-address-start")) {
243 ret.address_start = uadv(source->keyword);
244 } else if (!ustricmp(source->keyword, L"xhtml-address-end")) {
245 ret.address_end = uadv(source->keyword);
246 } else if (!ustricmp(source->keyword, L"xhtml-navigation-attributes")) {
247 ret.nav_attrs = uadv(source->keyword);
5d9cc07b 248 } else if (!ustricmp(source->keyword, L"xhtml-chapter-numeric")) {
249 ret.fchapter.just_numbers = utob(uadv(source->keyword));
250 } else if (!ustricmp(source->keyword, L"xhtml-chapter-suffix")) {
e5e6bf9d 251 ret.fchapter.number_suffix = uadv(source->keyword);
5d9cc07b 252 } else if (!ustricmp(source->keyword, L"xhtml-section-numeric")) {
253 wchar_t *p = uadv(source->keyword);
254 int n = 0;
255 if (uisdigit(*p)) {
256 n = utoi(p);
257 p = uadv(p);
258 }
259 if (n >= ret.nfsect) {
260 int i;
261 ret.fsect = resize(ret.fsect, n+1);
262 for (i = ret.nfsect; i <= n; i++)
263 ret.fsect[i] = ret.fsect[ret.nfsect-1];
264 ret.nfsect = n+1;
265 }
266 ret.fsect[n].just_numbers = utob(p);
267 } else if (!ustricmp(source->keyword, L"xhtml-section-suffix")) {
268 wchar_t *p = uadv(source->keyword);
269 int n = 0;
270 if (uisdigit(*p)) {
271 n = utoi(p);
272 p = uadv(p);
273 }
274 if (n >= ret.nfsect) {
275 int i;
276 ret.fsect = resize(ret.fsect, n+1);
277 for (i = ret.nfsect; i <= n; i++)
278 ret.fsect[i] = ret.fsect[ret.nfsect-1];
279 ret.nfsect = n+1;
280 }
e5e6bf9d 281 ret.fsect[n].number_suffix = p;
d7482997 282 }
283 }
284 }
285
286 /* printf(" !!! leaf_level = %i\n", ret.leaf_level);
287 printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]);
288 printf(" !!! contentdepth-1 = %i\n", ret.contents_depth[1]);
289 printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]);
290 printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]);
291 printf(" !!! contentdepth-4 = %i\n", ret.contents_depth[4]);
292 printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]);
293 printf(" !!! leaf_contains_contents = %i\n", ret.leaf_contains_contents);*/
294 return ret;
295}
296
ba9c1487 297paragraph *xhtml_config_filename(char *filename)
298{
299 /*
300 * If the user passes in a single filename as a parameter to
301 * the `--html' command-line option, then we should assume it
302 * to imply _two_ config directives:
303 * \cfg{xhtml-single-filename}{whatever} and
304 * \cfg{xhtml-leaf-level}{0}; the rationale being that the user
305 * wants their output _in that file_.
306 */
307
308 paragraph *p[2];
309 int i, len;
310 wchar_t *ufilename, *up;
311
312 for (i = 0; i < 2; i++) {
313 p[i] = mknew(paragraph);
314 memset(p[i], 0, sizeof(*p[i]));
315 p[i]->type = para_Config;
316 p[i]->next = NULL;
317 p[i]->fpos.filename = "<command line>";
318 p[i]->fpos.line = p[i]->fpos.col = -1;
319 }
320
321 ufilename = ufroma_dup(filename);
322 len = ustrlen(ufilename) + 2 + lenof(L"xhtml-single-filename");
323 p[0]->keyword = mknewa(wchar_t, len);
324 up = p[0]->keyword;
325 ustrcpy(up, L"xhtml-single-filename");
326 up = uadv(up);
327 ustrcpy(up, ufilename);
328 up = uadv(up);
329 *up = L'\0';
330 assert(up - p[0]->keyword < len);
331 sfree(ufilename);
332
333 len = lenof(L"xhtml-leaf-level") + lenof(L"0") + 1;
334 p[1]->keyword = mknewa(wchar_t, len);
335 up = p[1]->keyword;
336 ustrcpy(up, L"xhtml-leaf-level");
337 up = uadv(up);
338 ustrcpy(up, L"0");
339 up = uadv(up);
340 *up = L'\0';
341 assert(up - p[1]->keyword < len);
342
343 p[0]->next = p[1];
344
345 return p[0];
346}
347
d7482997 348static xhtmlsection *xhtml_new_section(xhtmlsection *last)
349{
350 xhtmlsection *ret = mknew(xhtmlsection);
351 ret->next=NULL;
352 ret->child=NULL;
353 ret->parent=NULL;
354 ret->chain=last;
355 ret->para=NULL;
356 ret->file=NULL;
357 ret->fragment=NULL;
358 ret->level=-1; /* marker: end of chain */
359 return ret;
360}
361
362/* Returns NULL or the section that marks that paragraph */
363static xhtmlsection *xhtml_find_section(paragraph *p)
364{
365 xhtmlsection *ret = topsection;
366 if (xhtml_para_level(p)==-1) { /* first, we back-track to a section paragraph */
367 paragraph *p2 = sourceparas;
368 paragraph *p3 = NULL;
369 while (p2 && p2!=p) {
370 if (xhtml_para_level(p2)!=-1) {
371 p3 = p2;
372 }
373 p2=p2->next;
374 }
375 if (p3==NULL) { /* for some reason, we couldn't find a section before this paragraph ... ? */
376 /* Note that this can happen, if you have a cross-reference to before the first chapter starts.
377 * So don't do that, then.
378 */
379 return NULL;
380 }
381 p=p3;
382 }
383 while (ret && ret->para != p) {
384/* printf(" xhtml_find_section(): checking %s for para @ %p\n", ret->fragment, p);*/
385 ret=ret->chain;
386 }
387 return ret;
388}
389
6d6d850c 390static void xhtml_format(paragraph *p, char *template_string, rdstringc *r)
391{
392 char *c, *t;
393 word *w;
394 wchar_t *ws;
395
396 t = template_string;
397 while (*t) {
398 if (*t == '%' && t[1]) {
399 int fmt;
400
401 t++;
402 fmt = *t++;
403
404 if (fmt == '%') {
405 rdaddc(r, fmt);
406 continue;
407 }
408
409 w = NULL;
410 ws = NULL;
411
412 if (p->kwtext && fmt == 'n')
413 w = p->kwtext;
414 else if (p->kwtext2 && fmt == 'b')
415 w = p->kwtext2;
416 else if (p->keyword && *p->keyword && fmt == 'k')
417 ws = p->keyword;
418 else
419 w = p->words;
420
421 while (w) {
422 switch (removeattr(w->type))
423 {
424 case word_Normal:
425 /*case word_Emph:
426 case word_Code:
427 case word_WeakCode:*/
428 xhtml_utostr(w->text, &c);
429 rdaddsc(r,c);
430 sfree(c);
431 break;
432 }
433 w = w->next;
434 }
435 if (ws) {
436 xhtml_utostr(ws, &c);
437 rdaddsc(r,c);
438 sfree(c);
439 }
440 } else {
441 rdaddc(r, *t++);
442 }
443 }
444}
445
d7482997 446static xhtmlfile *xhtml_new_file(xhtmlsection *sect)
447{
448 xhtmlfile *ret = mknew(xhtmlfile);
449
450 ret->next=NULL;
451 ret->child=NULL;
452 ret->parent=NULL;
453 ret->filename=NULL;
454 ret->sections=sect;
455 ret->is_leaf=(sect!=NULL && sect->level==conf.leaf_level);
456 if (sect==NULL) {
457 if (conf.leaf_level==0) { /* currently unused */
50d6b4bd 458 ret->filename = smalloc(strlen(conf.single_filename)+1);
459 sprintf(ret->filename, conf.single_filename);
d7482997 460 } else {
50d6b4bd 461 ret->filename = smalloc(strlen(conf.contents_filename)+1);
462 sprintf(ret->filename, conf.contents_filename);
d7482997 463 }
464 } else {
465 paragraph *p = sect->para;
466 rdstringc fname_c = { 0, 0, NULL };
6d6d850c 467 xhtml_format(p, conf.template_filename, &fname_c);
d7482997 468 ret->filename = rdtrimc(&fname_c);
469 }
470 /* printf(" ! new file '%s', is_leaf == %s\n", ret->filename, (ret->is_leaf)?("true"):("false"));*/
471 return ret;
472}
473
474/*
475 * Walk the tree fixing up files which are actually leaf (ie
476 * have no children) but aren't at leaf level, so they have the
477 * leaf flag set.
478 */
479void xhtml_fixup_layout(xhtmlfile* file)
480{
481 if (file->child==NULL) {
482 file->is_leaf = TRUE;
483 } else {
484 xhtml_fixup_layout(file->child);
485 }
486 if (file->next)
487 xhtml_fixup_layout(file->next);
488}
489
490/*
491 * Create the tree structure so we know where everything goes.
492 * Method:
493 *
494 * Ignoring file splitting, we have three choices with each new section:
495 *
496 * +-----------------+-----------------+
497 * | | |
498 * X +----X----+ (1)
499 * | |
5d9cc07b 500 * Y (2)
d7482997 501 * |
502 * (3)
503 *
504 * Y is the last section we added (currentsect).
505 * If sect is the section we want to add, then:
506 *
507 * (1) if sect->level < currentsect->level
508 * (2) if sect->level == currentsect->level
509 * (3) if sect->level > currentsect->level
510 *
511 * This requires the constraint that you never skip section numbers
512 * (so you can't have a.b.c.d without all of a, a.b and a.b.c existing).
513 *
514 * Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change
515 * more than one level at a time. Lots of asserts, and probably part of
516 * the algorithm here, rely on this being true. (It currently isn't
517 * enforced by halibut, however.)
518 *
519 * File splitting makes this harder. For instance, say we added at (3)
520 * above and now need to add another section. We are splitting at level
521 * 2, ie the level of Y. Z is the last section we added:
522 *
523 * +-----------------+-----------------+
524 * | | |
525 * X +----X----+ (1)
526 * | |
527 * +----Y----+ (1)
528 * | |
529 * Z (2)
530 * |
531 * (3)
532 *
533 * The (1) case is now split; we need to search upwards to find where
534 * to actually link in. The other two cases remain the same (and will
535 * always be like this).
536 *
 * Although file splitting complicates the linking, the decision of
 * whether to start a new file always rests on one condition: a section
 * whose level is greater than the leaf_level configuration value stays
 * in the current file; any other section starts a new file.
541 *
542 * Treating the cases backwards:
543 *
544 * (3) same file if sect->level > conf.leaf_level, otherwise new file
545 *
546 * if in the same file, currentsect->child points to sect
547 * otherwise the linking is done through the file tree (which works
548 * in more or less the same way, ie currentfile->child points to
549 * the new file)
550 *
551 * (2) same file if sect->level > conf.leaf_level, otherwise new file
552 *
553 * if in the same file, currentsect->next points to sect
554 * otherwise file linking and currentfile->next points to the new
555 * file (we know that Z must have caused a new file to be created)
556 *
557 * (1) same file if sect->level > conf.leaf_level, otherwise new file
558 *
559 * this is actually effectively the same case as (2) here,
560 * except that we first have to travel up the sections to figure
561 * out which section this new one will be a sibling of. In doing
562 * so, we may disappear off the top of a file and have to go up
563 * to its parent in the file tree.
564 *
565 */
/*
 * Build the section chain and the file tree for the document starting
 * at paragraph `p'.
 *
 * On return, `topfile' is the root of the file tree, `topsection' is
 * the head of the section chain (the last section created), and every
 * section has its fragment id, level and owning file filled in.
 * Paragraphs whose level is not greater than 0 (ordinary paragraphs
 * and the title) do not create sections.
 */
static void xhtml_ponder_layout(paragraph *p)
{
    xhtmlsection *lastsection; /* most recently created section (chain head) */
    xhtmlsection *currentsect; /* section the next one is positioned relative to */
    xhtmlfile *currentfile; /* file the next section is positioned relative to */

    lastfile = NULL;
    /* Start with a placeholder section (level -1) and the top-level file. */
    topsection = xhtml_new_section(NULL);
    topfile = xhtml_new_file(NULL);
    lastsection = topsection;
    currentfile = topfile;
    currentsect = topsection;

    /* Single-file mode: the top file is itself the leaf, and the
     * placeholder section is the root of its section tree. */
    if (conf.leaf_level == 0) {
        topfile->is_leaf = 1;
        topfile->sections = topsection;
        topsection->file = topfile;
    }

    for (; p; p=p->next)
    {
        int level = xhtml_para_level(p);
        if (level>0) /* actually a section */
        {
            xhtmlsection *sect;
            rdstringc frag_c = { 0, 0, NULL };

            /* Create the section and thread it onto the chain. */
            sect = xhtml_new_section(lastsection);
            lastsection = sect;
            sect->para = p;

            /* Its fragment id comes from the fragment template. */
            xhtml_format(p, conf.template_fragment, &frag_c);
            sect->fragment = rdtrimc(&frag_c);
            sect->level = level;

            if (level>currentsect->level) { /* case (3): deeper than the last section */
                if (level>conf.leaf_level) { /* same file */
                    assert(currentfile->is_leaf);
                    currentsect->child = sect;
                    sect->parent=currentsect;
                    sect->file=currentfile;
                    currentsect=sect;
                } else { /* new file, nested below the current one */
                    xhtmlfile *file = xhtml_new_file(sect);
                    assert(!currentfile->is_leaf);
                    currentfile->child=file;
                    sect->file=file;
                    file->parent=currentfile;
                    currentfile=file;
                    currentsect=sect;
                }
            } else if (level >= currentsect->file->sections->level) {
                /* Case (1) or (2) *AND* still under the section that starts
                 * the current file.
                 *
                 * I'm not convinced that this couldn't be rolled in with the
                 * final else {} leg further down. It seems a lot of effort
                 * this way.
                 */
                if (level>conf.leaf_level) { /* stick within the same file */
                    assert(currentfile->is_leaf);
                    sect->file = currentfile;
                    /* Climb to the section at the new section's level,
                     * without leaving the current file. */
                    while (currentsect && currentsect->level > level &&
                           currentsect->file==currentsect->parent->file) {
                        currentsect = currentsect->parent;
                    }
                    assert(currentsect);
                    /* Link in as the next sibling of that section. */
                    currentsect->next = sect;
                    assert(currentsect->level == sect->level);
                    sect->parent = currentsect->parent;
                    currentsect = sect;
                } else { /* new file, as a sibling of the current one */
                    xhtmlfile *file = xhtml_new_file(sect);
                    sect->file=file;
                    currentfile->next=file;
                    file->parent=currentfile->parent;
                    file->is_leaf=(level==conf.leaf_level);
                    file->sections=sect;
                    currentfile=file;
                    currentsect=sect;
                }
            } else { /* Case (1) or (2) and we must move up the file tree first */
                /* this loop is now probably irrelevant - we know we can't connect
                 * to anything in the current file */
                while (currentsect && level<currentsect->level) {
                    currentsect=currentsect->parent;
                    if (currentsect) {
                        /* moved up one level within the current file */
                    } else {
                        /* moved up off the top of the current file */
                    }
                }
                if (currentsect) {
                    /* I'm pretty sure this can now never fire */
                    assert(currentfile->is_leaf);
                    sect->file = currentfile;
                    currentsect->next=sect;
                    currentsect=sect;
                } else { /* find a file we can attach to */
                    /* Climb the file tree until we reach a file whose
                     * head section is not deeper than the new section
                     * (or a file with no sections, or run out of files). */
                    while (currentfile && currentfile->sections && level<currentfile->sections->level) {
                        currentfile=currentfile->parent;
                        if (currentfile) {
                            /* moved up one file level */
                        } else {
                            /* moved up off the top of the file tree */
                        }
                    }
                    if (currentfile) { /* new file (we had to skip up a file to
                                          get here, so we must be dealing with a
                                          level no lower than the configured
                                          leaf_level */
                        xhtmlfile *file = xhtml_new_file(sect);
                        currentfile->next=file;
                        sect->file=file;
                        file->parent=currentfile->parent;
                        file->is_leaf=(level==conf.leaf_level);
                        file->sections=sect;
                        currentfile=file;
                        currentsect=sect;
                    } else {
                        fatal(err_whatever, "Ran off the top trying to connect sibling: strange document.");
                    }
                }
            }
        }
    }
    topsection = lastsection; /* get correct end of the chain */
    xhtml_fixup_layout(topfile); /* leaf files not at leaf level marked as such */
}
702
/* Forward declarations for the file-writing routines. */

/* Whole-file writers: the index page, an ordinary file, the top file. */
static void xhtml_do_index();
static void xhtml_do_file(xhtmlfile *file);
static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform);
/* Body paragraphs from `p' up to (presumably not including) `end' -- TODO confirm */
static void xhtml_do_paras(FILE *fp, paragraph *p, paragraph *end, int indexable);
/* Contents listings. xhtml_do_contents() is also called with a NULL
 * FILE to obtain a count without writing anything. */
static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit);
static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit);
static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit);
static int xhtml_do_contents(FILE *fp, xhtmlfile *file);
static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file);
/* Section bodies for one file. */
static void xhtml_do_sections(FILE *fp, xhtmlsection *sections);
713
714/*
715 * Do all the files in this structure.
716 */
717static void xhtml_do_files(xhtmlfile *file)
718{
719 xhtml_do_file(file);
720 if (file->child)
721 xhtml_do_files(file->child);
722 if (file->next)
723 xhtml_do_files(file->next);
724}
725
726/*
727 * Free up all memory used by the file tree from 'xfile' downwards
728 */
729static void xhtml_free_file(xhtmlfile* xfile)
730{
731 if (xfile==NULL) {
732 return;
733 }
734
735 if (xfile->filename) {
736 sfree(xfile->filename);
737 }
738 xhtml_free_file(xfile->child);
739 xhtml_free_file(xfile->next);
740 sfree(xfile);
741}
742
743/*
744 * Main function.
745 */
746void xhtml_backend(paragraph *sourceform, keywordlist *in_keywords,
747 indexdata *in_idx)
748{
749/* int i;*/
750 indexentry *ientry;
751 int ti;
752 xhtmlsection *xsect;
753
754 sourceparas = sourceform;
755 conf = xhtml_configure(sourceform);
756 keywords = in_keywords;
757 idx = in_idx;
758
759 /* Clear up the index entries backend data pointers */
760 for (ti=0; (ientry = (indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
761 ientry->backend_data=NULL;
762 }
763
764 xhtml_ponder_layout(sourceform);
765
766 /* old system ... (writes to *.alt, but gets some stuff wrong and is ugly) */
767/* xhtml_level_0(sourceform);
768 for (i=1; i<=conf.leaf_level; i++)
769 {
770 xhtml_level(sourceform, i);
771 }*/
772
773 /* new system ... (writes to *.html, but isn't fully trusted) */
774 xhtml_do_top_file(topfile, sourceform);
775 assert(!topfile->next); /* shouldn't have a sibling at all */
d2e74722 776 if (topfile->child) {
777 xhtml_do_files(topfile->child);
778 xhtml_do_index();
779 }
d7482997 780
781 /* release file, section, index data structures */
782 xsect = topsection;
783 while (xsect) {
784 xhtmlsection *tmp = xsect->chain;
785 if (xsect->fragment) {
786 sfree(xsect->fragment);
787 }
788 sfree(xsect);
789 xsect = tmp;
790 }
791 xhtml_free_file(topfile);
792 for (ti = 0; (ientry=(indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
793 if (ientry->backend_data!=NULL) {
794 xhtmlindex *xi = (xhtmlindex*) ientry->backend_data;
795 if (xi->sections!=NULL) {
796 sfree(xi->sections);
797 }
798 sfree(xi);
799 }
800 ientry->backend_data = NULL;
801 }
e5e6bf9d 802 sfree(conf.fsect);
d7482997 803}
804
805static int xhtml_para_level(paragraph *p)
806{
807 switch (p->type)
808 {
d9d3dd95 809 case para_Title:
810 return 0;
811 break;
d7482997 812 case para_UnnumberedChapter:
813 case para_Chapter:
814 case para_Appendix:
815 return 1;
816 break;
817/* case para_BiblioCited:
818 return 2;
819 break;*/
820 case para_Heading:
821 case para_Subsect:
822 return p->aux+2;
823 break;
824 default:
825 return -1;
826 break;
827 }
828}
829
d7482997 830/* Output the nav links for the current file.
831 * file == NULL means we're doing the index
832 */
833static void xhtml_donavlinks(FILE *fp, xhtmlfile *file)
834{
835 xhtmlfile *xhtml_next_file = NULL;
836 fprintf(fp, "<p");
837 if (conf.nav_attrs!=NULL) {
838 fprintf(fp, " %ls>", conf.nav_attrs);
839 } else {
840 fprintf(fp, ">");
841 }
842 if (xhtml_last_file==NULL) {
843 fprintf(fp, "Previous | ");
844 } else {
845 fprintf(fp, "<a href='%s'>Previous</a> | ", xhtml_last_file->filename);
846 }
50d6b4bd 847 fprintf(fp, "<a href='%s'>Contents</a> | ", conf.contents_filename);
25acf71d 848 if (file == NULL) {
849 fprintf(fp, "Index | ");
850 } else {
50d6b4bd 851 fprintf(fp, "<a href='%s'>Index</a> | ", conf.index_filename);
25acf71d 852 }
d7482997 853 if (file != NULL) { /* otherwise we're doing nav links for the index */
854 if (xhtml_next_file==NULL)
855 xhtml_next_file = file->child;
856 if (xhtml_next_file==NULL)
857 xhtml_next_file = file->next;
858 if (xhtml_next_file==NULL)
859 xhtml_next_file = file->parent->next;
860 }
861 if (xhtml_next_file==NULL) {
862 if (file==NULL) { /* index, so no next file */
863 fprintf(fp, "Next ");
864 } else {
50d6b4bd 865 fprintf(fp, "<a href='%s'>Next</a>", conf.index_filename);
d7482997 866 }
867 } else {
868 fprintf(fp, "<a href='%s'>Next</a>", xhtml_next_file->filename);
869 }
870 fprintf(fp, "</p>\n");
871}
872
873/* Write out the index file */
d2e74722 874static void xhtml_do_index_body(FILE *fp)
d7482997 875{
d7482997 876 indexentry *y;
877 int ti;
d7482997 878
d2e74722 879 if (count234(idx->entries) == 0)
880 return; /* don't write anything at all */
d7482997 881
882 fprintf(fp, "<dl>\n");
883 /* iterate over idx->entries using the tree functions and display everything */
884 for (ti = 0; (y = (indexentry *)index234(idx->entries, ti)) != NULL; ti++) {
885 if (y->backend_data) {
886 int i;
887 xhtmlindex *xi;
888
889 fprintf(fp, "<dt>");
ce9921d6 890 xhtml_para(fp, y->text, FALSE);
d7482997 891 fprintf(fp, "</dt>\n<dd>");
892
893 xi = (xhtmlindex*) y->backend_data;
894 for (i=0; i<xi->nsection; i++) {
895 xhtmlsection *sect = xi->sections[i];
896 if (sect) {
897 fprintf(fp, "<a href='%s#%s'>", sect->file->filename, sect->fragment);
898 if (sect->para->kwtext) {
ce9921d6 899 xhtml_para(fp, sect->para->kwtext, FALSE);
d7482997 900 } else if (sect->para->words) {
ce9921d6 901 xhtml_para(fp, sect->para->words, FALSE);
d7482997 902 }
903 fprintf(fp, "</a>");
904 if (i+1<xi->nsection) {
905 fprintf(fp, ", ");
906 }
907 }
908 }
909 fprintf(fp, "</dd>\n");
910 }
911 }
912 fprintf(fp, "</dl>\n");
d2e74722 913}
914static void xhtml_do_index()
915{
916 word temp_word = { NULL, NULL, word_Normal, 0, 0, L"Index", { NULL, 0, 0} };
50d6b4bd 917 FILE *fp = fopen(conf.index_filename, "w");
d2e74722 918
919 if (fp==NULL)
50d6b4bd 920 fatal(err_cantopenw, conf.index_filename);
d2e74722 921 xhtml_doheader(fp, &temp_word);
922 xhtml_donavlinks(fp, NULL);
923
924 xhtml_do_index_body(fp);
d7482997 925
926 xhtml_donavlinks(fp, NULL);
927 xhtml_dofooter(fp);
928 fclose(fp);
929}
930
931/* Output the given file. This includes whatever contents at beginning and end, etc. etc. */
932static void xhtml_do_file(xhtmlfile *file)
933{
934 FILE *fp = fopen(file->filename, "w");
935 if (fp==NULL)
936 fatal(err_cantopenw, file->filename);
937
938 if (file->sections->para->words) {
939 xhtml_doheader(fp, file->sections->para->words);
940 } else if (file->sections->para->kwtext) {
941 xhtml_doheader(fp, file->sections->para->kwtext);
942 } else {
943 xhtml_doheader(fp, NULL);
944 }
945
946 xhtml_donavlinks(fp, file);
947
d2e74722 948 if (file->is_leaf && conf.leaf_contains_contents &&
949 xhtml_do_contents(NULL, file)>=conf.leaf_smallest_contents)
d7482997 950 xhtml_do_contents(fp, file);
951 xhtml_do_sections(fp, file->sections);
952 if (!file->is_leaf)
953 xhtml_do_naked_contents(fp, file);
954
955 xhtml_donavlinks(fp, file);
956
957 xhtml_dofooter(fp);
958 fclose(fp);
959
960 xhtml_last_file = file;
961}
962
963/* Output the top-level file. */
964static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform)
965{
966 paragraph *p;
967 int done=FALSE;
968 FILE *fp = fopen(file->filename, "w");
969 if (fp==NULL)
970 fatal(err_cantopenw, file->filename);
971
972 /* Do the title -- only one allowed */
973 for (p = sourceform; p && !done; p = p->next)
974 {
975 if (p->type == para_Title)
976 {
977 xhtml_doheader(fp, p->words);
978 done=TRUE;
979 }
980 }
981 if (!done)
982 xhtml_doheader(fp, NULL /* Eek! */);
983
d2e74722 984 /*
985 * Display the title.
986 */
987 for (p = sourceform; p; p = p->next)
988 {
989 if (p->type == para_Title) {
ce9921d6 990 xhtml_heading(fp, p, FALSE);
d2e74722 991 break;
992 }
993 }
994
9057a0a8 995 /* Do the preamble */
d7482997 996 for (p = sourceform; p; p = p->next)
997 {
8902e0ed 998 if (p->type == para_Chapter || p->type == para_Heading ||
999 p->type == para_Subsect || p->type == para_Appendix ||
1000 p->type == para_UnnumberedChapter) {
1001 /*
1002 * We've found the end of the preamble. Do every normal
1003 * paragraph up to there.
1004 */
ce9921d6 1005 xhtml_do_paras(fp, sourceform, p, FALSE);
8902e0ed 1006 break;
d7482997 1007 }
1008 }
d7482997 1009
1010 xhtml_do_contents(fp, file);
1011 xhtml_do_sections(fp, file->sections);
d2e74722 1012
5d9cc07b 1013 /*
1014 * Put the index in the top file if we're in single-file mode
1015 * (leaf-level 0).
1016 */
1017 if (conf.leaf_level == 0 && count234(idx->entries) > 0) {
d2e74722 1018 fprintf(fp, "<a name=\"index\"></a><h1>Index</h1>\n");
1019 xhtml_do_index_body(fp);
1020 }
1021
d7482997 1022 xhtml_dofooter(fp);
1023 fclose(fp);
1024}
1025
/*
 * Convert a wide string to a freshly allocated ASCII string stored in
 * `*out'. Characters outside the printable range 32..126 are
 * replaced by '?'.
 */
static void xhtml_utostr(wchar_t *in, char **out)
{
  int len = ustrlen(in);
  int k;
  char *buf;

  *out = buf = smalloc(len+1);
  for (k = 0; k < len; k++) {
    wchar_t wc = in[k];
    buf[k] = (wc >= 32 && wc <= 126) ? (char)wc : '?';
  }
  buf[k] = 0;
}
1043
1044/*
1045 * Write contents for the given file, and subfiles, down to
1046 * the appropriate contents depth. Returns the number of
1047 * entries written.
1048 */
1049static int xhtml_do_contents(FILE *fp, xhtmlfile *file)
1050{
c8c7926b 1051 int level, limit, count = 0;
d7482997 1052 if (!file)
1053 return 0;
1054
1055 level = (file->sections)?(file->sections->level):(0);
1056 limit = conf.contents_depth[(level>5)?(5):(level)];
1057 start_level = (file->is_leaf) ? (level-1) : (level);
1058 last_level = start_level;
1059
1060 count += xhtml_do_contents_section_limit(fp, file->sections, limit);
1061 count += xhtml_do_contents_limit(fp, file->child, limit);
1062 if (fp!=NULL) {
1063 while (last_level > start_level) {
1064 last_level--;
c8c7926b 1065 fprintf(fp, "</li></ul>\n");
d7482997 1066 }
1067 }
1068 return count;
1069}
1070
1071/* As above, but doesn't do anything in the current file */
1072static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file)
1073{
1074 int level, limit, start_level, count = 0;
1075 if (!file)
1076 return 0;
1077
1078 level = (file->sections)?(file->sections->level):(0);
1079 limit = conf.contents_depth[(level>5)?(5):(level)];
1080 start_level = (file->is_leaf) ? (level-1) : (level);
1081 last_level = start_level;
1082
1083 count = xhtml_do_contents_limit(fp, file->child, limit);
1084 if (fp!=NULL) {
1085 while (last_level > start_level) {
1086 last_level--;
c8c7926b 1087 fprintf(fp, "</li></ul>\n");
d7482997 1088 }
1089 }
1090 return count;
1091}
1092
1093/*
1094 * Write contents for the given file, children, and siblings, down to
1095 * given limit contents depth.
1096 */
1097static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit)
1098{
1099 int count = 0;
1100 while (file) {
1101 count += xhtml_do_contents_section_limit(fp, file->sections, limit);
1102 count += xhtml_do_contents_limit(fp, file->child, limit);
1103 file = file->next;
1104 }
1105 return count;
1106}
1107
1108/*
1109 * Write contents entries for the given section tree, down to the
1110 * limit contents depth.
1111 */
1112static int xhtml_do_contents_section_deep_limit(FILE *fp, xhtmlsection *section, int limit)
1113{
1114 int count = 0;
1115 while (section) {
1116 if (!xhtml_add_contents_entry(fp, section, limit))
1117 return 0;
1118 else
1119 count++;
1120 count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
1121 section = section->next;
1122 }
1123 return count;
1124}
1125
1126/*
1127 * Write contents entries for the given section tree, down to the
1128 * limit contents depth.
1129 */
1130static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit)
1131{
1132 int count = 0;
1133 if (!section)
1134 return 0;
1135 xhtml_add_contents_entry(fp, section, limit);
1136 count=1;
1137 count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
1138 /* section=section->child;
1139 while (section && xhtml_add_contents_entry(fp, section, limit)) {
1140 section = section->next;
1141 }*/
1142 return count;
1143}
1144
1145/*
1146 * Add a section entry, unless we're exceeding the limit, in which
1147 * case return FALSE (otherwise return TRUE).
1148 */
1149static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit)
1150{
1151 if (!section || section->level > limit)
1152 return FALSE;
5d9cc07b 1153 if (fp==NULL || section->level < 0)
d7482997 1154 return TRUE;
c8c7926b 1155 if (last_level > section->level) {
1156 while (last_level > section->level) {
1157 last_level--;
1158 fprintf(fp, "</li></ul>\n");
1159 }
1160 fprintf(fp, "</li>\n");
1161 } else if (last_level < section->level) {
1162 assert(last_level == section->level - 1);
d7482997 1163 last_level++;
1164 fprintf(fp, "<ul>\n");
c8c7926b 1165 } else {
1166 fprintf(fp, "</li>\n");
d7482997 1167 }
1168 fprintf(fp, "<li><a href=\"%s#%s\">", section->file->filename, section->fragment);
1169 if (section->para->kwtext) {
ce9921d6 1170 xhtml_para(fp, section->para->kwtext, FALSE);
d7482997 1171 if (section->para->words) {
1172 fprintf(fp, ": ");
1173 }
1174 }
1175 if (section->para->words) {
ce9921d6 1176 xhtml_para(fp, section->para->words, FALSE);
d7482997 1177 }
c8c7926b 1178 fprintf(fp, "</a>\n");
d7482997 1179 return TRUE;
1180}
1181
1182/*
1183 * Write all the sections in this file. Do all paragraphs in this section, then all
1184 * children (recursively), then go on to the next one (tail recursively).
1185 */
1186static void xhtml_do_sections(FILE *fp, xhtmlsection *sections)
1187{
1188 while (sections) {
1189 currentsection = sections;
ce9921d6 1190 xhtml_do_paras(fp, sections->para, NULL, TRUE);
d7482997 1191 xhtml_do_sections(fp, sections->child);
1192 sections = sections->next;
1193 }
1194}
1195
1196/* Write this list of paragraphs. Close off all lists at the end. */
ce9921d6 1197static void xhtml_do_paras(FILE *fp, paragraph *p, paragraph *end,
1198 int indexable)
d7482997 1199{
7136a6c7 1200 int last_type = -1, ptype, first=TRUE;
1201 stack lcont_stack = stk_new();
d7482997 1202 if (!p)
1203 return;
1204
1205/* for (; p && (xhtml_para_level(p)>limit || xhtml_para_level(p)==-1 || first); p=p->next) {*/
8902e0ed 1206 for (; p && p != end && (xhtml_para_level(p)==-1 || first); p=p->next) {
d7482997 1207 first=FALSE;
7136a6c7 1208 switch (ptype = p->type)
d7482997 1209 {
1210 /*
1211 * Things we ignore because we've already processed them or
1212 * aren't going to touch them in this pass.
1213 */
1214 case para_IM:
1215 case para_BR:
1216 case para_Biblio: /* only touch BiblioCited */
1217 case para_VersionID:
d7482997 1218 case para_NoCite:
1219 case para_Title:
1220 break;
1221
1222 /*
1223 * Chapter titles.
1224 */
1225 case para_Chapter:
1226 case para_Appendix:
1227 case para_UnnumberedChapter:
ce9921d6 1228 xhtml_heading(fp, p, indexable);
d7482997 1229 break;
1230
1231 case para_Heading:
1232 case para_Subsect:
ce9921d6 1233 xhtml_heading(fp, p, indexable);
d7482997 1234 break;
1235
1236 case para_Rule:
1237 fprintf(fp, "\n<hr />\n");
1238 break;
1239
1240 case para_Normal:
9057a0a8 1241 case para_Copyright:
d7482997 1242 fprintf(fp, "\n<p>");
ce9921d6 1243 xhtml_para(fp, p->words, indexable);
d7482997 1244 fprintf(fp, "</p>\n");
1245 break;
1246
7136a6c7 1247 case para_LcontPush:
1248 {
1249 int *p;
1250 p = mknew(int);
1251 *p = last_type;
1252 stk_push(lcont_stack, p);
1253 last_type = para_Normal;
1254 }
1255 break;
1256 case para_LcontPop:
1257 {
1258 int *p = stk_pop(lcont_stack);
1259 assert(p);
1260 ptype = last_type = *p;
1261 sfree(p);
1262 goto closeofflist; /* ick */
1263 }
1264 break;
2614b01d 1265 case para_QuotePush:
1266 fprintf(fp, "<blockquote>\n");
1267 break;
1268 case para_QuotePop:
1269 fprintf(fp, "</blockquote>\n");
1270 break;
7136a6c7 1271
d7482997 1272 case para_Bullet:
1273 case para_NumberedList:
7136a6c7 1274 case para_Description:
1275 case para_DescribedThing:
d7482997 1276 case para_BiblioCited:
c8c7926b 1277 if (last_type!=p->type &&
1278 !(last_type==para_DescribedThing && p->type==para_Description) &&
1279 !(last_type==para_Description && p->type==para_DescribedThing)) {
d7482997 1280 /* start up list if necessary */
1281 if (p->type == para_Bullet) {
1282 fprintf(fp, "<ul>\n");
1283 } else if (p->type == para_NumberedList) {
1284 fprintf(fp, "<ol>\n");
7136a6c7 1285 } else if (p->type == para_BiblioCited ||
1286 p->type == para_DescribedThing ||
1287 p->type == para_Description) {
d7482997 1288 fprintf(fp, "<dl>\n");
1289 }
1290 }
7136a6c7 1291 if (p->type == para_Bullet || p->type == para_NumberedList) {
d7482997 1292 fprintf(fp, "<li>");
7136a6c7 1293 } else if (p->type == para_DescribedThing) {
1294 fprintf(fp, "<dt>");
1295 } else if (p->type == para_Description) {
1296 fprintf(fp, "<dd>");
1297 } else if (p->type == para_BiblioCited) {
d7482997 1298 fprintf(fp, "<dt>");
ce9921d6 1299 xhtml_para(fp, p->kwtext, indexable);
d7482997 1300 fprintf(fp, "</dt>\n<dd>");
1301 }
ce9921d6 1302 xhtml_para(fp, p->words, indexable);
7136a6c7 1303 {
1304 paragraph *p2 = p->next;
1305 if (p2 && xhtml_para_level(p2)==-1 && p2->type == para_LcontPush)
1306 break;
1307 }
1308
1309 closeofflist:
1310 if (ptype == para_BiblioCited) {
d7482997 1311 fprintf(fp, "</dd>\n");
c8c7926b 1312 } else if (ptype == para_DescribedThing) {
7136a6c7 1313 fprintf(fp, "</dt>");
c8c7926b 1314 } else if (ptype == para_Description) {
7136a6c7 1315 fprintf(fp, "</dd>");
1316 } else if (ptype == para_Bullet || ptype == para_NumberedList) {
d7482997 1317 fprintf(fp, "</li>");
1318 }
7136a6c7 1319 if (ptype == para_Bullet || ptype == para_NumberedList ||
1320 ptype == para_BiblioCited || ptype == para_Description ||
1321 ptype == para_DescribedThing)
d7482997 1322 /* close off list if necessary */
1323 {
1324 paragraph *p2 = p->next;
1325 int close_off=FALSE;
1326/* if (p2 && (xhtml_para_level(p2)>limit || xhtml_para_level(p2)==-1)) {*/
1327 if (p2 && xhtml_para_level(p2)==-1) {
c8c7926b 1328 if (p2->type != ptype &&
1329 !(p2->type==para_DescribedThing && ptype==para_Description) &&
1330 !(p2->type==para_Description && ptype==para_DescribedThing) &&
1331 p2->type != para_LcontPush)
d7482997 1332 close_off=TRUE;
1333 } else {
1334 close_off=TRUE;
1335 }
1336 if (close_off) {
7136a6c7 1337 if (ptype == para_Bullet) {
d7482997 1338 fprintf(fp, "</ul>\n");
7136a6c7 1339 } else if (ptype == para_NumberedList) {
d7482997 1340 fprintf(fp, "</ol>\n");
7136a6c7 1341 } else if (ptype == para_BiblioCited ||
1342 ptype == para_Description ||
1343 ptype == para_DescribedThing) {
d7482997 1344 fprintf(fp, "</dl>\n");
1345 }
1346 }
1347 }
1348 break;
1349
1350 case para_Code:
1351 xhtml_codepara(fp, p->words);
1352 break;
1353 }
7136a6c7 1354 last_type = ptype;
d7482997 1355 }
7136a6c7 1356
1357 stk_free(lcont_stack);
d7482997 1358}
1359
1360/*
1361 * Output a header for this XHTML file.
1362 */
1363static void xhtml_doheader(FILE *fp, word *title)
1364{
1365 fprintf(fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n");
1366 fprintf(fp, "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n");
1367 fprintf(fp, "<html xmlns='http://www.w3.org/1999/xhtml'>\n\n<head>\n<title>");
1368 if (title==NULL)
1369 fprintf(fp, "The thing with no name!");
1370 else
ce9921d6 1371 xhtml_para(fp, title, FALSE);
d7482997 1372 fprintf(fp, "</title>\n");
1373 fprintf(fp, "<meta name=\"generator\" content=\"Halibut %s xhtml-backend\" />\n", version);
1374 if (conf.author)
1375 fprintf(fp, "<meta name=\"author\" content=\"%ls\" />\n", conf.author);
1376 if (conf.description)
1377 fprintf(fp, "<meta name=\"description\" content=\"%ls\" />\n", conf.description);
1378 if (conf.head_end)
1379 fprintf(fp, "%ls\n", conf.head_end);
1380 fprintf(fp, "</head>\n\n");
1381 if (conf.body)
1382 fprintf(fp, "%ls\n", conf.body);
1383 else
1384 fprintf(fp, "<body>\n");
1385 if (conf.body_start)
1386 fprintf(fp, "%ls\n", conf.body_start);
1387}
1388
1389/*
1390 * Output a footer for this XHTML file.
1391 */
1392static void xhtml_dofooter(FILE *fp)
1393{
1394 fprintf(fp, "\n<hr />\n\n");
1395 if (conf.body_end)
1396 fprintf(fp, "%ls\n", conf.body_end);
1397 if (!conf.suppress_address) {
1398 fprintf(fp,"<address>\n");
1399 if (conf.address_start)
1400 fprintf(fp, "%ls\n", conf.address_start);
1401 /* Do the version ID */
1402 if (conf.include_version_id) {
1403 paragraph *p;
1404 int started = 0;
1405 for (p = sourceparas; p; p = p->next)
1406 if (p->type == para_VersionID) {
1407 xhtml_versionid(fp, p->words, started);
1408 started = 1;
1409 }
1410 }
1411 if (conf.address_end)
1412 fprintf(fp, "%ls\n", conf.address_end);
1413 fprintf(fp, "</address>\n");
1414 }
1415 fprintf(fp, "</body>\n\n</html>\n");
1416}
1417
1418/*
1419 * Output the versionid paragraph. Typically this is a version control
1420 * ID string (such as $Id...$ in RCS).
1421 */
1422static void xhtml_versionid(FILE *fp, word *text, int started)
1423{
1424 rdstringc t = { 0, 0, NULL };
1425
1426 rdaddc(&t, '['); /* FIXME: configurability */
ce9921d6 1427 xhtml_rdaddwc(&t, text, NULL, FALSE);
d7482997 1428 rdaddc(&t, ']'); /* FIXME: configurability */
1429
1430 if (started)
c8c7926b 1431 fprintf(fp, "<br />\n");
d7482997 1432 fprintf(fp, "%s\n", t.text);
1433 sfree(t.text);
1434}
1435
1436/* Is this an XHTML reserved character? */
1437static int xhtml_reservedchar(int c)
1438{
1439 if (c=='&' || c=='<' || c=='>' || c=='"')
1440 return TRUE;
1441 else
1442 return FALSE;
1443}
1444
1445/*
1446 * Convert a wide string into valid XHTML: Anything outside ASCII will
1447 * be fixed up as an entity. Currently we don't worry about constraining the
1448 * encoded character set, which we should probably do at some point (we can
1449 * still fix up and return FALSE - see the last comment here). We also don't
1450 * currently
1451 *
1452 * Because this is only used for words, spaces are HARD spaces (any other
1453 * spaces will be word_Whitespace not word_Normal). So they become &nbsp;
1454 * Unless hard_spaces is FALSE, of course (code paragraphs break the above
1455 * rule).
1456 *
1457 * If `result' is non-NULL, mallocs the resulting string and stores a pointer to
1458 * it in `*result'. If `result' is NULL, merely checks whether all
1459 * characters in the string are feasible.
1460 *
1461 * Return is nonzero if all characters are OK. If not all
1462 * characters are OK but `result' is non-NULL, a result _will_
1463 * still be generated!
1464 */
4b3c5afb 1465static int xhtml_convert(wchar_t *s, int maxlen, char **result,
1466 int hard_spaces) {
d7482997 1467 int doing = (result != 0);
1468 int ok = TRUE;
1469 char *p = NULL;
1470 int plen = 0, psize = 0;
1471
4b3c5afb 1472 if (maxlen <= 0)
1473 maxlen = -1;
1474
1475 for (; *s && maxlen != 0; s++, maxlen--) {
d7482997 1476 wchar_t c = *s;
1477
1478#define ensure_size(i) if (i>=psize) { psize = i+256; p = resize(p, psize); }
1479
1480 if (((c == 32 && !hard_spaces) || (c > 32 && c <= 126 && !xhtml_reservedchar(c)))) {
1481 /* Char is OK. */
1482 if (doing)
1483 {
1484 ensure_size(plen);
1485 p[plen++] = (char)c;
1486 }
1487 } else {
1488 /* Char needs fixing up. */
1489 /* ok = FALSE; -- currently we never return FALSE; we
1490 * might want to when considering a character set for the
1491 * encoded document.
1492 */
1493 if (doing)
1494 {
1495 if (c==32) { /* a space in a word is a hard space */
1496 ensure_size(plen+6); /* includes space for the NUL, which is subsequently stomped on */
1497 sprintf(p+plen, "&nbsp;");
1498 plen+=6;
1499 } else {
1500 /* FIXME: entity names! */
1501 ensure_size(plen+8); /* includes space for the NUL, which is subsequently stomped on */
1502 plen+=sprintf(p+plen, "&#%04i;", (int)c);
1503 }
1504 }
1505 }
1506 }
1507 if (doing) {
1508 p = resize(p, plen+1);
1509 p[plen] = '\0';
1510 *result = p;
1511 }
1512 return ok;
1513}
1514
1515/*
1516 * This formats the given words as XHTML.
ce9921d6 1517 *
1518 * `indexable', if FALSE, prohibits adding any index references.
1519 * You might use this, for example, if an index reference occurred
1520 * in a section title, to prevent phony index references when the
1521 * section title is processed in strange places such as contents
1522 * sections.
d7482997 1523 */
ce9921d6 1524static void xhtml_rdaddwc(rdstringc *rs, word *text, word *end, int indexable) {
d7482997 1525 char *c;
1526 keyword *kwl;
1527 xhtmlsection *sect;
1528 indextag *itag;
1529 int ti;
1530
1531 for (; text && text != end; text = text->next) {
1532 switch (text->type) {
1533 case word_HyperLink:
1534 xhtml_utostr(text->text, &c);
1535 rdaddsc(rs, "<a href=\"");
1536 rdaddsc(rs, c);
1537 rdaddsc(rs, "\">");
1538 sfree(c);
1539 break;
1540
1541 case word_UpperXref:
1542 case word_LowerXref:
1543 kwl = kw_lookup(keywords, text->text);
1544 if (kwl) {
1545 sect=xhtml_find_section(kwl->para);
1546 if (sect) {
1547 rdaddsc(rs, "<a href=\"");
1548 rdaddsc(rs, sect->file->filename);
1549 rdaddc(rs, '#');
1550 rdaddsc(rs, sect->fragment);
1551 rdaddsc(rs, "\">");
1552 } else {
1553 rdaddsc(rs, "<a href=\"Apologies.html\"><!-- probably a bibliography cross reference -->");
1554 error(err_whatever, "Couldn't locate cross-reference! (Probably a bibliography entry.)");
1555 }
1556 } else {
1557 rdaddsc(rs, "<a href=\"Apologies.html\"><!-- unknown cross-reference -->");
1558 error(err_whatever, "Couldn't locate cross-reference! (Wasn't in source file.)");
1559 }
1560 break;
1561
1562 case word_IndexRef: /* in theory we could make an index target here */
1563/* rdaddsc(rs, "<a name=\"idx-");
1564 xhtml_utostr(text->text, &c);
1565 rdaddsc(rs, c);
1566 sfree(c);
1567 rdaddsc(rs, "\"></a>");*/
1568 /* what we _do_ need to do is to fix up the backend data
1569 * for any indexentry this points to.
1570 */
ce9921d6 1571 if (!indexable)
1572 break;
1573
d7482997 1574 for (ti=0; (itag = (indextag *)index234(idx->tags, ti))!=NULL; ti++) {
1575 /* FIXME: really ustricmp() and not ustrcmp()? */
1576 if (ustricmp(itag->name, text->text)==0) {
1577 break;
1578 }
1579 }
1580 if (itag!=NULL) {
1581 if (itag->refs!=NULL) {
1582 int i;
1583 for (i=0; i<itag->nrefs; i++) {
1584 xhtmlindex *idx_ref;
1585 indexentry *ientry;
1586
1587 ientry = itag->refs[i];
1588 if (ientry->backend_data==NULL) {
1589 idx_ref = (xhtmlindex*) smalloc(sizeof(xhtmlindex));
1590 if (idx_ref==NULL)
1591 fatal(err_nomemory);
1592 idx_ref->nsection = 0;
1593 idx_ref->size = 4;
1594 idx_ref->sections = (xhtmlsection**) smalloc(idx_ref->size * sizeof(xhtmlsection*));
1595 if (idx_ref->sections==NULL)
1596 fatal(err_nomemory);
1597 ientry->backend_data = idx_ref;
1598 } else {
1599 idx_ref = ientry->backend_data;
1600 if (idx_ref->nsection+1 > idx_ref->size) {
1601 int new_size = idx_ref->size * 2;
1602 idx_ref->sections = srealloc(idx_ref->sections, new_size * sizeof(xhtmlsection));
1603 if (idx_ref->sections==NULL) {
1604 fatal(err_nomemory);
1605 }
1606 idx_ref->size = new_size;
1607 }
1608 }
1609 idx_ref->sections[idx_ref->nsection++] = currentsection;
1610#if 0
1611#endif
1612 }
1613 } else {
1614 fatal(err_whatever, "Index tag had no entries!");
1615 }
1616 } else {
1617 fprintf(stderr, "Looking for index entry '%ls'\n", text->text);
1618 fatal(err_whatever, "Couldn't locate index entry! (Wasn't in index.)");
1619 }
1620 break;
1621
1622 case word_HyperEnd:
1623 case word_XrefEnd:
1624 rdaddsc(rs, "</a>");
1625 break;
1626
1627 case word_Normal:
1628 case word_Emph:
1629 case word_Code:
1630 case word_WeakCode:
1631 case word_WhiteSpace:
1632 case word_EmphSpace:
1633 case word_CodeSpace:
1634 case word_WkCodeSpace:
1635 case word_Quote:
1636 case word_EmphQuote:
1637 case word_CodeQuote:
1638 case word_WkCodeQuote:
1639 assert(text->type != word_CodeQuote &&
1640 text->type != word_WkCodeQuote);
1641 if (towordstyle(text->type) == word_Emph &&
1642 (attraux(text->aux) == attr_First ||
1643 attraux(text->aux) == attr_Only))
1644 rdaddsc(rs, "<em>");
1645 else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) &&
1646 (attraux(text->aux) == attr_First ||
1647 attraux(text->aux) == attr_Only))
1648 rdaddsc(rs, "<code>");
1649
1650 if (removeattr(text->type) == word_Normal) {
4b3c5afb 1651 if (xhtml_convert(text->text, 0, &c, TRUE)) /* spaces in the word are hard */
d7482997 1652 rdaddsc(rs, c);
1653 else
ce9921d6 1654 xhtml_rdaddwc(rs, text->alt, NULL, indexable);
d7482997 1655 sfree(c);
1656 } else if (removeattr(text->type) == word_WhiteSpace) {
1657 rdaddc(rs, ' ');
1658 } else if (removeattr(text->type) == word_Quote) {
1659 rdaddsc(rs, "&quot;");
1660 }
1661
1662 if (towordstyle(text->type) == word_Emph &&
1663 (attraux(text->aux) == attr_Last ||
1664 attraux(text->aux) == attr_Only))
1665 rdaddsc(rs, "</em>");
1666 else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) &&
1667 (attraux(text->aux) == attr_Last ||
1668 attraux(text->aux) == attr_Only))
1669 rdaddsc(rs, "</code>");
1670 break;
1671 }
1672 }
1673}
1674
1675/* Output a heading, formatted as XHTML.
1676 */
ce9921d6 1677static void xhtml_heading(FILE *fp, paragraph *p, int indexable)
d7482997 1678{
1679 rdstringc t = { 0, 0, NULL };
1680 word *tprefix = p->kwtext;
1681 word *nprefix = p->kwtext2;
1682 word *text = p->words;
1683 int level = xhtml_para_level(p);
1684 xhtmlsection *sect = xhtml_find_section(p);
5d9cc07b 1685 xhtmlheadfmt *fmt;
d7482997 1686 char *fragment;
1687 if (sect) {
1688 fragment = sect->fragment;
1689 } else {
d2e74722 1690 if (p->type == para_Title)
1691 fragment = "title";
1692 else {
1693 fragment = ""; /* FIXME: what else can we do? */
1694 error(err_whatever, "Couldn't locate heading cross-reference!");
1695 }
d7482997 1696 }
1697
5d9cc07b 1698 if (p->type == para_Title)
1699 fmt = NULL;
1700 else if (level == 1)
1701 fmt = &conf.fchapter;
1702 else if (level-1 < conf.nfsect)
1703 fmt = &conf.fsect[level-1];
1704 else
1705 fmt = &conf.fsect[conf.nfsect-1];
1706
1707 if (fmt && fmt->just_numbers && nprefix) {
ce9921d6 1708 xhtml_rdaddwc(&t, nprefix, NULL, indexable);
5d9cc07b 1709 if (fmt) {
1710 char *c;
4b3c5afb 1711 if (xhtml_convert(fmt->number_suffix, 0, &c, FALSE)) {
5d9cc07b 1712 rdaddsc(&t, c);
1713 sfree(c);
1714 }
1715 }
1716 } else if (fmt && !fmt->just_numbers && tprefix) {
ce9921d6 1717 xhtml_rdaddwc(&t, tprefix, NULL, indexable);
5d9cc07b 1718 if (fmt) {
1719 char *c;
4b3c5afb 1720 if (xhtml_convert(fmt->number_suffix, 0, &c, FALSE)) {
5d9cc07b 1721 rdaddsc(&t, c);
1722 sfree(c);
1723 }
1724 }
d7482997 1725 }
ce9921d6 1726 xhtml_rdaddwc(&t, text, NULL, indexable);
d9d3dd95 1727 /*
1728 * If we're outputting in single-file mode, we need to lower
1729 * the level of each heading by one, because the overall
1730 * document title will be sitting right at the top as an <h1>
1731 * and so chapters and sections should start at <h2>.
1732 *
1733 * Even if not, the document title will come back from
1734 * xhtml_para_level() as level zero, so we must increment that
1735 * no matter what leaf_level is set to.
1736 */
1737 if (conf.leaf_level == 0 || level == 0)
1738 level++;
d7482997 1739 fprintf(fp, "<a name=\"%s\"></a><h%i>%s</h%i>\n", fragment, level, t.text, level);
1740 sfree(t.text);
1741}
1742
1743/* Output a paragraph. Styles are handled by xhtml_rdaddwc().
1744 * This looks pretty simple; I may have missed something ...
1745 */
ce9921d6 1746static void xhtml_para(FILE *fp, word *text, int indexable)
d7482997 1747{
1748 rdstringc out = { 0, 0, NULL };
ce9921d6 1749 xhtml_rdaddwc(&out, text, NULL, indexable);
d7482997 1750 fprintf(fp, "%s", out.text);
1751 sfree(out.text);
1752}
1753
1754/* Output a code paragraph. I'm treating this as preformatted, which
1755 * may not be entirely correct. See xhtml_para() for my worries about
1756 * this being overly-simple; however I think that most of the complexity
1757 * of the text backend came entirely out of word wrapping anyway.
1758 */
1759static void xhtml_codepara(FILE *fp, word *text)
1760{
1761 fprintf(fp, "<pre>");
1762 for (; text; text = text->next) if (text->type == word_WeakCode) {
4b3c5afb 1763 word *here, *next;
d7482997 1764 char *c;
4b3c5afb 1765
1766 /*
1767 * See if this WeakCode is followed by an Emph to indicate
1768 * emphasis.
1769 */
1770 here = text;
1771 if (text->next && text->next->type == word_Emph) {
1772 next = text = text->next;
1773 } else
1774 next = NULL;
1775
1776 if (next) {
1777 wchar_t *t, *e;
1778 int n;
1779
1780 t = here->text;
1781 e = next->text;
1782
1783 while (*e) {
1784 int ec = *e;
1785
1786 for (n = 0; t[n] && e[n] && e[n] == ec; n++);
1787 xhtml_convert(t, n, &c, FALSE);
1788 fprintf(fp, "%s%s%s",
1789 (ec == 'i' ? "<em>" : ec == 'b' ? "<b>" : ""),
1790 c,
1791 (ec == 'i' ? "</em>" : ec == 'b' ? "</b>" : ""));
1792 sfree(c);
1793
1794 t += n;
1795 e += n;
1796 }
1797
1798 xhtml_convert(t, 0, &c, FALSE);
1799 fprintf(fp, "%s\n", c);
1800 sfree(c);
1801 } else {
1802 xhtml_convert(here->text, 0, &c, FALSE);
1803 fprintf(fp, "%s\n", c);
1804 sfree(c);
1805 }
d7482997 1806 }
1807 fprintf(fp, "</pre>\n");
1808}