Simplify treatment of the copyright notice, now I've also simplified
[sgt/halibut] / bk_xhtml.c
CommitLineData
d7482997 1/*
2 * xhtml backend for Halibut
3 * (initial implementation by James Aylett)
4 *
5 * Still to do:
6 *
7 * +++ doesn't handle non-breaking hyphens. Not sure how to yet.
8 * +++ entity names (from a file -- ideally supply normal SGML files)
9 * +++ configuration directive to file split where the current layout
10 * code wouldn't. Needs changes to _ponder_layout() and _do_paras(),
11 * perhaps others.
12 *
13 * Limitations:
14 *
15 * +++ biblio/index references target the nearest section marker, rather
16 * than having a dedicated target themselves. In large bibliographies
17 * this will cause problems. (The solution is to fake up a response
18 * from xhtml_find_section(), probably linking it into the sections
19 * chain just in case we need it again, and to make freeing it up
20 * easier.) docsrc.pl used to work as we do, however, and SGT agrees that
21 * this is acceptable for now.
22 * +++ can't cope with leaf-level == 0. It's all to do with the
23 * top-level file not being normal, probably not even having a valid
24 * section level, and stuff like that. I question whether this is an
25 * issue, frankly; small manuals that fit on one page should probably
26 * not be written in halibut at all.
27 */
28
29#include <stdio.h>
30#include <stdlib.h>
677e18a2 31#include <string.h>
d7482997 32#include <assert.h>
33#include "halibut.h"
34
35struct xhtmlsection_Struct {
36 struct xhtmlsection_Struct *next; /* next sibling (NULL if split across files) */
37 struct xhtmlsection_Struct *child; /* NULL if split across files */
38 struct xhtmlsection_Struct *parent; /* NULL if split across files */
39 struct xhtmlsection_Struct *chain; /* single structure independent of weird trees */
40 paragraph *para;
41 struct xhtmlfile_Struct *file; /* which file is this a part of? */
42 char *fragment; /* fragment id within the file */
43 int level;
44};
45
/*
 * One output file. Files form their own tree, mirroring the section
 * tree at and above the level where the document is split.
 */
struct xhtmlfile_Struct {
    /* Next sibling, first child and parent in the file tree. */
    struct xhtmlfile_Struct *next, *child, *parent;
    /* Name of the file to write. */
    char *filename;
    /* Sections within this file (only one for a non-leaf file). */
    struct xhtmlsection_Struct *sections;
    /* Non-zero when this file is a leaf, i.e. has no child files. */
    int is_leaf;
};
54
/* Short names for the backend's private structures. */
typedef struct xhtmlsection_Struct xhtmlsection;
typedef struct xhtmlfile_Struct xhtmlfile;
typedef struct xhtmlindex_Struct xhtmlindex;

/*
 * Per-index-entry data, hung off an index entry's backend_data
 * pointer: the sections that the entry refers to.
 */
struct xhtmlindex_Struct {
    /*
     * nsection: number of slots in sections[] that are in use.
     * size: presumably the allocated capacity of sections[] -- the code
     * that maintains it is not visible here; confirm before relying on it.
     */
    int nsection, size;
    xhtmlsection **sections;
};
64
/* How the number of a heading at one level is formatted. */
typedef struct {
    int just_numbers;           /* boolean, set from xhtml-*-numeric */
    wchar_t *number_suffix;     /* heap-allocated, set from xhtml-*-suffix */
} xhtmlheadfmt;

/* Everything configurable about the backend; filled in by xhtml_configure(). */
typedef struct {
    int contents_depth[6];      /* xhtml-contents-depth-0 .. -5 */
    int leaf_contains_contents; /* xhtml-leaf-contains-contents */
    int leaf_level;             /* xhtml-leaf-level */
    int leaf_smallest_contents; /* xhtml-leaf-smallest-contents */
    int include_version_id;     /* xhtml-versionid */
    wchar_t *author;            /* xhtml-author */
    wchar_t *description;       /* xhtml-description */
    wchar_t *head_end;          /* xhtml-head-end */
    wchar_t *body;              /* xhtml-body-tag */
    wchar_t *body_start;        /* xhtml-body-start */
    wchar_t *body_end;          /* xhtml-body-end */
    wchar_t *address_start;     /* xhtml-address-start */
    wchar_t *address_end;       /* xhtml-address-end */
    wchar_t *nav_attrs;         /* xhtml-navigation-attributes */
    int suppress_address;       /* xhtml-suppress-address */
    xhtmlheadfmt fchapter;      /* heading format for chapters */
    xhtmlheadfmt *fsect;        /* heading formats for sections, nfsect entries */
    int nfsect;
} xhtmlconfig;
82
83/*static void xhtml_level(paragraph *, int);
84static void xhtml_level_0(paragraph *);
85static void xhtml_docontents(FILE *, paragraph *, int);
86static void xhtml_dosections(FILE *, paragraph *, int);
87static void xhtml_dobody(FILE *, paragraph *, int);*/
88
89static void xhtml_doheader(FILE *, word *);
90static void xhtml_dofooter(FILE *);
91static void xhtml_versionid(FILE *, word *, int);
92
93static void xhtml_utostr(wchar_t *, char **);
94static int xhtml_para_level(paragraph *);
95static int xhtml_reservedchar(int);
96
4b3c5afb 97static int xhtml_convert(wchar_t *, int, char **, int);
ce9921d6 98static void xhtml_rdaddwc(rdstringc *, word *, word *, int);
99static void xhtml_para(FILE *, word *, int);
d7482997 100static void xhtml_codepara(FILE *, word *);
ce9921d6 101static void xhtml_heading(FILE *, paragraph *, int);
d7482997 102
103/* File-global variables are much easier than passing these things
104 * all over the place. Evil, but easier. We can replace this with a single
105 * structure at some point.
106 */
107static xhtmlconfig conf;
108static keywordlist *keywords;
109static indexdata *idx;
110static xhtmlfile *topfile;
111static xhtmlsection *topsection;
112static paragraph *sourceparas;
113static xhtmlfile *lastfile;
114static xhtmlfile *xhtml_last_file = NULL;
115static int last_level=-1;
116static xhtmlsection *currentsection;
117
118static xhtmlconfig xhtml_configure(paragraph *source)
119{
120 xhtmlconfig ret;
121
122 /*
123 * Defaults.
124 */
125 ret.contents_depth[0] = 2;
126 ret.contents_depth[1] = 3;
127 ret.contents_depth[2] = 4;
128 ret.contents_depth[3] = 5;
129 ret.contents_depth[4] = 6;
130 ret.contents_depth[5] = 7;
131 ret.leaf_level = 2;
132 ret.leaf_smallest_contents = 4;
133 ret.leaf_contains_contents = FALSE;
134 ret.include_version_id = TRUE;
135 ret.author = NULL;
136 ret.description = NULL;
137 ret.head_end = NULL;
138 ret.body = NULL;
139 ret.body_start = NULL;
140 ret.body_end = NULL;
141 ret.address_start = NULL;
142 ret.address_end = NULL;
143 ret.nav_attrs = NULL;
144 ret.suppress_address = FALSE;
145
5d9cc07b 146 ret.fchapter.just_numbers = FALSE;
147 ret.fchapter.number_suffix = ustrdup(L": ");
148 ret.nfsect = 2;
149 ret.fsect = mknewa(xhtmlheadfmt, ret.nfsect);
150 ret.fsect[0].just_numbers = FALSE;
151 ret.fsect[0].number_suffix = ustrdup(L": ");
152 ret.fsect[1].just_numbers = TRUE;
153 ret.fsect[1].number_suffix = ustrdup(L" ");
154
d7482997 155 for (; source; source = source->next)
156 {
157 if (source->type == para_Config)
158 {
159 if (!ustricmp(source->keyword, L"xhtml-contents-depth-0")) {
160 ret.contents_depth[0] = utoi(uadv(source->keyword));
161 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-1")) {
162 ret.contents_depth[1] = utoi(uadv(source->keyword));
163 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-2")) {
164 ret.contents_depth[2] = utoi(uadv(source->keyword));
165 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-3")) {
166 ret.contents_depth[3] = utoi(uadv(source->keyword));
167 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-4")) {
168 ret.contents_depth[4] = utoi(uadv(source->keyword));
169 } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-5")) {
170 ret.contents_depth[5] = utoi(uadv(source->keyword));
171 } else if (!ustricmp(source->keyword, L"xhtml-leaf-level")) {
172 ret.leaf_level = utoi(uadv(source->keyword));
d7482997 173 } else if (!ustricmp(source->keyword, L"xhtml-leaf-smallest-contents")) {
174 ret.leaf_smallest_contents = utoi(uadv(source->keyword));
175 } else if (!ustricmp(source->keyword, L"xhtml-versionid")) {
176 ret.include_version_id = utob(uadv(source->keyword));
177 } else if (!ustricmp(source->keyword, L"xhtml-leaf-contains-contents")) {
178 ret.leaf_contains_contents = utob(uadv(source->keyword));
179 } else if (!ustricmp(source->keyword, L"xhtml-suppress-address")) {
180 ret.suppress_address = utob(uadv(source->keyword));
181 } else if (!ustricmp(source->keyword, L"xhtml-author")) {
182 ret.author = uadv(source->keyword);
183 } else if (!ustricmp(source->keyword, L"xhtml-description")) {
184 ret.description = uadv(source->keyword);
185 } else if (!ustricmp(source->keyword, L"xhtml-head-end")) {
186 ret.head_end = uadv(source->keyword);
187 } else if (!ustricmp(source->keyword, L"xhtml-body-start")) {
188 ret.body_start = uadv(source->keyword);
189 } else if (!ustricmp(source->keyword, L"xhtml-body-tag")) {
190 ret.body = uadv(source->keyword);
191 } else if (!ustricmp(source->keyword, L"xhtml-body-end")) {
192 ret.body_end = uadv(source->keyword);
193 } else if (!ustricmp(source->keyword, L"xhtml-address-start")) {
194 ret.address_start = uadv(source->keyword);
195 } else if (!ustricmp(source->keyword, L"xhtml-address-end")) {
196 ret.address_end = uadv(source->keyword);
197 } else if (!ustricmp(source->keyword, L"xhtml-navigation-attributes")) {
198 ret.nav_attrs = uadv(source->keyword);
5d9cc07b 199 } else if (!ustricmp(source->keyword, L"xhtml-chapter-numeric")) {
200 ret.fchapter.just_numbers = utob(uadv(source->keyword));
201 } else if (!ustricmp(source->keyword, L"xhtml-chapter-suffix")) {
a0f2c111 202 ret.fchapter.number_suffix = ustrdup(uadv(source->keyword));
5d9cc07b 203 } else if (!ustricmp(source->keyword, L"xhtml-section-numeric")) {
204 wchar_t *p = uadv(source->keyword);
205 int n = 0;
206 if (uisdigit(*p)) {
207 n = utoi(p);
208 p = uadv(p);
209 }
210 if (n >= ret.nfsect) {
211 int i;
212 ret.fsect = resize(ret.fsect, n+1);
213 for (i = ret.nfsect; i <= n; i++)
214 ret.fsect[i] = ret.fsect[ret.nfsect-1];
215 ret.nfsect = n+1;
216 }
217 ret.fsect[n].just_numbers = utob(p);
218 } else if (!ustricmp(source->keyword, L"xhtml-section-suffix")) {
219 wchar_t *p = uadv(source->keyword);
220 int n = 0;
221 if (uisdigit(*p)) {
222 n = utoi(p);
223 p = uadv(p);
224 }
225 if (n >= ret.nfsect) {
226 int i;
227 ret.fsect = resize(ret.fsect, n+1);
228 for (i = ret.nfsect; i <= n; i++)
229 ret.fsect[i] = ret.fsect[ret.nfsect-1];
230 ret.nfsect = n+1;
231 }
a0f2c111 232 ret.fsect[n].number_suffix = ustrdup(p);
d7482997 233 }
234 }
235 }
236
237 /* printf(" !!! leaf_level = %i\n", ret.leaf_level);
238 printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]);
239 printf(" !!! contentdepth-1 = %i\n", ret.contents_depth[1]);
240 printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]);
241 printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]);
242 printf(" !!! contentdepth-4 = %i\n", ret.contents_depth[4]);
243 printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]);
244 printf(" !!! leaf_contains_contents = %i\n", ret.leaf_contains_contents);*/
245 return ret;
246}
247
248static xhtmlsection *xhtml_new_section(xhtmlsection *last)
249{
250 xhtmlsection *ret = mknew(xhtmlsection);
251 ret->next=NULL;
252 ret->child=NULL;
253 ret->parent=NULL;
254 ret->chain=last;
255 ret->para=NULL;
256 ret->file=NULL;
257 ret->fragment=NULL;
258 ret->level=-1; /* marker: end of chain */
259 return ret;
260}
261
262/* Returns NULL or the section that marks that paragraph */
263static xhtmlsection *xhtml_find_section(paragraph *p)
264{
265 xhtmlsection *ret = topsection;
266 if (xhtml_para_level(p)==-1) { /* first, we back-track to a section paragraph */
267 paragraph *p2 = sourceparas;
268 paragraph *p3 = NULL;
269 while (p2 && p2!=p) {
270 if (xhtml_para_level(p2)!=-1) {
271 p3 = p2;
272 }
273 p2=p2->next;
274 }
275 if (p3==NULL) { /* for some reason, we couldn't find a section before this paragraph ... ? */
276 /* Note that this can happen, if you have a cross-reference to before the first chapter starts.
277 * So don't do that, then.
278 */
279 return NULL;
280 }
281 p=p3;
282 }
283 while (ret && ret->para != p) {
284/* printf(" xhtml_find_section(): checking %s for para @ %p\n", ret->fragment, p);*/
285 ret=ret->chain;
286 }
287 return ret;
288}
289
290static xhtmlfile *xhtml_new_file(xhtmlsection *sect)
291{
292 xhtmlfile *ret = mknew(xhtmlfile);
293
294 ret->next=NULL;
295 ret->child=NULL;
296 ret->parent=NULL;
297 ret->filename=NULL;
298 ret->sections=sect;
299 ret->is_leaf=(sect!=NULL && sect->level==conf.leaf_level);
300 if (sect==NULL) {
301 if (conf.leaf_level==0) { /* currently unused */
302#define FILENAME_MANUAL "Manual.html"
303#define FILENAME_CONTENTS "Contents.html"
304 ret->filename = smalloc(strlen(FILENAME_MANUAL)+1);
305 sprintf(ret->filename, FILENAME_MANUAL);
306 } else {
307 ret->filename = smalloc(strlen(FILENAME_CONTENTS)+1);
308 sprintf(ret->filename, FILENAME_CONTENTS);
309 }
310 } else {
311 paragraph *p = sect->para;
312 rdstringc fname_c = { 0, 0, NULL };
313 char *c;
314 word *w;
315 for (w=(p->kwtext)?(p->kwtext):(p->words); w; w=w->next)
316 {
317 switch (removeattr(w->type))
318 {
319 case word_Normal:
320 /*case word_Emph:
321 case word_Code:
322 case word_WeakCode:*/
323 xhtml_utostr(w->text, &c);
324 rdaddsc(&fname_c,c);
325 sfree(c);
326 break;
327 }
328 }
329 rdaddsc(&fname_c, ".html");
330 ret->filename = rdtrimc(&fname_c);
331 }
332 /* printf(" ! new file '%s', is_leaf == %s\n", ret->filename, (ret->is_leaf)?("true"):("false"));*/
333 return ret;
334}
335
336/*
337 * Walk the tree fixing up files which are actually leaf (ie
338 * have no children) but aren't at leaf level, so they have the
339 * leaf flag set.
340 */
341void xhtml_fixup_layout(xhtmlfile* file)
342{
343 if (file->child==NULL) {
344 file->is_leaf = TRUE;
345 } else {
346 xhtml_fixup_layout(file->child);
347 }
348 if (file->next)
349 xhtml_fixup_layout(file->next);
350}
351
352/*
353 * Create the tree structure so we know where everything goes.
354 * Method:
355 *
356 * Ignoring file splitting, we have three choices with each new section:
357 *
358 * +-----------------+-----------------+
359 * | | |
360 * X +----X----+ (1)
361 * | |
5d9cc07b 362 * Y (2)
d7482997 363 * |
364 * (3)
365 *
366 * Y is the last section we added (currentsect).
367 * If sect is the section we want to add, then:
368 *
369 * (1) if sect->level < currentsect->level
370 * (2) if sect->level == currentsect->level
371 * (3) if sect->level > currentsect->level
372 *
373 * This requires the constraint that you never skip section numbers
374 * (so you can't have a.b.c.d without all of a, a.b and a.b.c existing).
375 *
376 * Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change
377 * more than one level at a time. Lots of asserts, and probably part of
378 * the algorithm here, rely on this being true. (It currently isn't
379 * enforced by halibut, however.)
380 *
381 * File splitting makes this harder. For instance, say we added at (3)
382 * above and now need to add another section. We are splitting at level
383 * 2, ie the level of Y. Z is the last section we added:
384 *
385 * +-----------------+-----------------+
386 * | | |
387 * X +----X----+ (1)
388 * | |
389 * +----Y----+ (1)
390 * | |
391 * Z (2)
392 * |
393 * (3)
394 *
395 * The (1) case is now split; we need to search upwards to find where
396 * to actually link in. The other two cases remain the same (and will
397 * always be like this).
398 *
399 * File splitting makes this harder, however. The decision of whether
400 * to split to a new file is always on the same condition, however (is
401 * the level of this section higher than the leaf_level configuration
402 * value or not).
403 *
404 * Treating the cases backwards:
405 *
406 * (3) same file if sect->level > conf.leaf_level, otherwise new file
407 *
408 * if in the same file, currentsect->child points to sect
409 * otherwise the linking is done through the file tree (which works
410 * in more or less the same way, ie currentfile->child points to
411 * the new file)
412 *
413 * (2) same file if sect->level > conf.leaf_level, otherwise new file
414 *
415 * if in the same file, currentsect->next points to sect
416 * otherwise file linking and currentfile->next points to the new
417 * file (we know that Z must have caused a new file to be created)
418 *
419 * (1) same file if sect->level > conf.leaf_level, otherwise new file
420 *
421 * this is actually effectively the same case as (2) here,
422 * except that we first have to travel up the sections to figure
423 * out which section this new one will be a sibling of. In doing
424 * so, we may disappear off the top of a file and have to go up
425 * to its parent in the file tree.
426 *
427 */
/*
 * Build the section chain and the file tree for the paragraph list 'p',
 * following the method described in the comment above.
 *
 * On return the globals topfile (root of the file tree) and topsection
 * (the most recently created section, i.e. the head of the chain) are
 * set, and xhtml_fixup_layout() has been run over the file tree.
 * Documents that skip section levels can trip the asserts below.
 */
428static void xhtml_ponder_layout(paragraph *p)
429{
430 xhtmlsection *lastsection;
431 xhtmlsection *currentsect;
432 xhtmlfile *currentfile;
433
434 lastfile = NULL;
 /* Start from a placeholder section (level -1, no paragraph) and the
  * top-level file, which is created for a NULL section. */
435 topsection = xhtml_new_section(NULL);
436 topfile = xhtml_new_file(NULL);
437 lastsection = topsection;
438 currentfile = topfile;
439 currentsect = topsection;
440
 /* Leaf level 0: the top file is itself a leaf and owns the
  * placeholder section, so that sections can attach beneath it. */
d2e74722 441 if (conf.leaf_level == 0) {
442 topfile->is_leaf = 1;
443 topfile->sections = topsection;
444 topsection->file = topfile;
445 }
446
d7482997 447 for (; p; p=p->next)
448 {
449 int level = xhtml_para_level(p);
 /* Level 0 (the title) and -1 (not a section paragraph) are skipped. */
450 if (level>0) /* actually a section */
451 {
452 xhtmlsection *sect;
453 word *w;
454 char *c;
455 rdstringc fname_c = { 0, 0, NULL };
456
 /* Each new section is chained in front of the previous one. */
457 sect = xhtml_new_section(lastsection);
458 lastsection = sect;
459 sect->para = p;
 /* The fragment id is the ASCII form of the Normal words only. */
460 for (w=(p->kwtext2)?(p->kwtext2):(p->words); w; w=w->next) /* kwtext2 because we want numbers only! */
461 {
462 switch (removeattr(w->type))
463 {
464 case word_Normal:
465 /*case word_Emph:
466 case word_Code:
467 case word_WeakCode:*/
468 xhtml_utostr(w->text, &c);
469 rdaddsc(&fname_c,c);
470 sfree(c);
471 break;
472 }
473 }
474/* rdaddsc(&fname_c, ".html");*/
475 sect->fragment = rdtrimc(&fname_c);
476 sect->level = level;
477 /* printf(" ! adding para @ %p as sect %s, level %i\n", sect->para, sect->fragment, level);*/
478
479 if (level>currentsect->level) { /* case (3) */
480 if (level>conf.leaf_level) { /* same file */
481 assert(currentfile->is_leaf);
482 currentsect->child = sect;
483 sect->parent=currentsect;
484 sect->file=currentfile;
485 /* printf("connected '%s' to existing file '%s' [I]\n", sect->fragment, currentfile->filename);*/
486 currentsect=sect;
487 } else { /* new file */
488 xhtmlfile *file = xhtml_new_file(sect);
489 assert(!currentfile->is_leaf);
490 currentfile->child=file;
491 sect->file=file;
492 file->parent=currentfile;
493 /* printf("connected '%s' to new file '%s' [I]\n", sect->fragment, file->filename);*/
494 currentfile=file;
495 currentsect=sect;
496 }
497 } else if (level >= currentsect->file->sections->level) {
498 /* Case (1) or (2) *AND* still under the section that starts
499 * the current file.
500 *
501 * I'm not convinced that this couldn't be rolled in with the
502 * final else {} leg further down. It seems a lot of effort
503 * this way.
504 */
505 if (level>conf.leaf_level) { /* stick within the same file */
506 assert(currentfile->is_leaf);
507 sect->file = currentfile;
 /* Climb to the section at the new section's level, staying
  * inside this file; the level test is evaluated first, so the
  * parent is only examined while we are still too deep. */
508 while (currentsect && currentsect->level > level &&
509 currentsect->file==currentsect->parent->file) {
510 currentsect = currentsect->parent;
511 }
512 assert(currentsect);
513 currentsect->next = sect;
514 assert(currentsect->level == sect->level);
515 sect->parent = currentsect->parent;
516 currentsect = sect;
517 /* printf("connected '%s' to existing file '%s' [II]\n", sect->fragment, currentfile->filename);*/
518 } else { /* new file */
519 xhtmlfile *file = xhtml_new_file(sect);
520 sect->file=file;
521 currentfile->next=file;
522 file->parent=currentfile->parent;
523 file->is_leaf=(level==conf.leaf_level);
524 file->sections=sect;
525 /* printf("connected '%s' to new file '%s' [II]\n", sect->fragment, file->filename);*/
526 currentfile=file;
527 currentsect=sect;
528 }
529 } else { /* Case (1) or (2) and we must move up the file tree first */
530 /* this loop is now probably irrelevant - we know we can't connect
531 * to anything in the current file */
532 while (currentsect && level<currentsect->level) {
533 currentsect=currentsect->parent;
534 if (currentsect) {
535 /* printf(" * up one level to '%s'\n", currentsect->fragment);*/
536 } else {
537 /* printf(" * up one level (off top of current file)\n");*/
538 }
539 }
540 if (currentsect) {
541 /* I'm pretty sure this can now never fire */
542 assert(currentfile->is_leaf);
543 /* printf("connected '%s' to existing file '%s' [III]\n", sect->fragment, currentfile->filename);*/
544 sect->file = currentfile;
545 currentsect->next=sect;
546 currentsect=sect;
547 } else { /* find a file we can attach to */
 /* Climb the file tree until the file's first section is no
  * deeper than the new section; the new file becomes that
  * file's next sibling. */
548 while (currentfile && currentfile->sections && level<currentfile->sections->level) {
549 currentfile=currentfile->parent;
550 if (currentfile) {
551 /* printf(" * up one file level to '%s'\n", currentfile->filename);*/
552 } else {
553 /* printf(" * up one file level (off top of tree)\n");*/
554 }
555 }
556 if (currentfile) { /* new file (we had to skip up a file to
557 get here, so we must be dealing with a
558 level no lower than the configured
559 leaf_level */
560 xhtmlfile *file = xhtml_new_file(sect);
561 currentfile->next=file;
562 sect->file=file;
563 file->parent=currentfile->parent;
564 file->is_leaf=(level==conf.leaf_level);
565 file->sections=sect;
566 /* printf("connected '%s' to new file '%s' [III]\n", sect->fragment, file->filename);*/
567 currentfile=file;
568 currentsect=sect;
569 } else {
570 fatal(err_whatever, "Ran off the top trying to connect sibling: strange document.");
571 }
572 }
573 }
574 }
575 }
 /* The chain runs backwards from the newest section to the
  * placeholder, so the newest section is its head. */
576 topsection = lastsection; /* get correct end of the chain */
577 xhtml_fixup_layout(topfile); /* leaf files not at leaf level marked as such */
578}
579
580static void xhtml_do_index();
581static void xhtml_do_file(xhtmlfile *file);
582static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform);
ce9921d6 583static void xhtml_do_paras(FILE *fp, paragraph *p, paragraph *end, int indexable);
d7482997 584static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit);
585static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit);
586static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit);
587static int xhtml_do_contents(FILE *fp, xhtmlfile *file);
588static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file);
589static void xhtml_do_sections(FILE *fp, xhtmlsection *sections);
590
591/*
592 * Do all the files in this structure.
593 */
594static void xhtml_do_files(xhtmlfile *file)
595{
596 xhtml_do_file(file);
597 if (file->child)
598 xhtml_do_files(file->child);
599 if (file->next)
600 xhtml_do_files(file->next);
601}
602
603/*
604 * Free up all memory used by the file tree from 'xfile' downwards
605 */
606static void xhtml_free_file(xhtmlfile* xfile)
607{
608 if (xfile==NULL) {
609 return;
610 }
611
612 if (xfile->filename) {
613 sfree(xfile->filename);
614 }
615 xhtml_free_file(xfile->child);
616 xhtml_free_file(xfile->next);
617 sfree(xfile);
618}
619
620/*
621 * Main function.
622 */
623void xhtml_backend(paragraph *sourceform, keywordlist *in_keywords,
624 indexdata *in_idx)
625{
626/* int i;*/
627 indexentry *ientry;
628 int ti;
629 xhtmlsection *xsect;
630
631 sourceparas = sourceform;
632 conf = xhtml_configure(sourceform);
633 keywords = in_keywords;
634 idx = in_idx;
635
636 /* Clear up the index entries backend data pointers */
637 for (ti=0; (ientry = (indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
638 ientry->backend_data=NULL;
639 }
640
641 xhtml_ponder_layout(sourceform);
642
643 /* old system ... (writes to *.alt, but gets some stuff wrong and is ugly) */
644/* xhtml_level_0(sourceform);
645 for (i=1; i<=conf.leaf_level; i++)
646 {
647 xhtml_level(sourceform, i);
648 }*/
649
650 /* new system ... (writes to *.html, but isn't fully trusted) */
651 xhtml_do_top_file(topfile, sourceform);
652 assert(!topfile->next); /* shouldn't have a sibling at all */
d2e74722 653 if (topfile->child) {
654 xhtml_do_files(topfile->child);
655 xhtml_do_index();
656 }
d7482997 657
658 /* release file, section, index data structures */
659 xsect = topsection;
660 while (xsect) {
661 xhtmlsection *tmp = xsect->chain;
662 if (xsect->fragment) {
663 sfree(xsect->fragment);
664 }
665 sfree(xsect);
666 xsect = tmp;
667 }
668 xhtml_free_file(topfile);
669 for (ti = 0; (ientry=(indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
670 if (ientry->backend_data!=NULL) {
671 xhtmlindex *xi = (xhtmlindex*) ientry->backend_data;
672 if (xi->sections!=NULL) {
673 sfree(xi->sections);
674 }
675 sfree(xi);
676 }
677 ientry->backend_data = NULL;
678 }
677e18a2 679 {
680 int i;
681 sfree(conf.fchapter.number_suffix);
682 for (i = 0; i < conf.nfsect; i++)
683 sfree(conf.fsect[i].number_suffix);
684 sfree(conf.fsect);
685 }
d7482997 686}
687
688static int xhtml_para_level(paragraph *p)
689{
690 switch (p->type)
691 {
d9d3dd95 692 case para_Title:
693 return 0;
694 break;
d7482997 695 case para_UnnumberedChapter:
696 case para_Chapter:
697 case para_Appendix:
698 return 1;
699 break;
700/* case para_BiblioCited:
701 return 2;
702 break;*/
703 case para_Heading:
704 case para_Subsect:
705 return p->aux+2;
706 break;
707 default:
708 return -1;
709 break;
710 }
711}
712
/* Name of the separate index page written by xhtml_do_index(). */
static char *xhtml_index_filename = "IndexPage.html";
714
715/* Output the nav links for the current file.
716 * file == NULL means we're doing the index
717 */
718static void xhtml_donavlinks(FILE *fp, xhtmlfile *file)
719{
720 xhtmlfile *xhtml_next_file = NULL;
721 fprintf(fp, "<p");
722 if (conf.nav_attrs!=NULL) {
723 fprintf(fp, " %ls>", conf.nav_attrs);
724 } else {
725 fprintf(fp, ">");
726 }
727 if (xhtml_last_file==NULL) {
728 fprintf(fp, "Previous | ");
729 } else {
730 fprintf(fp, "<a href='%s'>Previous</a> | ", xhtml_last_file->filename);
731 }
732 fprintf(fp, "<a href='Contents.html'>Contents</a> | ");
733 if (file != NULL) { /* otherwise we're doing nav links for the index */
734 if (xhtml_next_file==NULL)
735 xhtml_next_file = file->child;
736 if (xhtml_next_file==NULL)
737 xhtml_next_file = file->next;
738 if (xhtml_next_file==NULL)
739 xhtml_next_file = file->parent->next;
740 }
741 if (xhtml_next_file==NULL) {
742 if (file==NULL) { /* index, so no next file */
743 fprintf(fp, "Next ");
744 } else {
745 fprintf(fp, "<a href='%s'>Next</a>", xhtml_index_filename);
746 }
747 } else {
748 fprintf(fp, "<a href='%s'>Next</a>", xhtml_next_file->filename);
749 }
750 fprintf(fp, "</p>\n");
751}
752
753/* Write out the index file */
d2e74722 754static void xhtml_do_index_body(FILE *fp)
d7482997 755{
d7482997 756 indexentry *y;
757 int ti;
d7482997 758
d2e74722 759 if (count234(idx->entries) == 0)
760 return; /* don't write anything at all */
d7482997 761
762 fprintf(fp, "<dl>\n");
763 /* iterate over idx->entries using the tree functions and display everything */
764 for (ti = 0; (y = (indexentry *)index234(idx->entries, ti)) != NULL; ti++) {
765 if (y->backend_data) {
766 int i;
767 xhtmlindex *xi;
768
769 fprintf(fp, "<dt>");
ce9921d6 770 xhtml_para(fp, y->text, FALSE);
d7482997 771 fprintf(fp, "</dt>\n<dd>");
772
773 xi = (xhtmlindex*) y->backend_data;
774 for (i=0; i<xi->nsection; i++) {
775 xhtmlsection *sect = xi->sections[i];
776 if (sect) {
777 fprintf(fp, "<a href='%s#%s'>", sect->file->filename, sect->fragment);
778 if (sect->para->kwtext) {
ce9921d6 779 xhtml_para(fp, sect->para->kwtext, FALSE);
d7482997 780 } else if (sect->para->words) {
ce9921d6 781 xhtml_para(fp, sect->para->words, FALSE);
d7482997 782 }
783 fprintf(fp, "</a>");
784 if (i+1<xi->nsection) {
785 fprintf(fp, ", ");
786 }
787 }
788 }
789 fprintf(fp, "</dd>\n");
790 }
791 }
792 fprintf(fp, "</dl>\n");
d2e74722 793}
794static void xhtml_do_index()
795{
796 word temp_word = { NULL, NULL, word_Normal, 0, 0, L"Index", { NULL, 0, 0} };
797 FILE *fp = fopen(xhtml_index_filename, "w");
798
799 if (fp==NULL)
800 fatal(err_cantopenw, xhtml_index_filename);
801 xhtml_doheader(fp, &temp_word);
802 xhtml_donavlinks(fp, NULL);
803
804 xhtml_do_index_body(fp);
d7482997 805
806 xhtml_donavlinks(fp, NULL);
807 xhtml_dofooter(fp);
808 fclose(fp);
809}
810
811/* Output the given file. This includes whatever contents at beginning and end, etc. etc. */
812static void xhtml_do_file(xhtmlfile *file)
813{
814 FILE *fp = fopen(file->filename, "w");
815 if (fp==NULL)
816 fatal(err_cantopenw, file->filename);
817
818 if (file->sections->para->words) {
819 xhtml_doheader(fp, file->sections->para->words);
820 } else if (file->sections->para->kwtext) {
821 xhtml_doheader(fp, file->sections->para->kwtext);
822 } else {
823 xhtml_doheader(fp, NULL);
824 }
825
826 xhtml_donavlinks(fp, file);
827
d2e74722 828 if (file->is_leaf && conf.leaf_contains_contents &&
829 xhtml_do_contents(NULL, file)>=conf.leaf_smallest_contents)
d7482997 830 xhtml_do_contents(fp, file);
831 xhtml_do_sections(fp, file->sections);
832 if (!file->is_leaf)
833 xhtml_do_naked_contents(fp, file);
834
835 xhtml_donavlinks(fp, file);
836
837 xhtml_dofooter(fp);
838 fclose(fp);
839
840 xhtml_last_file = file;
841}
842
843/* Output the top-level file. */
844static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform)
845{
846 paragraph *p;
847 int done=FALSE;
848 FILE *fp = fopen(file->filename, "w");
849 if (fp==NULL)
850 fatal(err_cantopenw, file->filename);
851
852 /* Do the title -- only one allowed */
853 for (p = sourceform; p && !done; p = p->next)
854 {
855 if (p->type == para_Title)
856 {
857 xhtml_doheader(fp, p->words);
858 done=TRUE;
859 }
860 }
861 if (!done)
862 xhtml_doheader(fp, NULL /* Eek! */);
863
d2e74722 864 /*
865 * Display the title.
866 */
867 for (p = sourceform; p; p = p->next)
868 {
869 if (p->type == para_Title) {
ce9921d6 870 xhtml_heading(fp, p, FALSE);
d2e74722 871 break;
872 }
873 }
874
9057a0a8 875 /* Do the preamble */
d7482997 876 for (p = sourceform; p; p = p->next)
877 {
8902e0ed 878 if (p->type == para_Chapter || p->type == para_Heading ||
879 p->type == para_Subsect || p->type == para_Appendix ||
880 p->type == para_UnnumberedChapter) {
881 /*
882 * We've found the end of the preamble. Do every normal
883 * paragraph up to there.
884 */
ce9921d6 885 xhtml_do_paras(fp, sourceform, p, FALSE);
8902e0ed 886 break;
d7482997 887 }
888 }
d7482997 889
890 xhtml_do_contents(fp, file);
891 xhtml_do_sections(fp, file->sections);
d2e74722 892
5d9cc07b 893 /*
894 * Put the index in the top file if we're in single-file mode
895 * (leaf-level 0).
896 */
897 if (conf.leaf_level == 0 && count234(idx->entries) > 0) {
d2e74722 898 fprintf(fp, "<a name=\"index\"></a><h1>Index</h1>\n");
899 xhtml_do_index_body(fp);
900 }
901
d7482997 902 xhtml_dofooter(fp);
903 fclose(fp);
904}
905
/*
 * Convert the wide string 'in' to a newly allocated ASCII string,
 * stored in *out. Characters outside the printable ASCII range
 * (32..126) become '?'.
 */
static void xhtml_utostr(wchar_t *in, char **out)
{
    int len = ustrlen(in);
    char *buf = smalloc(len+1);
    char *dst = buf;
    wchar_t *src;

    for (src = in; src < in + len; src++)
        *dst++ = (*src >= 32 && *src <= 126) ? (char)*src : '?';
    *dst = 0;

    *out = buf;
}
923
924/*
925 * Write contents for the given file, and subfiles, down to
926 * the appropriate contents depth. Returns the number of
927 * entries written.
928 */
929static int xhtml_do_contents(FILE *fp, xhtmlfile *file)
930{
931 int level, limit, start_level, count = 0;
932 if (!file)
933 return 0;
934
935 level = (file->sections)?(file->sections->level):(0);
936 limit = conf.contents_depth[(level>5)?(5):(level)];
937 start_level = (file->is_leaf) ? (level-1) : (level);
938 last_level = start_level;
939
940 count += xhtml_do_contents_section_limit(fp, file->sections, limit);
941 count += xhtml_do_contents_limit(fp, file->child, limit);
942 if (fp!=NULL) {
943 while (last_level > start_level) {
944 last_level--;
945 fprintf(fp, "</ul>\n");
946 }
947 }
948 return count;
949}
950
951/* As above, but doesn't do anything in the current file */
952static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file)
953{
954 int level, limit, start_level, count = 0;
955 if (!file)
956 return 0;
957
958 level = (file->sections)?(file->sections->level):(0);
959 limit = conf.contents_depth[(level>5)?(5):(level)];
960 start_level = (file->is_leaf) ? (level-1) : (level);
961 last_level = start_level;
962
963 count = xhtml_do_contents_limit(fp, file->child, limit);
964 if (fp!=NULL) {
965 while (last_level > start_level) {
966 last_level--;
967 fprintf(fp, "</ul>\n");
968 }
969 }
970 return count;
971}
972
973/*
974 * Write contents for the given file, children, and siblings, down to
975 * given limit contents depth.
976 */
977static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit)
978{
979 int count = 0;
980 while (file) {
981 count += xhtml_do_contents_section_limit(fp, file->sections, limit);
982 count += xhtml_do_contents_limit(fp, file->child, limit);
983 file = file->next;
984 }
985 return count;
986}
987
988/*
989 * Write contents entries for the given section tree, down to the
990 * limit contents depth.
991 */
992static int xhtml_do_contents_section_deep_limit(FILE *fp, xhtmlsection *section, int limit)
993{
994 int count = 0;
995 while (section) {
996 if (!xhtml_add_contents_entry(fp, section, limit))
997 return 0;
998 else
999 count++;
1000 count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
1001 section = section->next;
1002 }
1003 return count;
1004}
1005
1006/*
1007 * Write contents entries for the given section tree, down to the
1008 * limit contents depth.
1009 */
1010static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit)
1011{
1012 int count = 0;
1013 if (!section)
1014 return 0;
1015 xhtml_add_contents_entry(fp, section, limit);
1016 count=1;
1017 count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
1018 /* section=section->child;
1019 while (section && xhtml_add_contents_entry(fp, section, limit)) {
1020 section = section->next;
1021 }*/
1022 return count;
1023}
1024
1025/*
1026 * Add a section entry, unless we're exceeding the limit, in which
1027 * case return FALSE (otherwise return TRUE).
1028 */
1029static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit)
1030{
1031 if (!section || section->level > limit)
1032 return FALSE;
5d9cc07b 1033 if (fp==NULL || section->level < 0)
d7482997 1034 return TRUE;
1035 while (last_level > section->level) {
1036 last_level--;
1037 fprintf(fp, "</ul>\n");
1038 }
1039 while (last_level < section->level) {
1040 last_level++;
1041 fprintf(fp, "<ul>\n");
1042 }
1043 fprintf(fp, "<li><a href=\"%s#%s\">", section->file->filename, section->fragment);
1044 if (section->para->kwtext) {
ce9921d6 1045 xhtml_para(fp, section->para->kwtext, FALSE);
d7482997 1046 if (section->para->words) {
1047 fprintf(fp, ": ");
1048 }
1049 }
1050 if (section->para->words) {
ce9921d6 1051 xhtml_para(fp, section->para->words, FALSE);
d7482997 1052 }
1053 fprintf(fp, "</a></li>\n");
1054 return TRUE;
1055}
1056
1057/*
1058 * Write all the sections in this file. Do all paragraphs in this section, then all
1059 * children (recursively), then go on to the next one (tail recursively).
1060 */
1061static void xhtml_do_sections(FILE *fp, xhtmlsection *sections)
1062{
1063 while (sections) {
1064 currentsection = sections;
ce9921d6 1065 xhtml_do_paras(fp, sections->para, NULL, TRUE);
d7482997 1066 xhtml_do_sections(fp, sections->child);
1067 sections = sections->next;
1068 }
1069}
1070
1071/* Write this list of paragraphs. Close off all lists at the end. */
ce9921d6 1072static void xhtml_do_paras(FILE *fp, paragraph *p, paragraph *end,
1073 int indexable)
d7482997 1074{
7136a6c7 1075 int last_type = -1, ptype, first=TRUE;
1076 stack lcont_stack = stk_new();
d7482997 1077 if (!p)
1078 return;
1079
1080/* for (; p && (xhtml_para_level(p)>limit || xhtml_para_level(p)==-1 || first); p=p->next) {*/
8902e0ed 1081 for (; p && p != end && (xhtml_para_level(p)==-1 || first); p=p->next) {
d7482997 1082 first=FALSE;
7136a6c7 1083 switch (ptype = p->type)
d7482997 1084 {
1085 /*
1086 * Things we ignore because we've already processed them or
1087 * aren't going to touch them in this pass.
1088 */
1089 case para_IM:
1090 case para_BR:
1091 case para_Biblio: /* only touch BiblioCited */
1092 case para_VersionID:
d7482997 1093 case para_NoCite:
1094 case para_Title:
1095 break;
1096
1097 /*
1098 * Chapter titles.
1099 */
1100 case para_Chapter:
1101 case para_Appendix:
1102 case para_UnnumberedChapter:
ce9921d6 1103 xhtml_heading(fp, p, indexable);
d7482997 1104 break;
1105
1106 case para_Heading:
1107 case para_Subsect:
ce9921d6 1108 xhtml_heading(fp, p, indexable);
d7482997 1109 break;
1110
1111 case para_Rule:
1112 fprintf(fp, "\n<hr />\n");
1113 break;
1114
1115 case para_Normal:
9057a0a8 1116 case para_Copyright:
d7482997 1117 fprintf(fp, "\n<p>");
ce9921d6 1118 xhtml_para(fp, p->words, indexable);
d7482997 1119 fprintf(fp, "</p>\n");
1120 break;
1121
7136a6c7 1122 case para_LcontPush:
1123 {
1124 int *p;
1125 p = mknew(int);
1126 *p = last_type;
1127 stk_push(lcont_stack, p);
1128 last_type = para_Normal;
1129 }
1130 break;
1131 case para_LcontPop:
1132 {
1133 int *p = stk_pop(lcont_stack);
1134 assert(p);
1135 ptype = last_type = *p;
1136 sfree(p);
1137 goto closeofflist; /* ick */
1138 }
1139 break;
2614b01d 1140 case para_QuotePush:
1141 fprintf(fp, "<blockquote>\n");
1142 break;
1143 case para_QuotePop:
1144 fprintf(fp, "</blockquote>\n");
1145 break;
7136a6c7 1146
d7482997 1147 case para_Bullet:
1148 case para_NumberedList:
7136a6c7 1149 case para_Description:
1150 case para_DescribedThing:
d7482997 1151 case para_BiblioCited:
1152 if (last_type!=p->type) {
1153 /* start up list if necessary */
1154 if (p->type == para_Bullet) {
1155 fprintf(fp, "<ul>\n");
1156 } else if (p->type == para_NumberedList) {
1157 fprintf(fp, "<ol>\n");
7136a6c7 1158 } else if (p->type == para_BiblioCited ||
1159 p->type == para_DescribedThing ||
1160 p->type == para_Description) {
d7482997 1161 fprintf(fp, "<dl>\n");
1162 }
1163 }
7136a6c7 1164 if (p->type == para_Bullet || p->type == para_NumberedList) {
d7482997 1165 fprintf(fp, "<li>");
7136a6c7 1166 } else if (p->type == para_DescribedThing) {
1167 fprintf(fp, "<dt>");
1168 } else if (p->type == para_Description) {
1169 fprintf(fp, "<dd>");
1170 } else if (p->type == para_BiblioCited) {
d7482997 1171 fprintf(fp, "<dt>");
ce9921d6 1172 xhtml_para(fp, p->kwtext, indexable);
d7482997 1173 fprintf(fp, "</dt>\n<dd>");
1174 }
ce9921d6 1175 xhtml_para(fp, p->words, indexable);
7136a6c7 1176 {
1177 paragraph *p2 = p->next;
1178 if (p2 && xhtml_para_level(p2)==-1 && p2->type == para_LcontPush)
1179 break;
1180 }
1181
1182 closeofflist:
1183 if (ptype == para_BiblioCited) {
d7482997 1184 fprintf(fp, "</dd>\n");
7136a6c7 1185 } else if (p->type == para_DescribedThing) {
1186 fprintf(fp, "</dt>");
1187 } else if (p->type == para_Description) {
1188 fprintf(fp, "</dd>");
1189 } else if (ptype == para_Bullet || ptype == para_NumberedList) {
d7482997 1190 fprintf(fp, "</li>");
1191 }
7136a6c7 1192 if (ptype == para_Bullet || ptype == para_NumberedList ||
1193 ptype == para_BiblioCited || ptype == para_Description ||
1194 ptype == para_DescribedThing)
d7482997 1195 /* close off list if necessary */
1196 {
1197 paragraph *p2 = p->next;
1198 int close_off=FALSE;
1199/* if (p2 && (xhtml_para_level(p2)>limit || xhtml_para_level(p2)==-1)) {*/
1200 if (p2 && xhtml_para_level(p2)==-1) {
7136a6c7 1201 if (p2->type != ptype && p2->type != para_LcontPush)
d7482997 1202 close_off=TRUE;
1203 } else {
1204 close_off=TRUE;
1205 }
1206 if (close_off) {
7136a6c7 1207 if (ptype == para_Bullet) {
d7482997 1208 fprintf(fp, "</ul>\n");
7136a6c7 1209 } else if (ptype == para_NumberedList) {
d7482997 1210 fprintf(fp, "</ol>\n");
7136a6c7 1211 } else if (ptype == para_BiblioCited ||
1212 ptype == para_Description ||
1213 ptype == para_DescribedThing) {
d7482997 1214 fprintf(fp, "</dl>\n");
1215 }
1216 }
1217 }
1218 break;
1219
1220 case para_Code:
1221 xhtml_codepara(fp, p->words);
1222 break;
1223 }
7136a6c7 1224 last_type = ptype;
d7482997 1225 }
7136a6c7 1226
1227 stk_free(lcont_stack);
d7482997 1228}
1229
1230/*
1231 * Output a header for this XHTML file.
1232 */
1233static void xhtml_doheader(FILE *fp, word *title)
1234{
1235 fprintf(fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n");
1236 fprintf(fp, "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n");
1237 fprintf(fp, "<html xmlns='http://www.w3.org/1999/xhtml'>\n\n<head>\n<title>");
1238 if (title==NULL)
1239 fprintf(fp, "The thing with no name!");
1240 else
ce9921d6 1241 xhtml_para(fp, title, FALSE);
d7482997 1242 fprintf(fp, "</title>\n");
1243 fprintf(fp, "<meta name=\"generator\" content=\"Halibut %s xhtml-backend\" />\n", version);
1244 if (conf.author)
1245 fprintf(fp, "<meta name=\"author\" content=\"%ls\" />\n", conf.author);
1246 if (conf.description)
1247 fprintf(fp, "<meta name=\"description\" content=\"%ls\" />\n", conf.description);
1248 if (conf.head_end)
1249 fprintf(fp, "%ls\n", conf.head_end);
1250 fprintf(fp, "</head>\n\n");
1251 if (conf.body)
1252 fprintf(fp, "%ls\n", conf.body);
1253 else
1254 fprintf(fp, "<body>\n");
1255 if (conf.body_start)
1256 fprintf(fp, "%ls\n", conf.body_start);
1257}
1258
1259/*
1260 * Output a footer for this XHTML file.
1261 */
1262static void xhtml_dofooter(FILE *fp)
1263{
1264 fprintf(fp, "\n<hr />\n\n");
1265 if (conf.body_end)
1266 fprintf(fp, "%ls\n", conf.body_end);
1267 if (!conf.suppress_address) {
1268 fprintf(fp,"<address>\n");
1269 if (conf.address_start)
1270 fprintf(fp, "%ls\n", conf.address_start);
1271 /* Do the version ID */
1272 if (conf.include_version_id) {
1273 paragraph *p;
1274 int started = 0;
1275 for (p = sourceparas; p; p = p->next)
1276 if (p->type == para_VersionID) {
1277 xhtml_versionid(fp, p->words, started);
1278 started = 1;
1279 }
1280 }
1281 if (conf.address_end)
1282 fprintf(fp, "%ls\n", conf.address_end);
1283 fprintf(fp, "</address>\n");
1284 }
1285 fprintf(fp, "</body>\n\n</html>\n");
1286}
1287
1288/*
1289 * Output the versionid paragraph. Typically this is a version control
1290 * ID string (such as $Id...$ in RCS).
1291 */
1292static void xhtml_versionid(FILE *fp, word *text, int started)
1293{
1294 rdstringc t = { 0, 0, NULL };
1295
1296 rdaddc(&t, '['); /* FIXME: configurability */
ce9921d6 1297 xhtml_rdaddwc(&t, text, NULL, FALSE);
d7482997 1298 rdaddc(&t, ']'); /* FIXME: configurability */
1299
1300 if (started)
1301 fprintf(fp, "<br>\n");
1302 fprintf(fp, "%s\n", t.text);
1303 sfree(t.text);
1304}
1305
1306/* Is this an XHTML reserved character? */
1307static int xhtml_reservedchar(int c)
1308{
1309 if (c=='&' || c=='<' || c=='>' || c=='"')
1310 return TRUE;
1311 else
1312 return FALSE;
1313}
1314
1315/*
1316 * Convert a wide string into valid XHTML: Anything outside ASCII will
1317 * be fixed up as an entity. Currently we don't worry about constraining the
1318 * encoded character set, which we should probably do at some point (we can
1319 * still fix up and return FALSE - see the last comment here). We also don't
1320 * currently
1321 *
1322 * Because this is only used for words, spaces are HARD spaces (any other
1323 * spaces will be word_Whitespace not word_Normal). So they become &nbsp;
1324 * Unless hard_spaces is FALSE, of course (code paragraphs break the above
1325 * rule).
1326 *
1327 * If `result' is non-NULL, mallocs the resulting string and stores a pointer to
1328 * it in `*result'. If `result' is NULL, merely checks whether all
1329 * characters in the string are feasible.
1330 *
1331 * Return is nonzero if all characters are OK. If not all
1332 * characters are OK but `result' is non-NULL, a result _will_
1333 * still be generated!
1334 */
4b3c5afb 1335static int xhtml_convert(wchar_t *s, int maxlen, char **result,
1336 int hard_spaces) {
d7482997 1337 int doing = (result != 0);
1338 int ok = TRUE;
1339 char *p = NULL;
1340 int plen = 0, psize = 0;
1341
4b3c5afb 1342 if (maxlen <= 0)
1343 maxlen = -1;
1344
1345 for (; *s && maxlen != 0; s++, maxlen--) {
d7482997 1346 wchar_t c = *s;
1347
1348#define ensure_size(i) if (i>=psize) { psize = i+256; p = resize(p, psize); }
1349
1350 if (((c == 32 && !hard_spaces) || (c > 32 && c <= 126 && !xhtml_reservedchar(c)))) {
1351 /* Char is OK. */
1352 if (doing)
1353 {
1354 ensure_size(plen);
1355 p[plen++] = (char)c;
1356 }
1357 } else {
1358 /* Char needs fixing up. */
1359 /* ok = FALSE; -- currently we never return FALSE; we
1360 * might want to when considering a character set for the
1361 * encoded document.
1362 */
1363 if (doing)
1364 {
1365 if (c==32) { /* a space in a word is a hard space */
1366 ensure_size(plen+6); /* includes space for the NUL, which is subsequently stomped on */
1367 sprintf(p+plen, "&nbsp;");
1368 plen+=6;
1369 } else {
1370 /* FIXME: entity names! */
1371 ensure_size(plen+8); /* includes space for the NUL, which is subsequently stomped on */
1372 plen+=sprintf(p+plen, "&#%04i;", (int)c);
1373 }
1374 }
1375 }
1376 }
1377 if (doing) {
1378 p = resize(p, plen+1);
1379 p[plen] = '\0';
1380 *result = p;
1381 }
1382 return ok;
1383}
1384
1385/*
1386 * This formats the given words as XHTML.
ce9921d6 1387 *
1388 * `indexable', if FALSE, prohibits adding any index references.
1389 * You might use this, for example, if an index reference occurred
1390 * in a section title, to prevent phony index references when the
1391 * section title is processed in strange places such as contents
1392 * sections.
d7482997 1393 */
ce9921d6 1394static void xhtml_rdaddwc(rdstringc *rs, word *text, word *end, int indexable) {
d7482997 1395 char *c;
1396 keyword *kwl;
1397 xhtmlsection *sect;
1398 indextag *itag;
1399 int ti;
1400
1401 for (; text && text != end; text = text->next) {
1402 switch (text->type) {
1403 case word_HyperLink:
1404 xhtml_utostr(text->text, &c);
1405 rdaddsc(rs, "<a href=\"");
1406 rdaddsc(rs, c);
1407 rdaddsc(rs, "\">");
1408 sfree(c);
1409 break;
1410
1411 case word_UpperXref:
1412 case word_LowerXref:
1413 kwl = kw_lookup(keywords, text->text);
1414 if (kwl) {
1415 sect=xhtml_find_section(kwl->para);
1416 if (sect) {
1417 rdaddsc(rs, "<a href=\"");
1418 rdaddsc(rs, sect->file->filename);
1419 rdaddc(rs, '#');
1420 rdaddsc(rs, sect->fragment);
1421 rdaddsc(rs, "\">");
1422 } else {
1423 rdaddsc(rs, "<a href=\"Apologies.html\"><!-- probably a bibliography cross reference -->");
1424 error(err_whatever, "Couldn't locate cross-reference! (Probably a bibliography entry.)");
1425 }
1426 } else {
1427 rdaddsc(rs, "<a href=\"Apologies.html\"><!-- unknown cross-reference -->");
1428 error(err_whatever, "Couldn't locate cross-reference! (Wasn't in source file.)");
1429 }
1430 break;
1431
1432 case word_IndexRef: /* in theory we could make an index target here */
1433/* rdaddsc(rs, "<a name=\"idx-");
1434 xhtml_utostr(text->text, &c);
1435 rdaddsc(rs, c);
1436 sfree(c);
1437 rdaddsc(rs, "\"></a>");*/
1438 /* what we _do_ need to do is to fix up the backend data
1439 * for any indexentry this points to.
1440 */
ce9921d6 1441 if (!indexable)
1442 break;
1443
d7482997 1444 for (ti=0; (itag = (indextag *)index234(idx->tags, ti))!=NULL; ti++) {
1445 /* FIXME: really ustricmp() and not ustrcmp()? */
1446 if (ustricmp(itag->name, text->text)==0) {
1447 break;
1448 }
1449 }
1450 if (itag!=NULL) {
1451 if (itag->refs!=NULL) {
1452 int i;
1453 for (i=0; i<itag->nrefs; i++) {
1454 xhtmlindex *idx_ref;
1455 indexentry *ientry;
1456
1457 ientry = itag->refs[i];
1458 if (ientry->backend_data==NULL) {
1459 idx_ref = (xhtmlindex*) smalloc(sizeof(xhtmlindex));
1460 if (idx_ref==NULL)
1461 fatal(err_nomemory);
1462 idx_ref->nsection = 0;
1463 idx_ref->size = 4;
1464 idx_ref->sections = (xhtmlsection**) smalloc(idx_ref->size * sizeof(xhtmlsection*));
1465 if (idx_ref->sections==NULL)
1466 fatal(err_nomemory);
1467 ientry->backend_data = idx_ref;
1468 } else {
1469 idx_ref = ientry->backend_data;
1470 if (idx_ref->nsection+1 > idx_ref->size) {
1471 int new_size = idx_ref->size * 2;
1472 idx_ref->sections = srealloc(idx_ref->sections, new_size * sizeof(xhtmlsection));
1473 if (idx_ref->sections==NULL) {
1474 fatal(err_nomemory);
1475 }
1476 idx_ref->size = new_size;
1477 }
1478 }
1479 idx_ref->sections[idx_ref->nsection++] = currentsection;
1480#if 0
1481#endif
1482 }
1483 } else {
1484 fatal(err_whatever, "Index tag had no entries!");
1485 }
1486 } else {
1487 fprintf(stderr, "Looking for index entry '%ls'\n", text->text);
1488 fatal(err_whatever, "Couldn't locate index entry! (Wasn't in index.)");
1489 }
1490 break;
1491
1492 case word_HyperEnd:
1493 case word_XrefEnd:
1494 rdaddsc(rs, "</a>");
1495 break;
1496
1497 case word_Normal:
1498 case word_Emph:
1499 case word_Code:
1500 case word_WeakCode:
1501 case word_WhiteSpace:
1502 case word_EmphSpace:
1503 case word_CodeSpace:
1504 case word_WkCodeSpace:
1505 case word_Quote:
1506 case word_EmphQuote:
1507 case word_CodeQuote:
1508 case word_WkCodeQuote:
1509 assert(text->type != word_CodeQuote &&
1510 text->type != word_WkCodeQuote);
1511 if (towordstyle(text->type) == word_Emph &&
1512 (attraux(text->aux) == attr_First ||
1513 attraux(text->aux) == attr_Only))
1514 rdaddsc(rs, "<em>");
1515 else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) &&
1516 (attraux(text->aux) == attr_First ||
1517 attraux(text->aux) == attr_Only))
1518 rdaddsc(rs, "<code>");
1519
1520 if (removeattr(text->type) == word_Normal) {
4b3c5afb 1521 if (xhtml_convert(text->text, 0, &c, TRUE)) /* spaces in the word are hard */
d7482997 1522 rdaddsc(rs, c);
1523 else
ce9921d6 1524 xhtml_rdaddwc(rs, text->alt, NULL, indexable);
d7482997 1525 sfree(c);
1526 } else if (removeattr(text->type) == word_WhiteSpace) {
1527 rdaddc(rs, ' ');
1528 } else if (removeattr(text->type) == word_Quote) {
1529 rdaddsc(rs, "&quot;");
1530 }
1531
1532 if (towordstyle(text->type) == word_Emph &&
1533 (attraux(text->aux) == attr_Last ||
1534 attraux(text->aux) == attr_Only))
1535 rdaddsc(rs, "</em>");
1536 else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) &&
1537 (attraux(text->aux) == attr_Last ||
1538 attraux(text->aux) == attr_Only))
1539 rdaddsc(rs, "</code>");
1540 break;
1541 }
1542 }
1543}
1544
1545/* Output a heading, formatted as XHTML.
1546 */
ce9921d6 1547static void xhtml_heading(FILE *fp, paragraph *p, int indexable)
d7482997 1548{
1549 rdstringc t = { 0, 0, NULL };
1550 word *tprefix = p->kwtext;
1551 word *nprefix = p->kwtext2;
1552 word *text = p->words;
1553 int level = xhtml_para_level(p);
1554 xhtmlsection *sect = xhtml_find_section(p);
5d9cc07b 1555 xhtmlheadfmt *fmt;
d7482997 1556 char *fragment;
1557 if (sect) {
1558 fragment = sect->fragment;
1559 } else {
d2e74722 1560 if (p->type == para_Title)
1561 fragment = "title";
1562 else {
1563 fragment = ""; /* FIXME: what else can we do? */
1564 error(err_whatever, "Couldn't locate heading cross-reference!");
1565 }
d7482997 1566 }
1567
5d9cc07b 1568 if (p->type == para_Title)
1569 fmt = NULL;
1570 else if (level == 1)
1571 fmt = &conf.fchapter;
1572 else if (level-1 < conf.nfsect)
1573 fmt = &conf.fsect[level-1];
1574 else
1575 fmt = &conf.fsect[conf.nfsect-1];
1576
1577 if (fmt && fmt->just_numbers && nprefix) {
ce9921d6 1578 xhtml_rdaddwc(&t, nprefix, NULL, indexable);
5d9cc07b 1579 if (fmt) {
1580 char *c;
4b3c5afb 1581 if (xhtml_convert(fmt->number_suffix, 0, &c, FALSE)) {
5d9cc07b 1582 rdaddsc(&t, c);
1583 sfree(c);
1584 }
1585 }
1586 } else if (fmt && !fmt->just_numbers && tprefix) {
ce9921d6 1587 xhtml_rdaddwc(&t, tprefix, NULL, indexable);
5d9cc07b 1588 if (fmt) {
1589 char *c;
4b3c5afb 1590 if (xhtml_convert(fmt->number_suffix, 0, &c, FALSE)) {
5d9cc07b 1591 rdaddsc(&t, c);
1592 sfree(c);
1593 }
1594 }
d7482997 1595 }
ce9921d6 1596 xhtml_rdaddwc(&t, text, NULL, indexable);
d9d3dd95 1597 /*
1598 * If we're outputting in single-file mode, we need to lower
1599 * the level of each heading by one, because the overall
1600 * document title will be sitting right at the top as an <h1>
1601 * and so chapters and sections should start at <h2>.
1602 *
1603 * Even if not, the document title will come back from
1604 * xhtml_para_level() as level zero, so we must increment that
1605 * no matter what leaf_level is set to.
1606 */
1607 if (conf.leaf_level == 0 || level == 0)
1608 level++;
d7482997 1609 fprintf(fp, "<a name=\"%s\"></a><h%i>%s</h%i>\n", fragment, level, t.text, level);
1610 sfree(t.text);
1611}
1612
1613/* Output a paragraph. Styles are handled by xhtml_rdaddwc().
1614 * This looks pretty simple; I may have missed something ...
1615 */
ce9921d6 1616static void xhtml_para(FILE *fp, word *text, int indexable)
d7482997 1617{
1618 rdstringc out = { 0, 0, NULL };
ce9921d6 1619 xhtml_rdaddwc(&out, text, NULL, indexable);
d7482997 1620 fprintf(fp, "%s", out.text);
1621 sfree(out.text);
1622}
1623
1624/* Output a code paragraph. I'm treating this as preformatted, which
1625 * may not be entirely correct. See xhtml_para() for my worries about
1626 * this being overly-simple; however I think that most of the complexity
1627 * of the text backend came entirely out of word wrapping anyway.
1628 */
1629static void xhtml_codepara(FILE *fp, word *text)
1630{
1631 fprintf(fp, "<pre>");
1632 for (; text; text = text->next) if (text->type == word_WeakCode) {
4b3c5afb 1633 word *here, *next;
d7482997 1634 char *c;
4b3c5afb 1635
1636 /*
1637 * See if this WeakCode is followed by an Emph to indicate
1638 * emphasis.
1639 */
1640 here = text;
1641 if (text->next && text->next->type == word_Emph) {
1642 next = text = text->next;
1643 } else
1644 next = NULL;
1645
1646 if (next) {
1647 wchar_t *t, *e;
1648 int n;
1649
1650 t = here->text;
1651 e = next->text;
1652
1653 while (*e) {
1654 int ec = *e;
1655
1656 for (n = 0; t[n] && e[n] && e[n] == ec; n++);
1657 xhtml_convert(t, n, &c, FALSE);
1658 fprintf(fp, "%s%s%s",
1659 (ec == 'i' ? "<em>" : ec == 'b' ? "<b>" : ""),
1660 c,
1661 (ec == 'i' ? "</em>" : ec == 'b' ? "</b>" : ""));
1662 sfree(c);
1663
1664 t += n;
1665 e += n;
1666 }
1667
1668 xhtml_convert(t, 0, &c, FALSE);
1669 fprintf(fp, "%s\n", c);
1670 sfree(c);
1671 } else {
1672 xhtml_convert(here->text, 0, &c, FALSE);
1673 fprintf(fp, "%s\n", c);
1674 sfree(c);
1675 }
d7482997 1676 }
1677 fprintf(fp, "</pre>\n");
1678}