mdw@git.distorted.org.uk Git - sgt/halibut/blob - bk_xhtml.c

   1 /*
   2  * xhtml backend for Halibut
   3  * (initial implementation by James Aylett)
   4  *
   5  * Still to do:
   6  *
   7  *  +++ doesn't handle non-breaking hyphens. Not sure how to yet.
   8  *  +++ entity names (from a file -- ideally supply normal SGML files)
   9  *  +++ configuration directive to file split where the current layout
  10  *      code wouldn't. Needs changes to _ponder_layout() and _do_paras(),
  11  *      perhaps others.
  12  *
  13  * Limitations:
  14  *
  15  *  +++ biblio/index references target the nearest section marker, rather
  16  *   than having a dedicated target themselves. In large bibliographies
  17  *   this will cause problems. (The solution is to fake up a response
  18  *   from xhtml_find_section(), probably linking it into the sections
  19  *   chain just in case we need it again, and to make freeing it up
  20  *   easier.) docsrc.pl used to work as we do, however, and SGT agrees that
  21  *   this is acceptable for now.
  22  *  +++ can't cope with leaf-level == 0. It's all to do with the
  23  *   top-level file not being normal, probably not even having a valid
  24  *   section level, and stuff like that. I question whether this is an
  25  *   issue, frankly; small manuals that fit on one page should probably
  26  *   not be written in halibut at all.
  27  */
  28
  29 #include <stdio.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32 #include <assert.h>
  33 #include "halibut.h"
  34
  35 /*
  36  * FILENAME_TEMPLATE (overridable in config of course) allows you
  37  * to choose the general form for your HTML file names. It is
  38  * slightly printf-styled (% followed by a single character is a
  39  * formatting directive, %% is a literal %). Formatting directives
  40  * are:
  41  *
  42  *  - %n is the section type-plus-number, minus whitespace (`Chapter1.2').
  43  *  - %b is the section number on its own (`1.2').
  44  *  - %k is the section's _internal_ keyword.
  45  *  - %N is the section's visible title in the output, again minus
  46  *    whitespace.
  47  *
  48  * %n, %b and %k will all default to %N if the section is
  49  * unnumbered (`Bibliography' is often a good example).
  50  */
  51
  52 #define FILENAME_SINGLE "Manual.html"
  53 #define FILENAME_CONTENTS "Contents.html"
  54 #define FILENAME_INDEX "IndexPage.html"
  55 #define FILENAME_TEMPLATE "%n.html"
  56
  57 struct xhtmlsection_Struct {
  58     struct xhtmlsection_Struct *next; /* next sibling (NULL if split across files) */
  59     struct xhtmlsection_Struct *child; /* NULL if split across files */
  60     struct xhtmlsection_Struct *parent; /* NULL if split across files */
  61     struct xhtmlsection_Struct *chain; /* single structure independent of weird trees */
  62     paragraph *para;
  63     struct xhtmlfile_Struct *file; /* which file is this a part of? */
  64     char *fragment; /* fragment id within the file */
  65     int level;
  66 };
  67
  68 struct xhtmlfile_Struct {
  69     struct xhtmlfile_Struct *next;
  70     struct xhtmlfile_Struct *child;
  71     struct xhtmlfile_Struct *parent;
  72     char *filename;
  73     struct xhtmlsection_Struct *sections; /* sections within this file (only one for non-leaf) */
  74     int is_leaf; /* is this file a leaf file, ie does it not have any children? */
  75 };
  76
  77 typedef struct xhtmlsection_Struct xhtmlsection;
  78 typedef struct xhtmlfile_Struct xhtmlfile;
  79 typedef struct xhtmlindex_Struct xhtmlindex;
  80
  81 struct xhtmlindex_Struct {
  82   int nsection;
  83   int size;
  84   xhtmlsection **sections;
  85 };
  86
  87 typedef struct {
  88     int just_numbers;
  89     wchar_t *number_suffix;
  90 } xhtmlheadfmt;
  91
  92 typedef struct {
  93   int contents_depth[6];
  94   int leaf_contains_contents;
  95   int leaf_level;
  96   int leaf_smallest_contents;
  97   int include_version_id;
  98   wchar_t *author, *description;
  99   wchar_t *head_end, *body, *body_start, *body_end, *address_start, *address_end, *nav_attrs;
 100   int suppress_address;
 101   xhtmlheadfmt fchapter, *fsect;
 102   int nfsect;
 103   char *contents_filename, *index_filename;
 104   char *single_filename, *template_filename;
 105 } xhtmlconfig;
 106
 107 /*static void xhtml_level(paragraph *, int);
 108 static void xhtml_level_0(paragraph *);
 109 static void xhtml_docontents(FILE *, paragraph *, int);
 110 static void xhtml_dosections(FILE *, paragraph *, int);
 111 static void xhtml_dobody(FILE *, paragraph *, int);*/
 112
 113 static void xhtml_doheader(FILE *, word *);
 114 static void xhtml_dofooter(FILE *);
 115 static void xhtml_versionid(FILE *, word *, int);
 116
 117 static void xhtml_utostr(wchar_t *, char **);
 118 static int xhtml_para_level(paragraph *);
 119 static int xhtml_reservedchar(int);
 120
 121 static int xhtml_convert(wchar_t *, int, char **, int);
 122 static void xhtml_rdaddwc(rdstringc *, word *, word *, int);
 123 static void xhtml_para(FILE *, word *, int);
 124 static void xhtml_codepara(FILE *, word *);
 125 static void xhtml_heading(FILE *, paragraph *, int);
 126
 127 /* File-global variables are much easier than passing these things
 128  * all over the place. Evil, but easier. We can replace this with a single
 129  * structure at some point.
 130  */
 131 static xhtmlconfig conf;
 132 static keywordlist *keywords;
 133 static indexdata *idx;
 134 static xhtmlfile *topfile;
 135 static xhtmlsection *topsection;
 136 static paragraph *sourceparas;
 137 static xhtmlfile *lastfile;
 138 static xhtmlfile *xhtml_last_file = NULL;
 139 static int last_level=-1, start_level;
 140 static xhtmlsection *currentsection;
 141
 142 static xhtmlconfig xhtml_configure(paragraph *source)
 143 {
 144   xhtmlconfig ret;
 145
 146   /*
 147    * Defaults.
 148    */
 149   ret.contents_depth[0] = 2;
 150   ret.contents_depth[1] = 3;
 151   ret.contents_depth[2] = 4;
 152   ret.contents_depth[3] = 5;
 153   ret.contents_depth[4] = 6;
 154   ret.contents_depth[5] = 7;
 155   ret.leaf_level = 2;
 156   ret.leaf_smallest_contents = 4;
 157   ret.leaf_contains_contents = FALSE;
 158   ret.include_version_id = TRUE;
 159   ret.author = NULL;
 160   ret.description = NULL;
 161   ret.head_end = NULL;
 162   ret.body = NULL;
 163   ret.body_start = NULL;
 164   ret.body_end = NULL;
 165   ret.address_start = NULL;
 166   ret.address_end = NULL;
 167   ret.nav_attrs = NULL;
 168   ret.suppress_address = FALSE;
 169
 170   ret.fchapter.just_numbers = FALSE;
 171   ret.fchapter.number_suffix = L": ";
 172   ret.nfsect = 2;
 173   ret.fsect = mknewa(xhtmlheadfmt, ret.nfsect);
 174   ret.fsect[0].just_numbers = FALSE;
 175   ret.fsect[0].number_suffix = L": ";
 176   ret.fsect[1].just_numbers = TRUE;
 177   ret.fsect[1].number_suffix = L" ";
 178   ret.contents_filename = strdup(FILENAME_CONTENTS);
 179   ret.single_filename = strdup(FILENAME_SINGLE);
 180   ret.index_filename = strdup(FILENAME_INDEX);
 181   ret.template_filename = strdup(FILENAME_TEMPLATE);
 182
 183   for (; source; source = source->next)
 184   {
 185     if (source->type == para_Config)
 186     {
 187       if (!ustricmp(source->keyword, L"xhtml-contents-filename")) {
 188         sfree(ret.contents_filename);
 189         ret.contents_filename = utoa_dup(uadv(source->keyword));
 190       } else if (!ustricmp(source->keyword, L"xhtml-single-filename")) {
 191         sfree(ret.single_filename);
 192         ret.single_filename = utoa_dup(uadv(source->keyword));
 193       } else if (!ustricmp(source->keyword, L"xhtml-index-filename")) {
 194         sfree(ret.index_filename);
 195         ret.index_filename = utoa_dup(uadv(source->keyword));
 196       } else if (!ustricmp(source->keyword, L"xhtml-template-filename")) {
 197         sfree(ret.template_filename);
 198         ret.template_filename = utoa_dup(uadv(source->keyword));
 199       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-0")) {
 200         ret.contents_depth[0] = utoi(uadv(source->keyword));
 201       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-1")) {
 202         ret.contents_depth[1] = utoi(uadv(source->keyword));
 203       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-2")) {
 204         ret.contents_depth[2] = utoi(uadv(source->keyword));
 205       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-3")) {
 206         ret.contents_depth[3] = utoi(uadv(source->keyword));
 207       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-4")) {
 208         ret.contents_depth[4] = utoi(uadv(source->keyword));
 209       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-5")) {
 210         ret.contents_depth[5] = utoi(uadv(source->keyword));
 211       } else if (!ustricmp(source->keyword, L"xhtml-leaf-level")) {
 212         ret.leaf_level = utoi(uadv(source->keyword));
 213       } else if (!ustricmp(source->keyword, L"xhtml-leaf-smallest-contents")) {
 214         ret.leaf_smallest_contents = utoi(uadv(source->keyword));
 215       } else if (!ustricmp(source->keyword, L"xhtml-versionid")) {
 216         ret.include_version_id = utob(uadv(source->keyword));
 217       } else if (!ustricmp(source->keyword, L"xhtml-leaf-contains-contents")) {
 218         ret.leaf_contains_contents = utob(uadv(source->keyword));
 219       } else if (!ustricmp(source->keyword, L"xhtml-suppress-address")) {
 220         ret.suppress_address = utob(uadv(source->keyword));
 221       } else if (!ustricmp(source->keyword, L"xhtml-author")) {
 222         ret.author = uadv(source->keyword);
 223       } else if (!ustricmp(source->keyword, L"xhtml-description")) {
 224         ret.description = uadv(source->keyword);
 225       } else if (!ustricmp(source->keyword, L"xhtml-head-end")) {
 226         ret.head_end = uadv(source->keyword);
 227       } else if (!ustricmp(source->keyword, L"xhtml-body-start")) {
 228         ret.body_start = uadv(source->keyword);
 229       } else if (!ustricmp(source->keyword, L"xhtml-body-tag")) {
 230         ret.body = uadv(source->keyword);
 231       } else if (!ustricmp(source->keyword, L"xhtml-body-end")) {
 232         ret.body_end = uadv(source->keyword);
 233       } else if (!ustricmp(source->keyword, L"xhtml-address-start")) {
 234         ret.address_start = uadv(source->keyword);
 235       } else if (!ustricmp(source->keyword, L"xhtml-address-end")) {
 236         ret.address_end = uadv(source->keyword);
 237       } else if (!ustricmp(source->keyword, L"xhtml-navigation-attributes")) {
 238         ret.nav_attrs = uadv(source->keyword);
 239       } else if (!ustricmp(source->keyword, L"xhtml-chapter-numeric")) {
 240         ret.fchapter.just_numbers = utob(uadv(source->keyword));
 241       } else if (!ustricmp(source->keyword, L"xhtml-chapter-suffix")) {
 242         ret.fchapter.number_suffix = uadv(source->keyword);
 243       } else if (!ustricmp(source->keyword, L"xhtml-section-numeric")) {
 244         wchar_t *p = uadv(source->keyword);
 245         int n = 0;
 246         if (uisdigit(*p)) {
 247           n = utoi(p);
 248           p = uadv(p);
 249         }
 250         if (n >= ret.nfsect) {
 251           int i;
 252           ret.fsect = resize(ret.fsect, n+1);
 253           for (i = ret.nfsect; i <= n; i++)
 254             ret.fsect[i] = ret.fsect[ret.nfsect-1];
 255           ret.nfsect = n+1;
 256         }
 257         ret.fsect[n].just_numbers = utob(p);
 258       } else if (!ustricmp(source->keyword, L"xhtml-section-suffix")) {
 259         wchar_t *p = uadv(source->keyword);
 260         int n = 0;
 261         if (uisdigit(*p)) {
 262           n = utoi(p);
 263           p = uadv(p);
 264         }
 265         if (n >= ret.nfsect) {
 266           int i;
 267           ret.fsect = resize(ret.fsect, n+1);
 268           for (i = ret.nfsect; i <= n; i++)
 269             ret.fsect[i] = ret.fsect[ret.nfsect-1];
 270           ret.nfsect = n+1;
 271         }
 272         ret.fsect[n].number_suffix = p;
 273       }
 274     }
 275   }
 276
 277   /*  printf(" !!! leaf_level = %i\n", ret.leaf_level);
 278   printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]);
 279   printf(" !!! contentdepth-1 = %i\n", ret.contents_depth[1]);
 280   printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]);
 281   printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]);
 282   printf(" !!! contentdepth-4 = %i\n", ret.contents_depth[4]);
 283   printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]);
 284   printf(" !!! leaf_contains_contents = %i\n", ret.leaf_contains_contents);*/
 285   return ret;
 286 }
 287
 288 paragraph *xhtml_config_filename(char *filename)
 289 {
 290     /*
 291      * If the user passes in a single filename as a parameter to
 292      * the `--html' command-line option, then we should assume it
 293      * to imply _two_ config directives:
 294      * \cfg{xhtml-single-filename}{whatever} and
 295      * \cfg{xhtml-leaf-level}{0}; the rationale being that the user
 296      * wants their output _in that file_.
 297      */
 298
 299     paragraph *p[2];
 300     int i, len;
 301     wchar_t *ufilename, *up;
 302
 303     for (i = 0; i < 2; i++) {
 304         p[i] = mknew(paragraph);
 305         memset(p[i], 0, sizeof(*p[i]));
 306         p[i]->type = para_Config;
 307         p[i]->next = NULL;
 308         p[i]->fpos.filename = "<command line>";
 309         p[i]->fpos.line = p[i]->fpos.col = -1;
 310     }
 311
 312     ufilename = ufroma_dup(filename);
 313     len = ustrlen(ufilename) + 2 + lenof(L"xhtml-single-filename");
 314     p[0]->keyword = mknewa(wchar_t, len);
 315     up = p[0]->keyword;
 316     ustrcpy(up, L"xhtml-single-filename");
 317     up = uadv(up);
 318     ustrcpy(up, ufilename);
 319     up = uadv(up);
 320     *up = L'\0';
 321     assert(up - p[0]->keyword < len);
 322     sfree(ufilename);
 323
 324     len = lenof(L"xhtml-leaf-level") + lenof(L"0") + 1;
 325     p[1]->keyword = mknewa(wchar_t, len);
 326     up = p[1]->keyword;
 327     ustrcpy(up, L"xhtml-leaf-level");
 328     up = uadv(up);
 329     ustrcpy(up, L"0");
 330     up = uadv(up);
 331     *up = L'\0';
 332     assert(up - p[1]->keyword < len);
 333
 334     p[0]->next = p[1];
 335
 336     return p[0];
 337 }
 338
 339 static xhtmlsection *xhtml_new_section(xhtmlsection *last)
 340 {
 341   xhtmlsection *ret = mknew(xhtmlsection);
 342   ret->next=NULL;
 343   ret->child=NULL;
 344   ret->parent=NULL;
 345   ret->chain=last;
 346   ret->para=NULL;
 347   ret->file=NULL;
 348   ret->fragment=NULL;
 349   ret->level=-1; /* marker: end of chain */
 350   return ret;
 351 }
 352
 353 /* Returns NULL or the section that marks that paragraph */
 354 static xhtmlsection *xhtml_find_section(paragraph *p)
 355 {
 356   xhtmlsection *ret = topsection;
 357   if (xhtml_para_level(p)==-1) { /* first, we back-track to a section paragraph */
 358     paragraph *p2 = sourceparas;
 359     paragraph *p3 = NULL;
 360     while (p2 && p2!=p) {
 361       if (xhtml_para_level(p2)!=-1) {
 362         p3 = p2;
 363       }
 364       p2=p2->next;
 365     }
 366     if (p3==NULL) { /* for some reason, we couldn't find a section before this paragraph ... ? */
 367       /* Note that this can happen, if you have a cross-reference to before the first chapter starts.
 368        * So don't do that, then.
 369        */
 370       return NULL;
 371     }
 372     p=p3;
 373   }
 374   while (ret && ret->para != p) {
 375 /*    printf(" xhtml_find_section(): checking %s for para @ %p\n", ret->fragment, p);*/
 376     ret=ret->chain;
 377   }
 378   return ret;
 379 }
 380
 381 static xhtmlfile *xhtml_new_file(xhtmlsection *sect)
 382 {
 383   xhtmlfile *ret = mknew(xhtmlfile);
 384
 385   ret->next=NULL;
 386   ret->child=NULL;
 387   ret->parent=NULL;
 388   ret->filename=NULL;
 389   ret->sections=sect;
 390   ret->is_leaf=(sect!=NULL && sect->level==conf.leaf_level);
 391   if (sect==NULL) {
 392     if (conf.leaf_level==0) { /* currently unused */
 393       ret->filename = smalloc(strlen(conf.single_filename)+1);
 394       sprintf(ret->filename, conf.single_filename);
 395     } else {
 396       ret->filename = smalloc(strlen(conf.contents_filename)+1);
 397       sprintf(ret->filename, conf.contents_filename);
 398     }
 399   } else {
 400     paragraph *p = sect->para;
 401     rdstringc fname_c = { 0, 0, NULL };
 402     char *c, *t;
 403     word *w;
 404     wchar_t *ws;
 405
 406     t = conf.template_filename;
 407     while (*t) {
 408       if (*t == '%' && t[1]) {
 409         int fmt;
 410
 411         t++;
 412         fmt = *t++;
 413
 414         if (fmt == '%') {
 415           rdaddc(&fname_c, fmt);
 416           continue;
 417         }
 418
 419         w = NULL;
 420         ws = NULL;
 421
 422         if (p->kwtext && fmt == 'n')
 423           w = p->kwtext;
 424         else if (p->kwtext2 && fmt == 'b')
 425           w = p->kwtext2;
 426         else if (p->keyword && *p->keyword && fmt == 'k')
 427           ws = p->keyword;
 428         else
 429           w = p->words;
 430
 431         while (w) {
 432           switch (removeattr(w->type))
 433           {
 434            case word_Normal:
 435             /*case word_Emph:
 436              case word_Code:
 437              case word_WeakCode:*/
 438             xhtml_utostr(w->text, &c);
 439             rdaddsc(&fname_c,c);
 440             sfree(c);
 441             break;
 442           }
 443           w = w->next;
 444         }
 445         if (ws) {
 446           xhtml_utostr(ws, &c);
 447           rdaddsc(&fname_c,c);
 448           sfree(c);
 449         }
 450       } else {
 451         rdaddc(&fname_c, *t++);
 452       }
 453     }
 454
 455     ret->filename = rdtrimc(&fname_c);
 456   }
 457   /*  printf(" ! new file '%s', is_leaf == %s\n", ret->filename, (ret->is_leaf)?("true"):("false"));*/
 458   return ret;
 459 }
 460
 461 /*
 462  * Walk the tree fixing up files which are actually leaf (ie
 463  * have no children) but aren't at leaf level, so they have the
 464  * leaf flag set.
 465  */
 466 void xhtml_fixup_layout(xhtmlfile* file)
 467 {
 468   if (file->child==NULL) {
 469     file->is_leaf = TRUE;
 470   } else {
 471     xhtml_fixup_layout(file->child);
 472   }
 473   if (file->next)
 474     xhtml_fixup_layout(file->next);
 475 }
 476
 477 /*
 478  * Create the tree structure so we know where everything goes.
 479  * Method:
 480  *
 481  * Ignoring file splitting, we have three choices with each new section:
 482  *
 483  * +-----------------+-----------------+
 484  * |                 |                 |
 485  * X            +----X----+           (1)
 486  *              |         |
 487  *              Y        (2)
 488  *              |
 489  *             (3)
 490  *
 491  * Y is the last section we added (currentsect).
 492  * If sect is the section we want to add, then:
 493  *
 494  * (1) if sect->level < currentsect->level
 495  * (2) if sect->level == currentsect->level
 496  * (3) if sect->level > currentsect->level
 497  *
 498  * This requires the constraint that you never skip section numbers
 499  * (so you can't have a.b.c.d without all of a, a.b and a.b.c existing).
 500  *
 501  * Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change
 502  * more than one level at a time. Lots of asserts, and probably part of
 503  * the algorithm here, rely on this being true. (It currently isn't
 504  * enforced by halibut, however.)
 505  *
 506  * File splitting makes this harder. For instance, say we added at (3)
 507  * above and now need to add another section. We are splitting at level
 508  * 2, ie the level of Y. Z is the last section we added:
 509  *
 510  * +-----------------+-----------------+
 511  * |                 |                 |
 512  * X            +----X----+           (1)
 513  *              |         |
 514  *         +----Y----+   (1)
 515  *         |         |
 516  *         Z        (2)
 517  *         |
 518  *        (3)
 519  *
 520  * The (1) case is now split; we need to search upwards to find where
 521  * to actually link in. The other two cases remain the same (and will
 522  * always be like this).
 523  *
 * File splitting makes this harder. The decision of whether to split
 * to a new file is always made on the same condition, however (is the
 * level of this section higher than the leaf_level configuration
 * value or not).
 528  *
 529  * Treating the cases backwards:
 530  *
 531  * (3) same file if sect->level > conf.leaf_level, otherwise new file
 532  *
 533  *     if in the same file, currentsect->child points to sect
 534  *     otherwise the linking is done through the file tree (which works
 535  *     in more or less the same way, ie currentfile->child points to
 536  *     the new file)
 537  *
 538  * (2) same file if sect->level > conf.leaf_level, otherwise new file
 539  *
 540  *     if in the same file, currentsect->next points to sect
 541  *     otherwise file linking and currentfile->next points to the new
 542  *     file (we know that Z must have caused a new file to be created)
 543  *
 544  * (1) same file if sect->level > conf.leaf_level, otherwise new file
 545  *
 546  *     this is actually effectively the same case as (2) here,
 547  *     except that we first have to travel up the sections to figure
 548  *     out which section this new one will be a sibling of. In doing
 549  *     so, we may disappear off the top of a file and have to go up
 550  *     to its parent in the file tree.
 551  *
 552  */
 553 static void xhtml_ponder_layout(paragraph *p)
 554 {
 555   xhtmlsection *lastsection;
 556   xhtmlsection *currentsect;
 557   xhtmlfile *currentfile;
 558
 559   lastfile = NULL;
 560   topsection = xhtml_new_section(NULL);
 561   topfile = xhtml_new_file(NULL);
 562   lastsection = topsection;
 563   currentfile = topfile;
 564   currentsect = topsection;
 565
 566   if (conf.leaf_level == 0) {
 567     topfile->is_leaf = 1;
 568     topfile->sections = topsection;
 569     topsection->file = topfile;
 570   }
 571
 572   for (; p; p=p->next)
 573   {
 574     int level = xhtml_para_level(p);
 575     if (level>0) /* actually a section */
 576     {
 577       xhtmlsection *sect;
 578       word *w;
 579       char *c;
 580       rdstringc fname_c = { 0, 0, NULL };
 581
 582       sect = xhtml_new_section(lastsection);
 583       lastsection = sect;
 584       sect->para = p;
 585       for (w=(p->kwtext2)?(p->kwtext2):(p->words); w; w=w->next) /* kwtext2 because we want numbers only! */
 586       {
 587         switch (removeattr(w->type))
 588         {
 589         case word_Normal:
 590          /*case word_Emph:
 591          case word_Code:
 592          case word_WeakCode:*/
 593           xhtml_utostr(w->text, &c);
 594           rdaddsc(&fname_c,c);
 595           sfree(c);
 596           break;
 597         }
 598       }
 599 /*      rdaddsc(&fname_c, ".html");*/
 600       sect->fragment = rdtrimc(&fname_c);
 601       sect->level = level;
 602       /*      printf(" ! adding para @ %p as sect %s, level %i\n", sect->para, sect->fragment, level);*/
 603
 604       if (level>currentsect->level) { /* case (3) */
 605         if (level>conf.leaf_level) { /* same file */
 606           assert(currentfile->is_leaf);
 607           currentsect->child = sect;
 608           sect->parent=currentsect;
 609           sect->file=currentfile;
 610           /*          printf("connected '%s' to existing file '%s' [I]\n", sect->fragment, currentfile->filename);*/
 611           currentsect=sect;
 612         } else { /* new file */
 613           xhtmlfile *file = xhtml_new_file(sect);
 614           assert(!currentfile->is_leaf);
 615           currentfile->child=file;
 616           sect->file=file;
 617           file->parent=currentfile;
 618           /*          printf("connected '%s' to new file '%s' [I]\n", sect->fragment, file->filename);*/
 619           currentfile=file;
 620           currentsect=sect;
 621         }
 622       } else if (level >= currentsect->file->sections->level) {
 623         /* Case (1) or (2) *AND* still under the section that starts
 624          * the current file.
 625          *
 626          * I'm not convinced that this couldn't be rolled in with the
 627          * final else {} leg further down. It seems a lot of effort
 628          * this way.
 629          */
 630         if (level>conf.leaf_level) { /* stick within the same file */
 631           assert(currentfile->is_leaf);
 632           sect->file = currentfile;
 633           while (currentsect && currentsect->level > level &&
 634                  currentsect->file==currentsect->parent->file) {
 635             currentsect = currentsect->parent;
 636           }
 637           assert(currentsect);
 638           currentsect->next = sect;
 639           assert(currentsect->level == sect->level);
 640           sect->parent = currentsect->parent;
 641           currentsect = sect;
 642           /*          printf("connected '%s' to existing file '%s' [II]\n", sect->fragment, currentfile->filename);*/
 643         } else { /* new file */
 644           xhtmlfile *file = xhtml_new_file(sect);
 645           sect->file=file;
 646           currentfile->next=file;
 647           file->parent=currentfile->parent;
 648           file->is_leaf=(level==conf.leaf_level);
 649           file->sections=sect;
 650           /*          printf("connected '%s' to new file '%s' [II]\n", sect->fragment, file->filename);*/
 651           currentfile=file;
 652           currentsect=sect;
 653         }
 654       } else { /* Case (1) or (2) and we must move up the file tree first */
 655         /* this loop is now probably irrelevant - we know we can't connect
 656          * to anything in the current file */
 657         while (currentsect && level<currentsect->level) {
 658           currentsect=currentsect->parent;
 659           if (currentsect) {
 660             /*            printf(" * up one level to '%s'\n", currentsect->fragment);*/
 661           } else {
 662             /*            printf(" * up one level (off top of current file)\n");*/
 663           }
 664         }
 665         if (currentsect) {
 666           /* I'm pretty sure this can now never fire */
 667           assert(currentfile->is_leaf);
 668           /*          printf("connected '%s' to existing file '%s' [III]\n", sect->fragment, currentfile->filename);*/
 669           sect->file = currentfile;
 670           currentsect->next=sect;
 671           currentsect=sect;
 672         } else { /* find a file we can attach to */
 673           while (currentfile && currentfile->sections && level<currentfile->sections->level) {
 674             currentfile=currentfile->parent;
 675             if (currentfile) {
 676               /*              printf(" * up one file level to '%s'\n", currentfile->filename);*/
 677             } else {
 678               /*              printf(" * up one file level (off top of tree)\n");*/
 679             }
 680           }
 681           if (currentfile) { /* new file (we had to skip up a file to
 682                                 get here, so we must be dealing with a
 683                                 level no lower than the configured
 684                                 leaf_level */
 685             xhtmlfile *file = xhtml_new_file(sect);
 686             currentfile->next=file;
 687             sect->file=file;
 688             file->parent=currentfile->parent;
 689             file->is_leaf=(level==conf.leaf_level);
 690             file->sections=sect;
 691             /*            printf("connected '%s' to new file '%s' [III]\n", sect->fragment, file->filename);*/
 692             currentfile=file;
 693             currentsect=sect;
 694           } else {
 695             fatal(err_whatever, "Ran off the top trying to connect sibling: strange document.");
 696           }
 697         }
 698       }
 699     }
 700   }
 701   topsection = lastsection; /* get correct end of the chain */
 702   xhtml_fixup_layout(topfile); /* leaf files not at leaf level marked as such */
 703 }
 704
 705 static void xhtml_do_index();
 706 static void xhtml_do_file(xhtmlfile *file);
 707 static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform);
 708 static void xhtml_do_paras(FILE *fp, paragraph *p, paragraph *end, int indexable);
 709 static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit);
 710 static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit);
 711 static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit);
 712 static int xhtml_do_contents(FILE *fp, xhtmlfile *file);
 713 static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file);
 714 static void xhtml_do_sections(FILE *fp, xhtmlsection *sections);
 715
 716 /*
 717  * Do all the files in this structure.
 718  */
 719 static void xhtml_do_files(xhtmlfile *file)
 720 {
 721   xhtml_do_file(file);
 722   if (file->child)
 723     xhtml_do_files(file->child);
 724   if (file->next)
 725     xhtml_do_files(file->next);
 726 }
 727
 728 /*
 729  * Free up all memory used by the file tree from 'xfile' downwards
 730  */
 731 static void xhtml_free_file(xhtmlfile* xfile)
 732 {
 733   if (xfile==NULL) {
 734     return;
 735   }
 736
 737   if (xfile->filename) {
 738     sfree(xfile->filename);
 739   }
 740   xhtml_free_file(xfile->child);
 741   xhtml_free_file(xfile->next);
 742   sfree(xfile);
 743 }
 744
 745 /*
 746  * Main function.
 747  */
 748 void xhtml_backend(paragraph *sourceform, keywordlist *in_keywords,
 749                    indexdata *in_idx)
 750 {
 751 /*  int i;*/
 752   indexentry *ientry;
 753   int ti;
 754   xhtmlsection *xsect;
 755
 756   sourceparas = sourceform;
 757   conf = xhtml_configure(sourceform);
 758   keywords = in_keywords;
 759   idx = in_idx;
 760
 761   /* Clear up the index entries backend data pointers */
 762   for (ti=0; (ientry = (indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
 763     ientry->backend_data=NULL;
 764   }
 765
 766   xhtml_ponder_layout(sourceform);
 767
 768   /* old system ... (writes to *.alt, but gets some stuff wrong and is ugly) */
 769 /*  xhtml_level_0(sourceform);
 770   for (i=1; i<=conf.leaf_level; i++)
 771   {
 772     xhtml_level(sourceform, i);
 773   }*/
 774
 775   /* new system ... (writes to *.html, but isn't fully trusted) */
 776   xhtml_do_top_file(topfile, sourceform);
 777   assert(!topfile->next); /* shouldn't have a sibling at all */
 778   if (topfile->child) {
 779     xhtml_do_files(topfile->child);
 780     xhtml_do_index();
 781   }
 782
 783   /* release file, section, index data structures */
 784   xsect = topsection;
 785   while (xsect) {
 786     xhtmlsection *tmp = xsect->chain;
 787     if (xsect->fragment) {
 788       sfree(xsect->fragment);
 789     }
 790     sfree(xsect);
 791     xsect = tmp;
 792   }
 793   xhtml_free_file(topfile);
 794   for (ti = 0; (ientry=(indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
 795     if (ientry->backend_data!=NULL) {
 796       xhtmlindex *xi = (xhtmlindex*) ientry->backend_data;
 797       if (xi->sections!=NULL) {
 798         sfree(xi->sections);
 799       }
 800       sfree(xi);
 801     }
 802     ientry->backend_data = NULL;
 803   }
 804   sfree(conf.fsect);
 805 }
 806
 807 static int xhtml_para_level(paragraph *p)
 808 {
 809   switch (p->type)
 810   {
 811   case para_Title:
 812     return 0;
 813     break;
 814   case para_UnnumberedChapter:
 815   case para_Chapter:
 816   case para_Appendix:
 817     return 1;
 818     break;
 819 /*  case para_BiblioCited:
 820     return 2;
 821     break;*/
 822   case para_Heading:
 823   case para_Subsect:
 824     return p->aux+2;
 825     break;
 826   default:
 827     return -1;
 828     break;
 829   }
 830 }
 831
 832 /* Output the nav links for the current file.
 833  * file == NULL means we're doing the index
 834  */
 835 static void xhtml_donavlinks(FILE *fp, xhtmlfile *file)
 836 {
 837   xhtmlfile *xhtml_next_file = NULL;
 838   fprintf(fp, "<p");
 839   if (conf.nav_attrs!=NULL) {
 840     fprintf(fp, " %ls>", conf.nav_attrs);
 841   } else {
 842     fprintf(fp, ">");
 843   }
 844   if (xhtml_last_file==NULL) {
 845     fprintf(fp, "Previous | ");
 846   } else {
 847     fprintf(fp, "<a href='%s'>Previous</a> | ", xhtml_last_file->filename);
 848   }
 849   fprintf(fp, "<a href='%s'>Contents</a> | ", conf.contents_filename);
 850   if (file == NULL) {
 851     fprintf(fp, "Index | ");
 852   } else {
 853     fprintf(fp, "<a href='%s'>Index</a> | ", conf.index_filename);
 854   }
 855   if (file != NULL) { /* otherwise we're doing nav links for the index */
 856     if (xhtml_next_file==NULL)
 857       xhtml_next_file = file->child;
 858     if (xhtml_next_file==NULL)
 859       xhtml_next_file = file->next;
 860     if (xhtml_next_file==NULL)
 861       xhtml_next_file = file->parent->next;
 862   }
 863   if (xhtml_next_file==NULL) {
 864     if (file==NULL) { /* index, so no next file */
 865       fprintf(fp, "Next ");
 866     } else {
 867       fprintf(fp, "<a href='%s'>Next</a>", conf.index_filename);
 868     }
 869   } else {
 870     fprintf(fp, "<a href='%s'>Next</a>", xhtml_next_file->filename);
 871   }
 872   fprintf(fp, "</p>\n");
 873 }
 874
 875 /* Write out the index file */
 876 static void xhtml_do_index_body(FILE *fp)
 877 {
 878   indexentry *y;
 879   int ti;
 880
 881   if (count234(idx->entries) == 0)
 882     return;                            /* don't write anything at all */
 883
 884   fprintf(fp, "<dl>\n");
 885   /* iterate over idx->entries using the tree functions and display everything */
 886   for (ti = 0; (y = (indexentry *)index234(idx->entries, ti)) != NULL; ti++) {
 887     if (y->backend_data) {
 888       int i;
 889       xhtmlindex *xi;
 890
 891       fprintf(fp, "<dt>");
 892       xhtml_para(fp, y->text, FALSE);
 893       fprintf(fp, "</dt>\n<dd>");
 894
 895       xi = (xhtmlindex*) y->backend_data;
 896       for (i=0; i<xi->nsection; i++) {
 897         xhtmlsection *sect = xi->sections[i];
 898         if (sect) {
 899           fprintf(fp, "<a href='%s#%s'>", sect->file->filename, sect->fragment);
 900           if (sect->para->kwtext) {
 901             xhtml_para(fp, sect->para->kwtext, FALSE);
 902           } else if (sect->para->words) {
 903             xhtml_para(fp, sect->para->words, FALSE);
 904           }
 905           fprintf(fp, "</a>");
 906           if (i+1<xi->nsection) {
 907             fprintf(fp, ", ");
 908           }
 909         }
 910       }
 911       fprintf(fp, "</dd>\n");
 912     }
 913   }
 914   fprintf(fp, "</dl>\n");
 915 }
 916 static void xhtml_do_index()
 917 {
 918   word temp_word = { NULL, NULL, word_Normal, 0, 0, L"Index", { NULL, 0, 0} };
 919   FILE *fp = fopen(conf.index_filename, "w");
 920
 921   if (fp==NULL)
 922     fatal(err_cantopenw, conf.index_filename);
 923   xhtml_doheader(fp, &temp_word);
 924   xhtml_donavlinks(fp, NULL);
 925
 926   xhtml_do_index_body(fp);
 927
 928   xhtml_donavlinks(fp, NULL);
 929   xhtml_dofooter(fp);
 930   fclose(fp);
 931 }
 932
 933 /* Output the given file. This includes whatever contents at beginning and end, etc. etc. */
 934 static void xhtml_do_file(xhtmlfile *file)
 935 {
 936   FILE *fp = fopen(file->filename, "w");
 937   if (fp==NULL)
 938     fatal(err_cantopenw, file->filename);
 939
 940   if (file->sections->para->words) {
 941     xhtml_doheader(fp, file->sections->para->words);
 942   } else if (file->sections->para->kwtext) {
 943     xhtml_doheader(fp, file->sections->para->kwtext);
 944   } else {
 945     xhtml_doheader(fp, NULL);
 946   }
 947
 948   xhtml_donavlinks(fp, file);
 949
 950   if (file->is_leaf && conf.leaf_contains_contents &&
 951       xhtml_do_contents(NULL, file)>=conf.leaf_smallest_contents)
 952     xhtml_do_contents(fp, file);
 953   xhtml_do_sections(fp, file->sections);
 954   if (!file->is_leaf)
 955     xhtml_do_naked_contents(fp, file);
 956
 957   xhtml_donavlinks(fp, file);
 958
 959   xhtml_dofooter(fp);
 960   fclose(fp);
 961
 962   xhtml_last_file = file;
 963 }
 964
 965 /* Output the top-level file. */
 966 static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform)
 967 {
 968   paragraph *p;
 969   int done=FALSE;
 970   FILE *fp = fopen(file->filename, "w");
 971   if (fp==NULL)
 972     fatal(err_cantopenw, file->filename);
 973
 974   /* Do the title -- only one allowed */
 975   for (p = sourceform; p && !done; p = p->next)
 976   {
 977     if (p->type == para_Title)
 978     {
 979       xhtml_doheader(fp, p->words);
 980       done=TRUE;
 981     }
 982   }
 983   if (!done)
 984     xhtml_doheader(fp, NULL /* Eek! */);
 985
 986   /*
 987    * Display the title.
 988    */
 989   for (p = sourceform; p; p = p->next)
 990   {
 991     if (p->type == para_Title) {
 992       xhtml_heading(fp, p, FALSE);
 993       break;
 994     }
 995   }
 996
 997   /* Do the preamble */
 998   for (p = sourceform; p; p = p->next)
 999   {
1000     if (p->type == para_Chapter || p->type == para_Heading ||
1001         p->type == para_Subsect || p->type == para_Appendix ||
1002         p->type == para_UnnumberedChapter) {
1003         /*
1004          * We've found the end of the preamble. Do every normal
1005          * paragraph up to there.
1006          */
1007         xhtml_do_paras(fp, sourceform, p, FALSE);
1008         break;
1009     }
1010   }
1011
1012   xhtml_do_contents(fp, file);
1013   xhtml_do_sections(fp, file->sections);
1014
1015   /*
1016    * Put the index in the top file if we're in single-file mode
1017    * (leaf-level 0).
1018    */
1019   if (conf.leaf_level == 0 && count234(idx->entries) > 0) {
1020     fprintf(fp, "<a name=\"index\"></a><h1>Index</h1>\n");
1021     xhtml_do_index_body(fp);
1022   }
1023
1024   xhtml_dofooter(fp);
1025   fclose(fp);
1026 }
1027
/*
 * Convert a wide string to a freshly allocated, NUL-terminated ASCII
 * one, stored in `*out'. Characters outside the printable range
 * 32..126 are replaced by '?'.
 */
static void xhtml_utostr(wchar_t *in, char **out)
{
  int len = ustrlen(in);
  char *buf = smalloc(len+1);
  int k;

  for (k = 0; k < len; k++) {
    wchar_t wc = in[k];
    buf[k] = (wc >= 32 && wc <= 126) ? (char)wc : '?';
  }
  buf[len] = 0;

  *out = buf;
}
1045
1046 /*
1047  * Write contents for the given file, and subfiles, down to
1048  * the appropriate contents depth. Returns the number of
1049  * entries written.
1050  */
1051 static int xhtml_do_contents(FILE *fp, xhtmlfile *file)
1052 {
1053   int level, limit, count = 0;
1054   if (!file)
1055     return 0;
1056
1057   level = (file->sections)?(file->sections->level):(0);
1058   limit = conf.contents_depth[(level>5)?(5):(level)];
1059   start_level = (file->is_leaf) ? (level-1) : (level);
1060   last_level = start_level;
1061
1062   count += xhtml_do_contents_section_limit(fp, file->sections, limit);
1063   count += xhtml_do_contents_limit(fp, file->child, limit);
1064   if (fp!=NULL) {
1065     while (last_level > start_level) {
1066       last_level--;
1067       fprintf(fp, "</li></ul>\n");
1068     }
1069   }
1070   return count;
1071 }
1072
1073 /* As above, but doesn't do anything in the current file */
1074 static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file)
1075 {
1076   int level, limit, start_level, count = 0;
1077   if (!file)
1078     return 0;
1079
1080   level = (file->sections)?(file->sections->level):(0);
1081   limit = conf.contents_depth[(level>5)?(5):(level)];
1082   start_level = (file->is_leaf) ? (level-1) : (level);
1083   last_level = start_level;
1084
1085   count = xhtml_do_contents_limit(fp, file->child, limit);
1086   if (fp!=NULL) {
1087     while (last_level > start_level) {
1088       last_level--;
1089       fprintf(fp, "</li></ul>\n");
1090     }
1091   }
1092   return count;
1093 }
1094
1095 /*
1096  * Write contents for the given file, children, and siblings, down to
1097  * given limit contents depth.
1098  */
1099 static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit)
1100 {
1101   int count = 0;
1102   while (file) {
1103     count += xhtml_do_contents_section_limit(fp, file->sections, limit);
1104     count += xhtml_do_contents_limit(fp, file->child, limit);
1105     file = file->next;
1106   }
1107   return count;
1108 }
1109
1110 /*
1111  * Write contents entries for the given section tree, down to the
1112  * limit contents depth.
1113  */
1114 static int xhtml_do_contents_section_deep_limit(FILE *fp, xhtmlsection *section, int limit)
1115 {
1116   int count = 0;
1117   while (section) {
1118     if (!xhtml_add_contents_entry(fp, section, limit))
1119       return 0;
1120     else
1121       count++;
1122     count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
1123     section = section->next;
1124   }
1125   return count;
1126 }
1127
1128 /*
1129  * Write contents entries for the given section tree, down to the
1130  * limit contents depth.
1131  */
1132 static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit)
1133 {
1134   int count = 0;
1135   if (!section)
1136     return 0;
1137   xhtml_add_contents_entry(fp, section, limit);
1138   count=1;
1139   count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
1140   /*  section=section->child;
1141   while (section && xhtml_add_contents_entry(fp, section, limit)) {
1142     section = section->next;
1143     }*/
1144   return count;
1145 }
1146
1147 /*
1148  * Add a section entry, unless we're exceeding the limit, in which
1149  * case return FALSE (otherwise return TRUE).
1150  */
1151 static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit)
1152 {
1153   if (!section || section->level > limit)
1154     return FALSE;
1155   if (fp==NULL || section->level < 0)
1156     return TRUE;
1157   if (last_level > section->level) {
1158     while (last_level > section->level) {
1159       last_level--;
1160       fprintf(fp, "</li></ul>\n");
1161     }
1162     fprintf(fp, "</li>\n");
1163   } else if (last_level < section->level) {
1164     assert(last_level == section->level - 1);
1165     last_level++;
1166     fprintf(fp, "<ul>\n");
1167   } else {
1168     fprintf(fp, "</li>\n");
1169   }
1170   fprintf(fp, "<li><a href=\"%s#%s\">", section->file->filename, section->fragment);
1171   if (section->para->kwtext) {
1172     xhtml_para(fp, section->para->kwtext, FALSE);
1173     if (section->para->words) {
1174       fprintf(fp, ": ");
1175     }
1176   }
1177   if (section->para->words) {
1178     xhtml_para(fp, section->para->words, FALSE);
1179   }
1180   fprintf(fp, "</a>\n");
1181   return TRUE;
1182 }
1183
1184 /*
1185  * Write all the sections in this file. Do all paragraphs in this section, then all
1186  * children (recursively), then go on to the next one (tail recursively).
1187  */
1188 static void xhtml_do_sections(FILE *fp, xhtmlsection *sections)
1189 {
1190   while (sections) {
1191     currentsection = sections;
1192     xhtml_do_paras(fp, sections->para, NULL, TRUE);
1193     xhtml_do_sections(fp, sections->child);
1194     sections = sections->next;
1195   }
1196 }
1197
1198 /* Write this list of paragraphs. Close off all lists at the end. */
1199 static void xhtml_do_paras(FILE *fp, paragraph *p, paragraph *end,
1200                            int indexable)
1201 {
1202   int last_type = -1, ptype, first=TRUE;
1203   stack lcont_stack = stk_new();
1204   if (!p)
1205     return;
1206
1207 /*  for (; p && (xhtml_para_level(p)>limit || xhtml_para_level(p)==-1 || first); p=p->next) {*/
1208   for (; p && p != end && (xhtml_para_level(p)==-1 || first); p=p->next) {
1209     first=FALSE;
1210     switch (ptype = p->type)
1211     {
1212       /*
1213        * Things we ignore because we've already processed them or
1214        * aren't going to touch them in this pass.
1215        */
1216      case para_IM:
1217      case para_BR:
1218      case para_Biblio:                 /* only touch BiblioCited */
1219      case para_VersionID:
1220      case para_NoCite:
1221      case para_Title:
1222        break;
1223
1224        /*
1225         * Chapter titles.
1226         */
1227       case para_Chapter:
1228       case para_Appendix:
1229       case para_UnnumberedChapter:
1230         xhtml_heading(fp, p, indexable);
1231         break;
1232
1233       case para_Heading:
1234       case para_Subsect:
1235         xhtml_heading(fp, p, indexable);
1236         break;
1237
1238       case para_Rule:
1239         fprintf(fp, "\n<hr />\n");
1240         break;
1241
1242       case para_Normal:
1243       case para_Copyright:
1244         fprintf(fp, "\n<p>");
1245         xhtml_para(fp, p->words, indexable);
1246         fprintf(fp, "</p>\n");
1247         break;
1248
1249       case para_LcontPush:
1250         {
1251             int *p;
1252             p = mknew(int);
1253             *p = last_type;
1254             stk_push(lcont_stack, p);
1255             last_type = para_Normal;
1256         }
1257         break;
1258       case para_LcontPop:
1259         {
1260             int *p = stk_pop(lcont_stack);
1261             assert(p);
1262             ptype = last_type = *p;
1263             sfree(p);
1264             goto closeofflist;         /* ick */
1265         }
1266         break;
1267       case para_QuotePush:
1268         fprintf(fp, "<blockquote>\n");
1269         break;
1270       case para_QuotePop:
1271         fprintf(fp, "</blockquote>\n");
1272         break;
1273
1274       case para_Bullet:
1275       case para_NumberedList:
1276       case para_Description:
1277       case para_DescribedThing:
1278       case para_BiblioCited:
1279         if (last_type!=p->type &&
1280             !(last_type==para_DescribedThing && p->type==para_Description) &&
1281             !(last_type==para_Description && p->type==para_DescribedThing)) {
1282           /* start up list if necessary */
1283           if (p->type == para_Bullet) {
1284             fprintf(fp, "<ul>\n");
1285           } else if (p->type == para_NumberedList) {
1286             fprintf(fp, "<ol>\n");
1287           } else if (p->type == para_BiblioCited ||
1288                      p->type == para_DescribedThing ||
1289                      p->type == para_Description) {
1290             fprintf(fp, "<dl>\n");
1291           }
1292         }
1293         if (p->type == para_Bullet || p->type == para_NumberedList) {
1294           fprintf(fp, "<li>");
1295         } else if (p->type == para_DescribedThing) {
1296           fprintf(fp, "<dt>");
1297         } else if (p->type == para_Description) {
1298           fprintf(fp, "<dd>");
1299         } else if (p->type == para_BiblioCited) {
1300           fprintf(fp, "<dt>");
1301           xhtml_para(fp, p->kwtext, indexable);
1302           fprintf(fp, "</dt>\n<dd>");
1303         }
1304         xhtml_para(fp, p->words, indexable);
1305         {
1306           paragraph *p2 = p->next;
1307           if (p2 && xhtml_para_level(p2)==-1 && p2->type == para_LcontPush)
1308             break;
1309         }
1310
1311         closeofflist:
1312         if (ptype == para_BiblioCited) {
1313           fprintf(fp, "</dd>\n");
1314         } else if (ptype == para_DescribedThing) {
1315           fprintf(fp, "</dt>");
1316         } else if (ptype == para_Description) {
1317           fprintf(fp, "</dd>");
1318         } else if (ptype == para_Bullet || ptype == para_NumberedList) {
1319           fprintf(fp, "</li>");
1320         }
1321         if (ptype == para_Bullet || ptype == para_NumberedList ||
1322             ptype == para_BiblioCited || ptype == para_Description ||
1323             ptype == para_DescribedThing)
1324           /* close off list if necessary */
1325         {
1326           paragraph *p2 = p->next;
1327           int close_off=FALSE;
1328 /*          if (p2 && (xhtml_para_level(p2)>limit || xhtml_para_level(p2)==-1)) {*/
1329           if (p2 && xhtml_para_level(p2)==-1) {
1330             if (p2->type != ptype &&
1331                 !(p2->type==para_DescribedThing && ptype==para_Description) &&
1332                 !(p2->type==para_Description && ptype==para_DescribedThing) &&
1333                 p2->type != para_LcontPush)
1334               close_off=TRUE;
1335           } else {
1336             close_off=TRUE;
1337           }
1338           if (close_off) {
1339             if (ptype == para_Bullet) {
1340               fprintf(fp, "</ul>\n");
1341             } else if (ptype == para_NumberedList) {
1342               fprintf(fp, "</ol>\n");
1343             } else if (ptype == para_BiblioCited ||
1344                        ptype == para_Description ||
1345                        ptype == para_DescribedThing) {
1346               fprintf(fp, "</dl>\n");
1347             }
1348           }
1349         }
1350         break;
1351
1352       case para_Code:
1353         xhtml_codepara(fp, p->words);
1354         break;
1355     }
1356     last_type = ptype;
1357   }
1358
1359   stk_free(lcont_stack);
1360 }
1361
1362 /*
1363  * Output a header for this XHTML file.
1364  */
1365 static void xhtml_doheader(FILE *fp, word *title)
1366 {
1367   fprintf(fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n");
1368   fprintf(fp, "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n");
1369   fprintf(fp, "<html xmlns='http://www.w3.org/1999/xhtml'>\n\n<head>\n<title>");
1370   if (title==NULL)
1371     fprintf(fp, "The thing with no name!");
1372   else
1373     xhtml_para(fp, title, FALSE);
1374   fprintf(fp, "</title>\n");
1375   fprintf(fp, "<meta name=\"generator\" content=\"Halibut %s xhtml-backend\" />\n", version);
1376   if (conf.author)
1377     fprintf(fp, "<meta name=\"author\" content=\"%ls\" />\n", conf.author);
1378   if (conf.description)
1379     fprintf(fp, "<meta name=\"description\" content=\"%ls\" />\n", conf.description);
1380   if (conf.head_end)
1381     fprintf(fp, "%ls\n", conf.head_end);
1382   fprintf(fp, "</head>\n\n");
1383   if (conf.body)
1384     fprintf(fp, "%ls\n", conf.body);
1385   else
1386     fprintf(fp, "<body>\n");
1387   if (conf.body_start)
1388     fprintf(fp, "%ls\n", conf.body_start);
1389 }
1390
1391 /*
1392  * Output a footer for this XHTML file.
1393  */
1394 static void xhtml_dofooter(FILE *fp)
1395 {
1396   fprintf(fp, "\n<hr />\n\n");
1397   if (conf.body_end)
1398     fprintf(fp, "%ls\n", conf.body_end);
1399   if (!conf.suppress_address) {
1400     fprintf(fp,"<address>\n");
1401     if (conf.address_start)
1402       fprintf(fp, "%ls\n", conf.address_start);
1403     /* Do the version ID */
1404     if (conf.include_version_id) {
1405       paragraph *p;
1406       int started = 0;
1407       for (p = sourceparas; p; p = p->next)
1408         if (p->type == para_VersionID) {
1409           xhtml_versionid(fp, p->words, started);
1410           started = 1;
1411         }
1412     }
1413     if (conf.address_end)
1414       fprintf(fp, "%ls\n", conf.address_end);
1415     fprintf(fp, "</address>\n");
1416   }
1417   fprintf(fp, "</body>\n\n</html>\n");
1418 }
1419
1420 /*
1421  * Output the versionid paragraph. Typically this is a version control
1422  * ID string (such as $Id...$ in RCS).
1423  */
1424 static void xhtml_versionid(FILE *fp, word *text, int started)
1425 {
1426   rdstringc t = { 0, 0, NULL };
1427
1428   rdaddc(&t, '[');                     /* FIXME: configurability */
1429   xhtml_rdaddwc(&t, text, NULL, FALSE);
1430   rdaddc(&t, ']');                     /* FIXME: configurability */
1431
1432   if (started)
1433     fprintf(fp, "<br />\n");
1434   fprintf(fp, "%s\n", t.text);
1435   sfree(t.text);
1436 }
1437
/*
 * Is this an XHTML reserved character? Returns nonzero for '&', '<',
 * '>' and '"', and zero for everything else.
 */
static int xhtml_reservedchar(int c)
{
  return (c == '&' || c == '<' || c == '>' || c == '"');
}
1446
1447 /*
1448  * Convert a wide string into valid XHTML: Anything outside ASCII will
1449  * be fixed up as an entity. Currently we don't worry about constraining the
1450  * encoded character set, which we should probably do at some point (we can
1451  * still fix up and return FALSE - see the last comment here). We also don't
1452  * currently
1453  *
1454  * Because this is only used for words, spaces are HARD spaces (any other
1455  * spaces will be word_Whitespace not word_Normal). So they become &nbsp;
1456  * Unless hard_spaces is FALSE, of course (code paragraphs break the above
1457  * rule).
1458  *
1459  * If `result' is non-NULL, mallocs the resulting string and stores a pointer to
1460  * it in `*result'. If `result' is NULL, merely checks whether all
1461  * characters in the string are feasible.
1462  *
1463  * Return is nonzero if all characters are OK. If not all
1464  * characters are OK but `result' is non-NULL, a result _will_
1465  * still be generated!
1466  */
1467 static int xhtml_convert(wchar_t *s, int maxlen, char **result,
1468                          int hard_spaces) {
1469     int doing = (result != 0);
1470     int ok = TRUE;
1471     char *p = NULL;
1472     int plen = 0, psize = 0;
1473
1474     if (maxlen <= 0)
1475         maxlen = -1;
1476
1477     for (; *s && maxlen != 0; s++, maxlen--) {
1478         wchar_t c = *s;
1479
1480 #define ensure_size(i) if (i>=psize) { psize = i+256; p = resize(p, psize); }
1481
1482         if (((c == 32 && !hard_spaces) || (c > 32 && c <= 126 && !xhtml_reservedchar(c)))) {
1483             /* Char is OK. */
1484             if (doing)
1485             {
1486               ensure_size(plen);
1487               p[plen++] = (char)c;
1488             }
1489         } else {
1490             /* Char needs fixing up. */
1491             /* ok = FALSE; -- currently we never return FALSE; we
1492              * might want to when considering a character set for the
1493              * encoded document.
1494              */
1495             if (doing)
1496             {
1497               if (c==32) { /* a space in a word is a hard space */
1498                 ensure_size(plen+6); /* includes space for the NUL, which is subsequently stomped on */
1499                 sprintf(p+plen, "&nbsp;");
1500                 plen+=6;
1501               } else {
1502                 /* FIXME: entity names! */
1503                 ensure_size(plen+8); /* includes space for the NUL, which is subsequently stomped on */
1504                 plen+=sprintf(p+plen, "&#%04i;", (int)c);
1505               }
1506             }
1507         }
1508     }
1509     if (doing) {
1510         p = resize(p, plen+1);
1511         p[plen] = '\0';
1512         *result = p;
1513     }
1514     return ok;
1515 }
1516
1517 /*
1518  * This formats the given words as XHTML.
1519  *
1520  * `indexable', if FALSE, prohibits adding any index references.
1521  * You might use this, for example, if an index reference occurred
1522  * in a section title, to prevent phony index references when the
1523  * section title is processed in strange places such as contents
1524  * sections.
1525  */
1526 static void xhtml_rdaddwc(rdstringc *rs, word *text, word *end, int indexable) {
1527     char *c;
1528     keyword *kwl;
1529     xhtmlsection *sect;
1530     indextag *itag;
1531     int ti;
1532
1533     for (; text && text != end; text = text->next) {
1534       switch (text->type) {
1535       case word_HyperLink:
1536         xhtml_utostr(text->text, &c);
1537         rdaddsc(rs, "<a href=\"");
1538         rdaddsc(rs, c);
1539         rdaddsc(rs, "\">");
1540         sfree(c);
1541         break;
1542
1543       case word_UpperXref:
1544       case word_LowerXref:
1545         kwl = kw_lookup(keywords, text->text);
1546         if (kwl) {
1547           sect=xhtml_find_section(kwl->para);
1548           if (sect) {
1549             rdaddsc(rs, "<a href=\"");
1550             rdaddsc(rs, sect->file->filename);
1551             rdaddc(rs, '#');
1552             rdaddsc(rs, sect->fragment);
1553             rdaddsc(rs, "\">");
1554           } else {
1555             rdaddsc(rs, "<a href=\"Apologies.html\"><!-- probably a bibliography cross reference -->");
1556             error(err_whatever, "Couldn't locate cross-reference! (Probably a bibliography entry.)");
1557           }
1558         } else {
1559           rdaddsc(rs, "<a href=\"Apologies.html\"><!-- unknown cross-reference -->");
1560           error(err_whatever, "Couldn't locate cross-reference! (Wasn't in source file.)");
1561         }
1562         break;
1563
1564       case word_IndexRef: /* in theory we could make an index target here */
1565 /*        rdaddsc(rs, "<a name=\"idx-");
1566         xhtml_utostr(text->text, &c);
1567         rdaddsc(rs, c);
1568         sfree(c);
1569         rdaddsc(rs, "\"></a>");*/
1570         /* what we _do_ need to do is to fix up the backend data
1571          * for any indexentry this points to.
1572          */
1573         if (!indexable)
1574           break;
1575
1576         for (ti=0; (itag = (indextag *)index234(idx->tags, ti))!=NULL; ti++) {
1577           /* FIXME: really ustricmp() and not ustrcmp()? */
1578           if (ustricmp(itag->name, text->text)==0) {
1579             break;
1580           }
1581         }
1582         if (itag!=NULL) {
1583           if (itag->refs!=NULL) {
1584             int i;
1585             for (i=0; i<itag->nrefs; i++) {
1586               xhtmlindex *idx_ref;
1587               indexentry *ientry;
1588
1589               ientry = itag->refs[i];
1590               if (ientry->backend_data==NULL) {
1591                 idx_ref = (xhtmlindex*) smalloc(sizeof(xhtmlindex));
1592                 if (idx_ref==NULL)
1593                   fatal(err_nomemory);
1594                 idx_ref->nsection = 0;
1595                 idx_ref->size = 4;
1596                 idx_ref->sections = (xhtmlsection**) smalloc(idx_ref->size * sizeof(xhtmlsection*));
1597                 if (idx_ref->sections==NULL)
1598                   fatal(err_nomemory);
1599                 ientry->backend_data = idx_ref;
1600               } else {
1601                 idx_ref = ientry->backend_data;
1602                 if (idx_ref->nsection+1 > idx_ref->size) {
1603                   int new_size = idx_ref->size * 2;
1604                   idx_ref->sections = srealloc(idx_ref->sections, new_size * sizeof(xhtmlsection));
1605                   if (idx_ref->sections==NULL) {
1606                     fatal(err_nomemory);
1607                   }
1608                   idx_ref->size = new_size;
1609                 }
1610               }
1611               idx_ref->sections[idx_ref->nsection++] = currentsection;
1612 #if 0
1613 #endif
1614             }
1615           } else {
1616             fatal(err_whatever, "Index tag had no entries!");
1617           }
1618         } else {
1619           fprintf(stderr, "Looking for index entry '%ls'\n", text->text);
1620           fatal(err_whatever, "Couldn't locate index entry! (Wasn't in index.)");
1621         }
1622         break;
1623
1624       case word_HyperEnd:
1625       case word_XrefEnd:
1626         rdaddsc(rs, "</a>");
1627         break;
1628
1629       case word_Normal:
1630       case word_Emph:
1631       case word_Code:
1632       case word_WeakCode:
1633       case word_WhiteSpace:
1634       case word_EmphSpace:
1635       case word_CodeSpace:
1636       case word_WkCodeSpace:
1637       case word_Quote:
1638       case word_EmphQuote:
1639       case word_CodeQuote:
1640       case word_WkCodeQuote:
1641         assert(text->type != word_CodeQuote &&
1642                text->type != word_WkCodeQuote);
1643         if (towordstyle(text->type) == word_Emph &&
1644             (attraux(text->aux) == attr_First ||
1645              attraux(text->aux) == attr_Only))
1646             rdaddsc(rs, "<em>");
1647         else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) &&
1648                  (attraux(text->aux) == attr_First ||
1649                   attraux(text->aux) == attr_Only))
1650             rdaddsc(rs, "<code>");
1651
1652         if (removeattr(text->type) == word_Normal) {
1653           if (xhtml_convert(text->text, 0, &c, TRUE)) /* spaces in the word are hard */
1654             rdaddsc(rs, c);
1655           else
1656             xhtml_rdaddwc(rs, text->alt, NULL, indexable);
1657           sfree(c);
1658         } else if (removeattr(text->type) == word_WhiteSpace) {
1659           rdaddc(rs, ' ');
1660         } else if (removeattr(text->type) == word_Quote) {
1661           rdaddsc(rs, "&quot;");
1662         }
1663
1664         if (towordstyle(text->type) == word_Emph &&
1665             (attraux(text->aux) == attr_Last ||
1666              attraux(text->aux) == attr_Only))
1667             rdaddsc(rs, "</em>");
1668         else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) &&
1669                  (attraux(text->aux) == attr_Last ||
1670                   attraux(text->aux) == attr_Only))
1671             rdaddsc(rs, "</code>");
1672         break;
1673       }
1674     }
1675 }
1676
1677 /* Output a heading, formatted as XHTML.
1678  */
1679 static void xhtml_heading(FILE *fp, paragraph *p, int indexable)
1680 {
1681     rdstringc t = { 0, 0, NULL };
1682     word *tprefix = p->kwtext;
1683     word *nprefix = p->kwtext2;
1684     word *text = p->words;
1685     int level = xhtml_para_level(p);
1686     xhtmlsection *sect = xhtml_find_section(p);
1687     xhtmlheadfmt *fmt;
1688     char *fragment;
1689     if (sect) {
1690       fragment = sect->fragment;
1691     } else {
1692       if (p->type == para_Title)
1693         fragment = "title";
1694       else {
1695         fragment = ""; /* FIXME: what else can we do? */
1696         error(err_whatever, "Couldn't locate heading cross-reference!");
1697       }
1698     }
1699
1700     if (p->type == para_Title)
1701         fmt = NULL;
1702     else if (level == 1)
1703         fmt = &conf.fchapter;
1704     else if (level-1 < conf.nfsect)
1705         fmt = &conf.fsect[level-1];
1706     else
1707         fmt = &conf.fsect[conf.nfsect-1];
1708
1709     if (fmt && fmt->just_numbers && nprefix) {
1710         xhtml_rdaddwc(&t, nprefix, NULL, indexable);
1711         if (fmt) {
1712             char *c;
1713             if (xhtml_convert(fmt->number_suffix, 0, &c, FALSE)) {
1714                 rdaddsc(&t, c);
1715                 sfree(c);
1716             }
1717         }
1718     } else if (fmt && !fmt->just_numbers && tprefix) {
1719         xhtml_rdaddwc(&t, tprefix, NULL, indexable);
1720         if (fmt) {
1721             char *c;
1722             if (xhtml_convert(fmt->number_suffix, 0, &c, FALSE)) {
1723                 rdaddsc(&t, c);
1724                 sfree(c);
1725             }
1726         }
1727     }
1728     xhtml_rdaddwc(&t, text, NULL, indexable);
1729     /*
1730      * If we're outputting in single-file mode, we need to lower
1731      * the level of each heading by one, because the overall
1732      * document title will be sitting right at the top as an <h1>
1733      * and so chapters and sections should start at <h2>.
1734      *
1735      * Even if not, the document title will come back from
1736      * xhtml_para_level() as level zero, so we must increment that
1737      * no matter what leaf_level is set to.
1738      */
1739     if (conf.leaf_level == 0 || level == 0)
1740         level++;
1741     fprintf(fp, "<a name=\"%s\"></a><h%i>%s</h%i>\n", fragment, level, t.text, level);
1742     sfree(t.text);
1743 }
1744
1745 /* Output a paragraph. Styles are handled by xhtml_rdaddwc().
1746  * This looks pretty simple; I may have missed something ...
1747  */
1748 static void xhtml_para(FILE *fp, word *text, int indexable)
1749 {
1750   rdstringc out = { 0, 0, NULL };
1751   xhtml_rdaddwc(&out, text, NULL, indexable);
1752   fprintf(fp, "%s", out.text);
1753   sfree(out.text);
1754 }
1755
1756 /* Output a code paragraph. I'm treating this as preformatted, which
1757  * may not be entirely correct. See xhtml_para() for my worries about
1758  * this being overly-simple; however I think that most of the complexity
1759  * of the text backend came entirely out of word wrapping anyway.
1760  */
1761 static void xhtml_codepara(FILE *fp, word *text)
1762 {
1763   fprintf(fp, "<pre>");
1764     for (; text; text = text->next) if (text->type == word_WeakCode) {
1765         word *here, *next;
1766         char *c;
1767
1768         /*
1769          * See if this WeakCode is followed by an Emph to indicate
1770          * emphasis.
1771          */
1772         here = text;
1773         if (text->next && text->next->type == word_Emph) {
1774             next = text = text->next;
1775         } else
1776             next = NULL;
1777
1778         if (next) {
1779             wchar_t *t, *e;
1780             int n;
1781
1782             t = here->text;
1783             e = next->text;
1784
1785             while (*e) {
1786                 int ec = *e;
1787
1788                 for (n = 0; t[n] && e[n] && e[n] == ec; n++);
1789                 xhtml_convert(t, n, &c, FALSE);
1790                 fprintf(fp, "%s%s%s",
1791                         (ec == 'i' ? "<em>" : ec == 'b' ? "<b>" : ""),
1792                         c,
1793                         (ec == 'i' ? "</em>" : ec == 'b' ? "</b>" : ""));
1794                 sfree(c);
1795
1796                 t += n;
1797                 e += n;
1798             }
1799
1800             xhtml_convert(t, 0, &c, FALSE);
1801             fprintf(fp, "%s\n", c);
1802             sfree(c);
1803         } else {
1804             xhtml_convert(here->text, 0, &c, FALSE);
1805             fprintf(fp, "%s\n", c);
1806             sfree(c);
1807         }
1808     }
1809   fprintf(fp, "</pre>\n");
1810 }