mdw@git.distorted.org.uk Git - sgt/halibut/blob - bk_xhtml.c

   1 /*
   2  * xhtml backend for Halibut
   3  * (initial implementation by James Aylett)
   4  *
   5  * Still to do:
   6  *
   7  *  +++ doesn't handle non-breaking hyphens. Not sure how to yet.
   8  *  +++ entity names (from a file -- ideally supply normal SGML files)
   9  *  +++ configuration directive to file split where the current layout
  10  *      code wouldn't. Needs changes to _ponder_layout() and _do_paras(),
  11  *      perhaps others.
  12  *
  13  * Limitations:
  14  *
  15  *  +++ biblio/index references target the nearest section marker, rather
  16  *   than having a dedicated target themselves. In large bibliographies
  17  *   this will cause problems. (The solution is to fake up a response
  18  *   from xhtml_find_section(), probably linking it into the sections
  19  *   chain just in case we need it again, and to make freeing it up
  20  *   easier.) docsrc.pl used to work as we do, however, and SGT agrees that
  21  *   this is acceptable for now.
  22  *  +++ can't cope with leaf-level == 0. It's all to do with the
  23  *   top-level file not being normal, probably not even having a valid
  24  *   section level, and stuff like that. I question whether this is an
  25  *   issue, frankly; small manuals that fit on one page should probably
  26  *   not be written in halibut at all.
  27  */
  28
  29 #include <stdio.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32 #include <assert.h>
  33 #include "halibut.h"
  34
  35 /*
  36  * FILENAME_TEMPLATE (overridable in config of course) allows you
  37  * to choose the general form for your HTML file names. It is
  38  * slightly printf-styled (% followed by a single character is a
  39  * formatting directive, %% is a literal %). Formatting directives
  40  * are:
  41  *
  42  *  - %n is the section number, minus whitespace (`Chapter1.2').
  43  *  - %b is the section number on its own (`1.2').
  44  *  - %k is the section's _internal_ keyword.
  45  *  - %N is the section's visible title in the output, again minus
  46  *    whitespace.
  47  *
  48  * %n, %b and %k will all default to %N if the section is
  49  * unnumbered (`Bibliography' is often a good example).
  50  */
  51
/*
 * Built-in default file names. xhtml_configure() copies these into the
 * configuration and replaces them when the matching
 * xhtml-{single,contents,index,template}-filename directive is seen.
 */
/* Name of the top file when leaf_level is 0. */
#define FILENAME_SINGLE "Manual.html"
/* Name of the top file otherwise; also the target of "Contents" nav links. */
#define FILENAME_CONTENTS "Contents.html"
/* Name of the index page. */
#define FILENAME_INDEX "IndexPage.html"
/* Template for per-section file names, using the % directives listed above. */
#define FILENAME_TEMPLATE "%n.html"
  56
/*
 * One section of the document (a paragraph for which xhtml_para_level()
 * returns a positive level), as placed by xhtml_ponder_layout().
 */
struct xhtmlsection_Struct {
    struct xhtmlsection_Struct *next; /* next sibling (NULL if split across files) */
    struct xhtmlsection_Struct *child; /* NULL if split across files */
    struct xhtmlsection_Struct *parent; /* NULL if split across files */
    /* chain points at the section created immediately before this one
     * (see xhtml_new_section()); it is the list walked by
     * xhtml_find_section() and when freeing sections. */
    struct xhtmlsection_Struct *chain; /* single structure independent of weird trees */
    paragraph *para; /* source paragraph that starts this section */
    struct xhtmlfile_Struct *file; /* which file is this a part of? */
    char *fragment; /* fragment id within the file */
    int level; /* xhtml_para_level() of para; -1 on the placeholder made first */
};
  67
/*
 * One output file. Files form a tree (next/child/parent) that is built
 * by xhtml_ponder_layout() and released by xhtml_free_file().
 */
struct xhtmlfile_Struct {
    struct xhtmlfile_Struct *next; /* next sibling file */
    struct xhtmlfile_Struct *child; /* first file nested below this one */
    struct xhtmlfile_Struct *parent; /* enclosing file; NULL for the top file */
    char *filename; /* heap-allocated name; freed by xhtml_free_file() */
    struct xhtmlsection_Struct *sections; /* sections within this file (only one for non-leaf) */
    int is_leaf; /* is this file a leaf file, ie does it not have any children? */
};
  76
typedef struct xhtmlsection_Struct xhtmlsection;
typedef struct xhtmlfile_Struct xhtmlfile;
typedef struct xhtmlindex_Struct xhtmlindex;

/*
 * Per-index-entry backend data (stored in indexentry->backend_data):
 * the sections that an index entry links to.
 */
struct xhtmlindex_Struct {
  int nsection; /* number of slots in sections[] that xhtml_do_index_body() visits */
  int size; /* presumably the allocated capacity of sections[] -- TODO confirm */
  xhtmlsection **sections; /* entries may be NULL; NULL slots are skipped on output */
};
  86
/*
 * Heading format for one heading level, filled in by xhtml_configure()
 * from the xhtml-chapter-* and xhtml-section-* directives.
 */
typedef struct {
    int just_numbers; /* boolean from the *-numeric directives */
    wchar_t *number_suffix; /* text from the *-suffix directives; not owned (points at a literal or into the config paragraph) */
} xhtmlheadfmt;
  91
/*
 * Backend configuration, produced by xhtml_configure(). The wchar_t
 * pointers point into the source paragraphs (or at literals) and are
 * not owned; the char * file names and fsect are heap-allocated.
 */
typedef struct {
  int contents_depth[6]; /* xhtml-contents-depth-0 .. -5; defaults 2..7 */
  int leaf_contains_contents; /* xhtml-leaf-contains-contents */
  int leaf_level; /* xhtml-leaf-level: sections at or above this level start a new file */
  int leaf_smallest_contents; /* xhtml-leaf-smallest-contents */
  int include_version_id; /* xhtml-versionid */
  wchar_t *author, *description; /* xhtml-author, xhtml-description; NULL if unset */
  /* Optional text from the xhtml-head-end, -body-tag, -body-start, -body-end,
   * -address-start, -address-end and -navigation-attributes directives. */
  wchar_t *head_end, *body, *body_start, *body_end, *address_start, *address_end, *nav_attrs;
  int suppress_address; /* xhtml-suppress-address */
  xhtmlheadfmt fchapter, *fsect; /* chapter format; array of nfsect section-level formats */
  int nfsect; /* number of entries in fsect */
  char *contents_filename, *index_filename; /* names of the contents and index pages */
  char *single_filename, *template_filename; /* single-file name; per-section name template */
} xhtmlconfig;
 106
/* Prototypes belonging to the older output code, left commented out: */
/*static void xhtml_level(paragraph *, int);
static void xhtml_level_0(paragraph *);
static void xhtml_docontents(FILE *, paragraph *, int);
static void xhtml_dosections(FILE *, paragraph *, int);
static void xhtml_dobody(FILE *, paragraph *, int);*/

/* Page furniture: header, footer and version-id output. */
static void xhtml_doheader(FILE *, word *);
static void xhtml_dofooter(FILE *);
static void xhtml_versionid(FILE *, word *, int);

/* Small helpers: wide-string conversion and paragraph classification. */
static void xhtml_utostr(wchar_t *, char **);
static int xhtml_para_level(paragraph *);
static int xhtml_reservedchar(int);

/* Text conversion and paragraph/heading rendering. */
static int xhtml_convert(wchar_t *, int, char **, int);
static void xhtml_rdaddwc(rdstringc *, word *, word *, int);
static void xhtml_para(FILE *, word *, int);
static void xhtml_codepara(FILE *, word *);
static void xhtml_heading(FILE *, paragraph *, int);
 126
/* File-global variables are much easier than passing these things
 * all over the place. Evil, but easier. We can replace this with a single
 * structure at some point.
 */
static xhtmlconfig conf; /* set by xhtml_backend() from xhtml_configure() */
static keywordlist *keywords; /* keyword list passed to xhtml_backend() */
static indexdata *idx; /* index data passed to xhtml_backend() */
static xhtmlfile *topfile; /* root of the file tree built by xhtml_ponder_layout() */
static xhtmlsection *topsection; /* head of the section chain (the last section created, once layout is done) */
static paragraph *sourceparas; /* the source paragraph list passed to xhtml_backend() */
static xhtmlfile *lastfile; /* reset to NULL by xhtml_ponder_layout() */
static xhtmlfile *xhtml_last_file = NULL; /* target of the "Previous" nav link; NULL means none */
/* NOTE(review): the users of the next three variables are not in this part of the file. */
static int last_level=-1, start_level;
static xhtmlsection *currentsection;
 141
 142 static xhtmlconfig xhtml_configure(paragraph *source)
 143 {
 144   xhtmlconfig ret;
 145
 146   /*
 147    * Defaults.
 148    */
 149   ret.contents_depth[0] = 2;
 150   ret.contents_depth[1] = 3;
 151   ret.contents_depth[2] = 4;
 152   ret.contents_depth[3] = 5;
 153   ret.contents_depth[4] = 6;
 154   ret.contents_depth[5] = 7;
 155   ret.leaf_level = 2;
 156   ret.leaf_smallest_contents = 4;
 157   ret.leaf_contains_contents = FALSE;
 158   ret.include_version_id = TRUE;
 159   ret.author = NULL;
 160   ret.description = NULL;
 161   ret.head_end = NULL;
 162   ret.body = NULL;
 163   ret.body_start = NULL;
 164   ret.body_end = NULL;
 165   ret.address_start = NULL;
 166   ret.address_end = NULL;
 167   ret.nav_attrs = NULL;
 168   ret.suppress_address = FALSE;
 169
 170   ret.fchapter.just_numbers = FALSE;
 171   ret.fchapter.number_suffix = L": ";
 172   ret.nfsect = 2;
 173   ret.fsect = mknewa(xhtmlheadfmt, ret.nfsect);
 174   ret.fsect[0].just_numbers = FALSE;
 175   ret.fsect[0].number_suffix = L": ";
 176   ret.fsect[1].just_numbers = TRUE;
 177   ret.fsect[1].number_suffix = L" ";
 178   ret.contents_filename = strdup(FILENAME_CONTENTS);
 179   ret.single_filename = strdup(FILENAME_SINGLE);
 180   ret.index_filename = strdup(FILENAME_INDEX);
 181   ret.template_filename = strdup(FILENAME_TEMPLATE);
 182
 183   for (; source; source = source->next)
 184   {
 185     if (source->type == para_Config)
 186     {
 187       if (!ustricmp(source->keyword, L"xhtml-contents-filename")) {
 188         sfree(ret.contents_filename);
 189         ret.contents_filename = utoa_dup(uadv(source->keyword));
 190       } else if (!ustricmp(source->keyword, L"xhtml-single-filename")) {
 191         sfree(ret.single_filename);
 192         ret.single_filename = utoa_dup(uadv(source->keyword));
 193       } else if (!ustricmp(source->keyword, L"xhtml-index-filename")) {
 194         sfree(ret.index_filename);
 195         ret.index_filename = utoa_dup(uadv(source->keyword));
 196       } else if (!ustricmp(source->keyword, L"xhtml-template-filename")) {
 197         sfree(ret.template_filename);
 198         ret.template_filename = utoa_dup(uadv(source->keyword));
 199       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-0")) {
 200         ret.contents_depth[0] = utoi(uadv(source->keyword));
 201       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-1")) {
 202         ret.contents_depth[1] = utoi(uadv(source->keyword));
 203       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-2")) {
 204         ret.contents_depth[2] = utoi(uadv(source->keyword));
 205       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-3")) {
 206         ret.contents_depth[3] = utoi(uadv(source->keyword));
 207       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-4")) {
 208         ret.contents_depth[4] = utoi(uadv(source->keyword));
 209       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-5")) {
 210         ret.contents_depth[5] = utoi(uadv(source->keyword));
 211       } else if (!ustricmp(source->keyword, L"xhtml-leaf-level")) {
 212         ret.leaf_level = utoi(uadv(source->keyword));
 213       } else if (!ustricmp(source->keyword, L"xhtml-leaf-smallest-contents")) {
 214         ret.leaf_smallest_contents = utoi(uadv(source->keyword));
 215       } else if (!ustricmp(source->keyword, L"xhtml-versionid")) {
 216         ret.include_version_id = utob(uadv(source->keyword));
 217       } else if (!ustricmp(source->keyword, L"xhtml-leaf-contains-contents")) {
 218         ret.leaf_contains_contents = utob(uadv(source->keyword));
 219       } else if (!ustricmp(source->keyword, L"xhtml-suppress-address")) {
 220         ret.suppress_address = utob(uadv(source->keyword));
 221       } else if (!ustricmp(source->keyword, L"xhtml-author")) {
 222         ret.author = uadv(source->keyword);
 223       } else if (!ustricmp(source->keyword, L"xhtml-description")) {
 224         ret.description = uadv(source->keyword);
 225       } else if (!ustricmp(source->keyword, L"xhtml-head-end")) {
 226         ret.head_end = uadv(source->keyword);
 227       } else if (!ustricmp(source->keyword, L"xhtml-body-start")) {
 228         ret.body_start = uadv(source->keyword);
 229       } else if (!ustricmp(source->keyword, L"xhtml-body-tag")) {
 230         ret.body = uadv(source->keyword);
 231       } else if (!ustricmp(source->keyword, L"xhtml-body-end")) {
 232         ret.body_end = uadv(source->keyword);
 233       } else if (!ustricmp(source->keyword, L"xhtml-address-start")) {
 234         ret.address_start = uadv(source->keyword);
 235       } else if (!ustricmp(source->keyword, L"xhtml-address-end")) {
 236         ret.address_end = uadv(source->keyword);
 237       } else if (!ustricmp(source->keyword, L"xhtml-navigation-attributes")) {
 238         ret.nav_attrs = uadv(source->keyword);
 239       } else if (!ustricmp(source->keyword, L"xhtml-chapter-numeric")) {
 240         ret.fchapter.just_numbers = utob(uadv(source->keyword));
 241       } else if (!ustricmp(source->keyword, L"xhtml-chapter-suffix")) {
 242         ret.fchapter.number_suffix = uadv(source->keyword);
 243       } else if (!ustricmp(source->keyword, L"xhtml-section-numeric")) {
 244         wchar_t *p = uadv(source->keyword);
 245         int n = 0;
 246         if (uisdigit(*p)) {
 247           n = utoi(p);
 248           p = uadv(p);
 249         }
 250         if (n >= ret.nfsect) {
 251           int i;
 252           ret.fsect = resize(ret.fsect, n+1);
 253           for (i = ret.nfsect; i <= n; i++)
 254             ret.fsect[i] = ret.fsect[ret.nfsect-1];
 255           ret.nfsect = n+1;
 256         }
 257         ret.fsect[n].just_numbers = utob(p);
 258       } else if (!ustricmp(source->keyword, L"xhtml-section-suffix")) {
 259         wchar_t *p = uadv(source->keyword);
 260         int n = 0;
 261         if (uisdigit(*p)) {
 262           n = utoi(p);
 263           p = uadv(p);
 264         }
 265         if (n >= ret.nfsect) {
 266           int i;
 267           ret.fsect = resize(ret.fsect, n+1);
 268           for (i = ret.nfsect; i <= n; i++)
 269             ret.fsect[i] = ret.fsect[ret.nfsect-1];
 270           ret.nfsect = n+1;
 271         }
 272         ret.fsect[n].number_suffix = p;
 273       }
 274     }
 275   }
 276
 277   /*  printf(" !!! leaf_level = %i\n", ret.leaf_level);
 278   printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]);
 279   printf(" !!! contentdepth-1 = %i\n", ret.contents_depth[1]);
 280   printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]);
 281   printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]);
 282   printf(" !!! contentdepth-4 = %i\n", ret.contents_depth[4]);
 283   printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]);
 284   printf(" !!! leaf_contains_contents = %i\n", ret.leaf_contains_contents);*/
 285   return ret;
 286 }
 287
 288 static xhtmlsection *xhtml_new_section(xhtmlsection *last)
 289 {
 290   xhtmlsection *ret = mknew(xhtmlsection);
 291   ret->next=NULL;
 292   ret->child=NULL;
 293   ret->parent=NULL;
 294   ret->chain=last;
 295   ret->para=NULL;
 296   ret->file=NULL;
 297   ret->fragment=NULL;
 298   ret->level=-1; /* marker: end of chain */
 299   return ret;
 300 }
 301
 302 /* Returns NULL or the section that marks that paragraph */
 303 static xhtmlsection *xhtml_find_section(paragraph *p)
 304 {
 305   xhtmlsection *ret = topsection;
 306   if (xhtml_para_level(p)==-1) { /* first, we back-track to a section paragraph */
 307     paragraph *p2 = sourceparas;
 308     paragraph *p3 = NULL;
 309     while (p2 && p2!=p) {
 310       if (xhtml_para_level(p2)!=-1) {
 311         p3 = p2;
 312       }
 313       p2=p2->next;
 314     }
 315     if (p3==NULL) { /* for some reason, we couldn't find a section before this paragraph ... ? */
 316       /* Note that this can happen, if you have a cross-reference to before the first chapter starts.
 317        * So don't do that, then.
 318        */
 319       return NULL;
 320     }
 321     p=p3;
 322   }
 323   while (ret && ret->para != p) {
 324 /*    printf(" xhtml_find_section(): checking %s for para @ %p\n", ret->fragment, p);*/
 325     ret=ret->chain;
 326   }
 327   return ret;
 328 }
 329
 330 static xhtmlfile *xhtml_new_file(xhtmlsection *sect)
 331 {
 332   xhtmlfile *ret = mknew(xhtmlfile);
 333
 334   ret->next=NULL;
 335   ret->child=NULL;
 336   ret->parent=NULL;
 337   ret->filename=NULL;
 338   ret->sections=sect;
 339   ret->is_leaf=(sect!=NULL && sect->level==conf.leaf_level);
 340   if (sect==NULL) {
 341     if (conf.leaf_level==0) { /* currently unused */
 342       ret->filename = smalloc(strlen(conf.single_filename)+1);
 343       sprintf(ret->filename, conf.single_filename);
 344     } else {
 345       ret->filename = smalloc(strlen(conf.contents_filename)+1);
 346       sprintf(ret->filename, conf.contents_filename);
 347     }
 348   } else {
 349     paragraph *p = sect->para;
 350     rdstringc fname_c = { 0, 0, NULL };
 351     char *c, *t;
 352     word *w;
 353     wchar_t *ws;
 354
 355     t = conf.template_filename;
 356     while (*t) {
 357       if (*t == '%' && t[1]) {
 358         int fmt;
 359
 360         t++;
 361         fmt = *t++;
 362
 363         if (fmt == '%') {
 364           rdaddc(&fname_c, fmt);
 365           continue;
 366         }
 367
 368         w = NULL;
 369         ws = NULL;
 370
 371         if (p->kwtext && fmt == 'n')
 372           w = p->kwtext;
 373         else if (p->kwtext2 && fmt == 'b')
 374           w = p->kwtext2;
 375         else if (p->keyword && *p->keyword && fmt == 'k')
 376           ws = p->keyword;
 377         else
 378           w = p->words;
 379
 380         while (w) {
 381           switch (removeattr(w->type))
 382           {
 383            case word_Normal:
 384             /*case word_Emph:
 385              case word_Code:
 386              case word_WeakCode:*/
 387             xhtml_utostr(w->text, &c);
 388             rdaddsc(&fname_c,c);
 389             sfree(c);
 390             break;
 391           }
 392           w = w->next;
 393         }
 394         if (ws) {
 395           xhtml_utostr(ws, &c);
 396           rdaddsc(&fname_c,c);
 397           sfree(c);
 398         }
 399       } else {
 400         rdaddc(&fname_c, *t++);
 401       }
 402     }
 403
 404     ret->filename = rdtrimc(&fname_c);
 405   }
 406   /*  printf(" ! new file '%s', is_leaf == %s\n", ret->filename, (ret->is_leaf)?("true"):("false"));*/
 407   return ret;
 408 }
 409
 410 /*
 411  * Walk the tree fixing up files which are actually leaf (ie
 412  * have no children) but aren't at leaf level, so they have the
 413  * leaf flag set.
 414  */
 415 void xhtml_fixup_layout(xhtmlfile* file)
 416 {
 417   if (file->child==NULL) {
 418     file->is_leaf = TRUE;
 419   } else {
 420     xhtml_fixup_layout(file->child);
 421   }
 422   if (file->next)
 423     xhtml_fixup_layout(file->next);
 424 }
 425
 426 /*
 427  * Create the tree structure so we know where everything goes.
 428  * Method:
 429  *
 430  * Ignoring file splitting, we have three choices with each new section:
 431  *
 432  * +-----------------+-----------------+
 433  * |                 |                 |
 434  * X            +----X----+           (1)
 435  *              |         |
 436  *              Y        (2)
 437  *              |
 438  *             (3)
 439  *
 440  * Y is the last section we added (currentsect).
 441  * If sect is the section we want to add, then:
 442  *
 443  * (1) if sect->level < currentsect->level
 444  * (2) if sect->level == currentsect->level
 445  * (3) if sect->level > currentsect->level
 446  *
 447  * This requires the constraint that you never skip section numbers
 448  * (so you can't have a.b.c.d without all of a, a.b and a.b.c existing).
 449  *
 450  * Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change
 451  * more than one level at a time. Lots of asserts, and probably part of
 452  * the algorithm here, rely on this being true. (It currently isn't
 453  * enforced by halibut, however.)
 454  *
 455  * File splitting makes this harder. For instance, say we added at (3)
 456  * above and now need to add another section. We are splitting at level
 457  * 2, ie the level of Y. Z is the last section we added:
 458  *
 459  * +-----------------+-----------------+
 460  * |                 |                 |
 461  * X            +----X----+           (1)
 462  *              |         |
 463  *         +----Y----+   (1)
 464  *         |         |
 465  *         Z        (2)
 466  *         |
 467  *        (3)
 468  *
 469  * The (1) case is now split; we need to search upwards to find where
 470  * to actually link in. The other two cases remain the same (and will
 471  * always be like this).
 472  *
 * File splitting complicates the linking, but the decision of whether
 * to split to a new file always rests on the same condition: is
 * the level of this section higher than the leaf_level configuration
 * value or not?
 477  *
 478  * Treating the cases backwards:
 479  *
 480  * (3) same file if sect->level > conf.leaf_level, otherwise new file
 481  *
 482  *     if in the same file, currentsect->child points to sect
 483  *     otherwise the linking is done through the file tree (which works
 484  *     in more or less the same way, ie currentfile->child points to
 485  *     the new file)
 486  *
 487  * (2) same file if sect->level > conf.leaf_level, otherwise new file
 488  *
 489  *     if in the same file, currentsect->next points to sect
 490  *     otherwise file linking and currentfile->next points to the new
 491  *     file (we know that Z must have caused a new file to be created)
 492  *
 493  * (1) same file if sect->level > conf.leaf_level, otherwise new file
 494  *
 495  *     this is actually effectively the same case as (2) here,
 496  *     except that we first have to travel up the sections to figure
 497  *     out which section this new one will be a sibling of. In doing
 498  *     so, we may disappear off the top of a file and have to go up
 499  *     to its parent in the file tree.
 500  *
 501  */
/*
 * Build the section chain and the file tree for the paragraph list `p'.
 *
 * On return, topfile is the root of the file tree, topsection is the
 * most recently created section (the head of the chain), and every
 * childless file has been marked as a leaf. Only paragraphs with
 * xhtml_para_level() > 0 create sections; the title (level 0) and
 * ordinary paragraphs are skipped.
 */
static void xhtml_ponder_layout(paragraph *p)
{
  xhtmlsection *lastsection; /* most recently created section (chain head) */
  xhtmlsection *currentsect; /* section the next one is linked relative to */
  xhtmlfile *currentfile; /* file the next section or file is linked relative to */

  lastfile = NULL;
  /* Start with a placeholder section (level -1) and the top file. */
  topsection = xhtml_new_section(NULL);
  topfile = xhtml_new_file(NULL);
  lastsection = topsection;
  currentfile = topfile;
  currentsect = topsection;

  /* With leaf_level 0 everything lives in the top file, so the
   * placeholder section is attached to it. */
  if (conf.leaf_level == 0) {
    topfile->is_leaf = 1;
    topfile->sections = topsection;
    topsection->file = topfile;
  }

  for (; p; p=p->next)
  {
    int level = xhtml_para_level(p);
    if (level>0) /* actually a section */
    {
      xhtmlsection *sect;
      word *w;
      char *c;
      rdstringc fname_c = { 0, 0, NULL };

      sect = xhtml_new_section(lastsection);
      lastsection = sect;
      sect->para = p;
      /* Build the fragment id from the word_Normal words only. */
      for (w=(p->kwtext2)?(p->kwtext2):(p->words); w; w=w->next) /* kwtext2 because we want numbers only! */
      {
        switch (removeattr(w->type))
        {
        case word_Normal:
         /*case word_Emph:
         case word_Code:
         case word_WeakCode:*/
          xhtml_utostr(w->text, &c);
          rdaddsc(&fname_c,c);
          sfree(c);
          break;
        }
      }
/*      rdaddsc(&fname_c, ".html");*/
      sect->fragment = rdtrimc(&fname_c);
      sect->level = level;
      /*      printf(" ! adding para @ %p as sect %s, level %i\n", sect->para, sect->fragment, level);*/

      if (level>currentsect->level) { /* case (3) */
        if (level>conf.leaf_level) { /* same file */
          assert(currentfile->is_leaf);
          currentsect->child = sect;
          sect->parent=currentsect;
          sect->file=currentfile;
          /*          printf("connected '%s' to existing file '%s' [I]\n", sect->fragment, currentfile->filename);*/
          currentsect=sect;
        } else { /* new file */
          xhtmlfile *file = xhtml_new_file(sect);
          assert(!currentfile->is_leaf);
          currentfile->child=file;
          sect->file=file;
          file->parent=currentfile;
          /*          printf("connected '%s' to new file '%s' [I]\n", sect->fragment, file->filename);*/
          currentfile=file;
          currentsect=sect;
        }
      } else if (level >= currentsect->file->sections->level) {
        /* Case (1) or (2) *AND* still under the section that starts
         * the current file.
         *
         * I'm not convinced that this couldn't be rolled in with the
         * final else {} leg further down. It seems a lot of effort
         * this way.
         */
        if (level>conf.leaf_level) { /* stick within the same file */
          assert(currentfile->is_leaf);
          sect->file = currentfile;
          /* Climb to the section that will be the new one's previous
           * sibling.
           * NOTE(review): currentsect->parent is dereferenced without a
           * NULL check; this relies on every section deeper than the
           * requested level having a parent in the same file. */
          while (currentsect && currentsect->level > level &&
                 currentsect->file==currentsect->parent->file) {
            currentsect = currentsect->parent;
          }
          assert(currentsect);
          currentsect->next = sect;
          /* Fails if the document skipped a section level. */
          assert(currentsect->level == sect->level);
          sect->parent = currentsect->parent;
          currentsect = sect;
          /*          printf("connected '%s' to existing file '%s' [II]\n", sect->fragment, currentfile->filename);*/
        } else { /* new file */
          xhtmlfile *file = xhtml_new_file(sect);
          sect->file=file;
          currentfile->next=file;
          file->parent=currentfile->parent;
          file->is_leaf=(level==conf.leaf_level);
          file->sections=sect;
          /*          printf("connected '%s' to new file '%s' [II]\n", sect->fragment, file->filename);*/
          currentfile=file;
          currentsect=sect;
        }
      } else { /* Case (1) or (2) and we must move up the file tree first */
        /* this loop is now probably irrelevant - we know we can't connect
         * to anything in the current file */
        while (currentsect && level<currentsect->level) {
          currentsect=currentsect->parent;
          if (currentsect) {
            /*            printf(" * up one level to '%s'\n", currentsect->fragment);*/
          } else {
            /*            printf(" * up one level (off top of current file)\n");*/
          }
        }
        if (currentsect) {
          /* I'm pretty sure this can now never fire */
          assert(currentfile->is_leaf);
          /*          printf("connected '%s' to existing file '%s' [III]\n", sect->fragment, currentfile->filename);*/
          sect->file = currentfile;
          currentsect->next=sect;
          currentsect=sect;
        } else { /* find a file we can attach to */
          /* Climb the file tree until reaching a file whose first
           * section is not deeper than the new one (or a file with no
           * sections, or the top of the tree). */
          while (currentfile && currentfile->sections && level<currentfile->sections->level) {
            currentfile=currentfile->parent;
            if (currentfile) {
              /*              printf(" * up one file level to '%s'\n", currentfile->filename);*/
            } else {
              /*              printf(" * up one file level (off top of tree)\n");*/
            }
          }
          if (currentfile) { /* new file (we had to skip up a file to
                                get here, so we must be dealing with a
                                level no lower than the configured
                                leaf_level */
            xhtmlfile *file = xhtml_new_file(sect);
            currentfile->next=file;
            sect->file=file;
            file->parent=currentfile->parent;
            file->is_leaf=(level==conf.leaf_level);
            file->sections=sect;
            /*            printf("connected '%s' to new file '%s' [III]\n", sect->fragment, file->filename);*/
            currentfile=file;
            currentsect=sect;
          } else {
            fatal(err_whatever, "Ran off the top trying to connect sibling: strange document.");
          }
        }
      }
    }
  }
  topsection = lastsection; /* get correct end of the chain */
  xhtml_fixup_layout(topfile); /* leaf files not at leaf level marked as such */
}
 653
/* Forward declarations for the output routines. */
static void xhtml_do_index();
static void xhtml_do_file(xhtmlfile *file);
static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform);
static void xhtml_do_paras(FILE *fp, paragraph *p, paragraph *end, int indexable);
/* Contents-list helpers. */
static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit);
static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit);
static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit);
static int xhtml_do_contents(FILE *fp, xhtmlfile *file);
static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file);
static void xhtml_do_sections(FILE *fp, xhtmlsection *sections);
 664
 665 /*
 666  * Do all the files in this structure.
 667  */
 668 static void xhtml_do_files(xhtmlfile *file)
 669 {
 670   xhtml_do_file(file);
 671   if (file->child)
 672     xhtml_do_files(file->child);
 673   if (file->next)
 674     xhtml_do_files(file->next);
 675 }
 676
 677 /*
 678  * Free up all memory used by the file tree from 'xfile' downwards
 679  */
 680 static void xhtml_free_file(xhtmlfile* xfile)
 681 {
 682   if (xfile==NULL) {
 683     return;
 684   }
 685
 686   if (xfile->filename) {
 687     sfree(xfile->filename);
 688   }
 689   xhtml_free_file(xfile->child);
 690   xhtml_free_file(xfile->next);
 691   sfree(xfile);
 692 }
 693
 694 /*
 695  * Main function.
 696  */
 697 void xhtml_backend(paragraph *sourceform, keywordlist *in_keywords,
 698                    indexdata *in_idx)
 699 {
 700 /*  int i;*/
 701   indexentry *ientry;
 702   int ti;
 703   xhtmlsection *xsect;
 704
 705   sourceparas = sourceform;
 706   conf = xhtml_configure(sourceform);
 707   keywords = in_keywords;
 708   idx = in_idx;
 709
 710   /* Clear up the index entries backend data pointers */
 711   for (ti=0; (ientry = (indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
 712     ientry->backend_data=NULL;
 713   }
 714
 715   xhtml_ponder_layout(sourceform);
 716
 717   /* old system ... (writes to *.alt, but gets some stuff wrong and is ugly) */
 718 /*  xhtml_level_0(sourceform);
 719   for (i=1; i<=conf.leaf_level; i++)
 720   {
 721     xhtml_level(sourceform, i);
 722   }*/
 723
 724   /* new system ... (writes to *.html, but isn't fully trusted) */
 725   xhtml_do_top_file(topfile, sourceform);
 726   assert(!topfile->next); /* shouldn't have a sibling at all */
 727   if (topfile->child) {
 728     xhtml_do_files(topfile->child);
 729     xhtml_do_index();
 730   }
 731
 732   /* release file, section, index data structures */
 733   xsect = topsection;
 734   while (xsect) {
 735     xhtmlsection *tmp = xsect->chain;
 736     if (xsect->fragment) {
 737       sfree(xsect->fragment);
 738     }
 739     sfree(xsect);
 740     xsect = tmp;
 741   }
 742   xhtml_free_file(topfile);
 743   for (ti = 0; (ientry=(indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
 744     if (ientry->backend_data!=NULL) {
 745       xhtmlindex *xi = (xhtmlindex*) ientry->backend_data;
 746       if (xi->sections!=NULL) {
 747         sfree(xi->sections);
 748       }
 749       sfree(xi);
 750     }
 751     ientry->backend_data = NULL;
 752   }
 753   sfree(conf.fsect);
 754 }
 755
 756 static int xhtml_para_level(paragraph *p)
 757 {
 758   switch (p->type)
 759   {
 760   case para_Title:
 761     return 0;
 762     break;
 763   case para_UnnumberedChapter:
 764   case para_Chapter:
 765   case para_Appendix:
 766     return 1;
 767     break;
 768 /*  case para_BiblioCited:
 769     return 2;
 770     break;*/
 771   case para_Heading:
 772   case para_Subsect:
 773     return p->aux+2;
 774     break;
 775   default:
 776     return -1;
 777     break;
 778   }
 779 }
 780
 781 /* Output the nav links for the current file.
 782  * file == NULL means we're doing the index
 783  */
 784 static void xhtml_donavlinks(FILE *fp, xhtmlfile *file)
 785 {
 786   xhtmlfile *xhtml_next_file = NULL;
 787   fprintf(fp, "<p");
 788   if (conf.nav_attrs!=NULL) {
 789     fprintf(fp, " %ls>", conf.nav_attrs);
 790   } else {
 791     fprintf(fp, ">");
 792   }
 793   if (xhtml_last_file==NULL) {
 794     fprintf(fp, "Previous | ");
 795   } else {
 796     fprintf(fp, "<a href='%s'>Previous</a> | ", xhtml_last_file->filename);
 797   }
 798   fprintf(fp, "<a href='%s'>Contents</a> | ", conf.contents_filename);
 799   if (file == NULL) {
 800     fprintf(fp, "Index | ");
 801   } else {
 802     fprintf(fp, "<a href='%s'>Index</a> | ", conf.index_filename);
 803   }
 804   if (file != NULL) { /* otherwise we're doing nav links for the index */
 805     if (xhtml_next_file==NULL)
 806       xhtml_next_file = file->child;
 807     if (xhtml_next_file==NULL)
 808       xhtml_next_file = file->next;
 809     if (xhtml_next_file==NULL)
 810       xhtml_next_file = file->parent->next;
 811   }
 812   if (xhtml_next_file==NULL) {
 813     if (file==NULL) { /* index, so no next file */
 814       fprintf(fp, "Next ");
 815     } else {
 816       fprintf(fp, "<a href='%s'>Next</a>", conf.index_filename);
 817     }
 818   } else {
 819     fprintf(fp, "<a href='%s'>Next</a>", xhtml_next_file->filename);
 820   }
 821   fprintf(fp, "</p>\n");
 822 }
 823
 824 /* Write out the index file */
 825 static void xhtml_do_index_body(FILE *fp)
 826 {
 827   indexentry *y;
 828   int ti;
 829
 830   if (count234(idx->entries) == 0)
 831     return;                            /* don't write anything at all */
 832
 833   fprintf(fp, "<dl>\n");
 834   /* iterate over idx->entries using the tree functions and display everything */
 835   for (ti = 0; (y = (indexentry *)index234(idx->entries, ti)) != NULL; ti++) {
 836     if (y->backend_data) {
 837       int i;
 838       xhtmlindex *xi;
 839
 840       fprintf(fp, "<dt>");
 841       xhtml_para(fp, y->text, FALSE);
 842       fprintf(fp, "</dt>\n<dd>");
 843
 844       xi = (xhtmlindex*) y->backend_data;
 845       for (i=0; i<xi->nsection; i++) {
 846         xhtmlsection *sect = xi->sections[i];
 847         if (sect) {
 848           fprintf(fp, "<a href='%s#%s'>", sect->file->filename, sect->fragment);
 849           if (sect->para->kwtext) {
 850             xhtml_para(fp, sect->para->kwtext, FALSE);
 851           } else if (sect->para->words) {
 852             xhtml_para(fp, sect->para->words, FALSE);
 853           }
 854           fprintf(fp, "</a>");
 855           if (i+1<xi->nsection) {
 856             fprintf(fp, ", ");
 857           }
 858         }
 859       }
 860       fprintf(fp, "</dd>\n");
 861     }
 862   }
 863   fprintf(fp, "</dl>\n");
 864 }
 865 static void xhtml_do_index()
 866 {
 867   word temp_word = { NULL, NULL, word_Normal, 0, 0, L"Index", { NULL, 0, 0} };
 868   FILE *fp = fopen(conf.index_filename, "w");
 869
 870   if (fp==NULL)
 871     fatal(err_cantopenw, conf.index_filename);
 872   xhtml_doheader(fp, &temp_word);
 873   xhtml_donavlinks(fp, NULL);
 874
 875   xhtml_do_index_body(fp);
 876
 877   xhtml_donavlinks(fp, NULL);
 878   xhtml_dofooter(fp);
 879   fclose(fp);
 880 }
 881
 882 /* Output the given file. This includes whatever contents at beginning and end, etc. etc. */
 883 static void xhtml_do_file(xhtmlfile *file)
 884 {
 885   FILE *fp = fopen(file->filename, "w");
 886   if (fp==NULL)
 887     fatal(err_cantopenw, file->filename);
 888
 889   if (file->sections->para->words) {
 890     xhtml_doheader(fp, file->sections->para->words);
 891   } else if (file->sections->para->kwtext) {
 892     xhtml_doheader(fp, file->sections->para->kwtext);
 893   } else {
 894     xhtml_doheader(fp, NULL);
 895   }
 896
 897   xhtml_donavlinks(fp, file);
 898
 899   if (file->is_leaf && conf.leaf_contains_contents &&
 900       xhtml_do_contents(NULL, file)>=conf.leaf_smallest_contents)
 901     xhtml_do_contents(fp, file);
 902   xhtml_do_sections(fp, file->sections);
 903   if (!file->is_leaf)
 904     xhtml_do_naked_contents(fp, file);
 905
 906   xhtml_donavlinks(fp, file);
 907
 908   xhtml_dofooter(fp);
 909   fclose(fp);
 910
 911   xhtml_last_file = file;
 912 }
 913
 914 /* Output the top-level file. */
 915 static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform)
 916 {
 917   paragraph *p;
 918   int done=FALSE;
 919   FILE *fp = fopen(file->filename, "w");
 920   if (fp==NULL)
 921     fatal(err_cantopenw, file->filename);
 922
 923   /* Do the title -- only one allowed */
 924   for (p = sourceform; p && !done; p = p->next)
 925   {
 926     if (p->type == para_Title)
 927     {
 928       xhtml_doheader(fp, p->words);
 929       done=TRUE;
 930     }
 931   }
 932   if (!done)
 933     xhtml_doheader(fp, NULL /* Eek! */);
 934
 935   /*
 936    * Display the title.
 937    */
 938   for (p = sourceform; p; p = p->next)
 939   {
 940     if (p->type == para_Title) {
 941       xhtml_heading(fp, p, FALSE);
 942       break;
 943     }
 944   }
 945
 946   /* Do the preamble */
 947   for (p = sourceform; p; p = p->next)
 948   {
 949     if (p->type == para_Chapter || p->type == para_Heading ||
 950         p->type == para_Subsect || p->type == para_Appendix ||
 951         p->type == para_UnnumberedChapter) {
 952         /*
 953          * We've found the end of the preamble. Do every normal
 954          * paragraph up to there.
 955          */
 956         xhtml_do_paras(fp, sourceform, p, FALSE);
 957         break;
 958     }
 959   }
 960
 961   xhtml_do_contents(fp, file);
 962   xhtml_do_sections(fp, file->sections);
 963
 964   /*
 965    * Put the index in the top file if we're in single-file mode
 966    * (leaf-level 0).
 967    */
 968   if (conf.leaf_level == 0 && count234(idx->entries) > 0) {
 969     fprintf(fp, "<a name=\"index\"></a><h1>Index</h1>\n");
 970     xhtml_do_index_body(fp);
 971   }
 972
 973   xhtml_dofooter(fp);
 974   fclose(fp);
 975 }
 976
 977 /* Convert a Unicode string to an ASCII one. '?' is
 978  * used for unmappable characters.
 979  */
 980 static void xhtml_utostr(wchar_t *in, char **out)
 981 {
 982   int l = ustrlen(in);
 983   int i;
 984   *out = smalloc(l+1);
 985   for (i=0; i<l; i++)
 986   {
 987     if (in[i]>=32 && in[i]<=126)
 988       (*out)[i]=(char)in[i];
 989     else
 990       (*out)[i]='?';
 991   }
 992   (*out)[i]=0;
 993 }
 994
 995 /*
 996  * Write contents for the given file, and subfiles, down to
 997  * the appropriate contents depth. Returns the number of
 998  * entries written.
 999  */
1000 static int xhtml_do_contents(FILE *fp, xhtmlfile *file)
1001 {
1002   int level, limit, count = 0;
1003   if (!file)
1004     return 0;
1005
1006   level = (file->sections)?(file->sections->level):(0);
1007   limit = conf.contents_depth[(level>5)?(5):(level)];
1008   start_level = (file->is_leaf) ? (level-1) : (level);
1009   last_level = start_level;
1010
1011   count += xhtml_do_contents_section_limit(fp, file->sections, limit);
1012   count += xhtml_do_contents_limit(fp, file->child, limit);
1013   if (fp!=NULL) {
1014     while (last_level > start_level) {
1015       last_level--;
1016       fprintf(fp, "</li></ul>\n");
1017     }
1018   }
1019   return count;
1020 }
1021
1022 /* As above, but doesn't do anything in the current file */
1023 static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file)
1024 {
1025   int level, limit, start_level, count = 0;
1026   if (!file)
1027     return 0;
1028
1029   level = (file->sections)?(file->sections->level):(0);
1030   limit = conf.contents_depth[(level>5)?(5):(level)];
1031   start_level = (file->is_leaf) ? (level-1) : (level);
1032   last_level = start_level;
1033
1034   count = xhtml_do_contents_limit(fp, file->child, limit);
1035   if (fp!=NULL) {
1036     while (last_level > start_level) {
1037       last_level--;
1038       fprintf(fp, "</li></ul>\n");
1039     }
1040   }
1041   return count;
1042 }
1043
1044 /*
1045  * Write contents for the given file, children, and siblings, down to
1046  * given limit contents depth.
1047  */
1048 static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit)
1049 {
1050   int count = 0;
1051   while (file) {
1052     count += xhtml_do_contents_section_limit(fp, file->sections, limit);
1053     count += xhtml_do_contents_limit(fp, file->child, limit);
1054     file = file->next;
1055   }
1056   return count;
1057 }
1058
1059 /*
1060  * Write contents entries for the given section tree, down to the
1061  * limit contents depth.
1062  */
1063 static int xhtml_do_contents_section_deep_limit(FILE *fp, xhtmlsection *section, int limit)
1064 {
1065   int count = 0;
1066   while (section) {
1067     if (!xhtml_add_contents_entry(fp, section, limit))
1068       return 0;
1069     else
1070       count++;
1071     count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
1072     section = section->next;
1073   }
1074   return count;
1075 }
1076
1077 /*
1078  * Write contents entries for the given section tree, down to the
1079  * limit contents depth.
1080  */
1081 static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit)
1082 {
1083   int count = 0;
1084   if (!section)
1085     return 0;
1086   xhtml_add_contents_entry(fp, section, limit);
1087   count=1;
1088   count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
1089   /*  section=section->child;
1090   while (section && xhtml_add_contents_entry(fp, section, limit)) {
1091     section = section->next;
1092     }*/
1093   return count;
1094 }
1095
1096 /*
1097  * Add a section entry, unless we're exceeding the limit, in which
1098  * case return FALSE (otherwise return TRUE).
1099  */
1100 static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit)
1101 {
1102   if (!section || section->level > limit)
1103     return FALSE;
1104   if (fp==NULL || section->level < 0)
1105     return TRUE;
1106   if (last_level > section->level) {
1107     while (last_level > section->level) {
1108       last_level--;
1109       fprintf(fp, "</li></ul>\n");
1110     }
1111     fprintf(fp, "</li>\n");
1112   } else if (last_level < section->level) {
1113     assert(last_level == section->level - 1);
1114     last_level++;
1115     fprintf(fp, "<ul>\n");
1116   } else {
1117     fprintf(fp, "</li>\n");
1118   }
1119   fprintf(fp, "<li><a href=\"%s#%s\">", section->file->filename, section->fragment);
1120   if (section->para->kwtext) {
1121     xhtml_para(fp, section->para->kwtext, FALSE);
1122     if (section->para->words) {
1123       fprintf(fp, ": ");
1124     }
1125   }
1126   if (section->para->words) {
1127     xhtml_para(fp, section->para->words, FALSE);
1128   }
1129   fprintf(fp, "</a>\n");
1130   return TRUE;
1131 }
1132
1133 /*
1134  * Write all the sections in this file. Do all paragraphs in this section, then all
1135  * children (recursively), then go on to the next one (tail recursively).
1136  */
1137 static void xhtml_do_sections(FILE *fp, xhtmlsection *sections)
1138 {
1139   while (sections) {
1140     currentsection = sections;
1141     xhtml_do_paras(fp, sections->para, NULL, TRUE);
1142     xhtml_do_sections(fp, sections->child);
1143     sections = sections->next;
1144   }
1145 }
1146
1147 /* Write this list of paragraphs. Close off all lists at the end. */
1148 static void xhtml_do_paras(FILE *fp, paragraph *p, paragraph *end,
1149                            int indexable)
1150 {
1151   int last_type = -1, ptype, first=TRUE;
1152   stack lcont_stack = stk_new();
1153   if (!p)
1154     return;
1155
1156 /*  for (; p && (xhtml_para_level(p)>limit || xhtml_para_level(p)==-1 || first); p=p->next) {*/
1157   for (; p && p != end && (xhtml_para_level(p)==-1 || first); p=p->next) {
1158     first=FALSE;
1159     switch (ptype = p->type)
1160     {
1161       /*
1162        * Things we ignore because we've already processed them or
1163        * aren't going to touch them in this pass.
1164        */
1165      case para_IM:
1166      case para_BR:
1167      case para_Biblio:                 /* only touch BiblioCited */
1168      case para_VersionID:
1169      case para_NoCite:
1170      case para_Title:
1171        break;
1172
1173        /*
1174         * Chapter titles.
1175         */
1176       case para_Chapter:
1177       case para_Appendix:
1178       case para_UnnumberedChapter:
1179         xhtml_heading(fp, p, indexable);
1180         break;
1181
1182       case para_Heading:
1183       case para_Subsect:
1184         xhtml_heading(fp, p, indexable);
1185         break;
1186
1187       case para_Rule:
1188         fprintf(fp, "\n<hr />\n");
1189         break;
1190
1191       case para_Normal:
1192       case para_Copyright:
1193         fprintf(fp, "\n<p>");
1194         xhtml_para(fp, p->words, indexable);
1195         fprintf(fp, "</p>\n");
1196         break;
1197
1198       case para_LcontPush:
1199         {
1200             int *p;
1201             p = mknew(int);
1202             *p = last_type;
1203             stk_push(lcont_stack, p);
1204             last_type = para_Normal;
1205         }
1206         break;
1207       case para_LcontPop:
1208         {
1209             int *p = stk_pop(lcont_stack);
1210             assert(p);
1211             ptype = last_type = *p;
1212             sfree(p);
1213             goto closeofflist;         /* ick */
1214         }
1215         break;
1216       case para_QuotePush:
1217         fprintf(fp, "<blockquote>\n");
1218         break;
1219       case para_QuotePop:
1220         fprintf(fp, "</blockquote>\n");
1221         break;
1222
1223       case para_Bullet:
1224       case para_NumberedList:
1225       case para_Description:
1226       case para_DescribedThing:
1227       case para_BiblioCited:
1228         if (last_type!=p->type &&
1229             !(last_type==para_DescribedThing && p->type==para_Description) &&
1230             !(last_type==para_Description && p->type==para_DescribedThing)) {
1231           /* start up list if necessary */
1232           if (p->type == para_Bullet) {
1233             fprintf(fp, "<ul>\n");
1234           } else if (p->type == para_NumberedList) {
1235             fprintf(fp, "<ol>\n");
1236           } else if (p->type == para_BiblioCited ||
1237                      p->type == para_DescribedThing ||
1238                      p->type == para_Description) {
1239             fprintf(fp, "<dl>\n");
1240           }
1241         }
1242         if (p->type == para_Bullet || p->type == para_NumberedList) {
1243           fprintf(fp, "<li>");
1244         } else if (p->type == para_DescribedThing) {
1245           fprintf(fp, "<dt>");
1246         } else if (p->type == para_Description) {
1247           fprintf(fp, "<dd>");
1248         } else if (p->type == para_BiblioCited) {
1249           fprintf(fp, "<dt>");
1250           xhtml_para(fp, p->kwtext, indexable);
1251           fprintf(fp, "</dt>\n<dd>");
1252         }
1253         xhtml_para(fp, p->words, indexable);
1254         {
1255           paragraph *p2 = p->next;
1256           if (p2 && xhtml_para_level(p2)==-1 && p2->type == para_LcontPush)
1257             break;
1258         }
1259
1260         closeofflist:
1261         if (ptype == para_BiblioCited) {
1262           fprintf(fp, "</dd>\n");
1263         } else if (ptype == para_DescribedThing) {
1264           fprintf(fp, "</dt>");
1265         } else if (ptype == para_Description) {
1266           fprintf(fp, "</dd>");
1267         } else if (ptype == para_Bullet || ptype == para_NumberedList) {
1268           fprintf(fp, "</li>");
1269         }
1270         if (ptype == para_Bullet || ptype == para_NumberedList ||
1271             ptype == para_BiblioCited || ptype == para_Description ||
1272             ptype == para_DescribedThing)
1273           /* close off list if necessary */
1274         {
1275           paragraph *p2 = p->next;
1276           int close_off=FALSE;
1277 /*          if (p2 && (xhtml_para_level(p2)>limit || xhtml_para_level(p2)==-1)) {*/
1278           if (p2 && xhtml_para_level(p2)==-1) {
1279             if (p2->type != ptype &&
1280                 !(p2->type==para_DescribedThing && ptype==para_Description) &&
1281                 !(p2->type==para_Description && ptype==para_DescribedThing) &&
1282                 p2->type != para_LcontPush)
1283               close_off=TRUE;
1284           } else {
1285             close_off=TRUE;
1286           }
1287           if (close_off) {
1288             if (ptype == para_Bullet) {
1289               fprintf(fp, "</ul>\n");
1290             } else if (ptype == para_NumberedList) {
1291               fprintf(fp, "</ol>\n");
1292             } else if (ptype == para_BiblioCited ||
1293                        ptype == para_Description ||
1294                        ptype == para_DescribedThing) {
1295               fprintf(fp, "</dl>\n");
1296             }
1297           }
1298         }
1299         break;
1300
1301       case para_Code:
1302         xhtml_codepara(fp, p->words);
1303         break;
1304     }
1305     last_type = ptype;
1306   }
1307
1308   stk_free(lcont_stack);
1309 }
1310
1311 /*
1312  * Output a header for this XHTML file.
1313  */
1314 static void xhtml_doheader(FILE *fp, word *title)
1315 {
1316   fprintf(fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n");
1317   fprintf(fp, "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n");
1318   fprintf(fp, "<html xmlns='http://www.w3.org/1999/xhtml'>\n\n<head>\n<title>");
1319   if (title==NULL)
1320     fprintf(fp, "The thing with no name!");
1321   else
1322     xhtml_para(fp, title, FALSE);
1323   fprintf(fp, "</title>\n");
1324   fprintf(fp, "<meta name=\"generator\" content=\"Halibut %s xhtml-backend\" />\n", version);
1325   if (conf.author)
1326     fprintf(fp, "<meta name=\"author\" content=\"%ls\" />\n", conf.author);
1327   if (conf.description)
1328     fprintf(fp, "<meta name=\"description\" content=\"%ls\" />\n", conf.description);
1329   if (conf.head_end)
1330     fprintf(fp, "%ls\n", conf.head_end);
1331   fprintf(fp, "</head>\n\n");
1332   if (conf.body)
1333     fprintf(fp, "%ls\n", conf.body);
1334   else
1335     fprintf(fp, "<body>\n");
1336   if (conf.body_start)
1337     fprintf(fp, "%ls\n", conf.body_start);
1338 }
1339
1340 /*
1341  * Output a footer for this XHTML file.
1342  */
1343 static void xhtml_dofooter(FILE *fp)
1344 {
1345   fprintf(fp, "\n<hr />\n\n");
1346   if (conf.body_end)
1347     fprintf(fp, "%ls\n", conf.body_end);
1348   if (!conf.suppress_address) {
1349     fprintf(fp,"<address>\n");
1350     if (conf.address_start)
1351       fprintf(fp, "%ls\n", conf.address_start);
1352     /* Do the version ID */
1353     if (conf.include_version_id) {
1354       paragraph *p;
1355       int started = 0;
1356       for (p = sourceparas; p; p = p->next)
1357         if (p->type == para_VersionID) {
1358           xhtml_versionid(fp, p->words, started);
1359           started = 1;
1360         }
1361     }
1362     if (conf.address_end)
1363       fprintf(fp, "%ls\n", conf.address_end);
1364     fprintf(fp, "</address>\n");
1365   }
1366   fprintf(fp, "</body>\n\n</html>\n");
1367 }
1368
1369 /*
1370  * Output the versionid paragraph. Typically this is a version control
1371  * ID string (such as $Id...$ in RCS).
1372  */
1373 static void xhtml_versionid(FILE *fp, word *text, int started)
1374 {
1375   rdstringc t = { 0, 0, NULL };
1376
1377   rdaddc(&t, '[');                     /* FIXME: configurability */
1378   xhtml_rdaddwc(&t, text, NULL, FALSE);
1379   rdaddc(&t, ']');                     /* FIXME: configurability */
1380
1381   if (started)
1382     fprintf(fp, "<br />\n");
1383   fprintf(fp, "%s\n", t.text);
1384   sfree(t.text);
1385 }
1386
/*
 * Is this an XHTML reserved character? Returns nonzero for `&', `<',
 * `>' and `"', and zero for everything else.
 *
 * NOTE(review): the comparison yields 1 or 0, which assumes the
 * project's TRUE and FALSE are defined as 1 and 0 -- confirm.
 */
static int xhtml_reservedchar(int c)
{
  return c=='&' || c=='<' || c=='>' || c=='"';
}
1395
1396 /*
1397  * Convert a wide string into valid XHTML: Anything outside ASCII will
1398  * be fixed up as an entity. Currently we don't worry about constraining the
1399  * encoded character set, which we should probably do at some point (we can
1400  * still fix up and return FALSE - see the last comment here). We also don't
1401  * currently
1402  *
1403  * Because this is only used for words, spaces are HARD spaces (any other
1404  * spaces will be word_Whitespace not word_Normal). So they become &nbsp;
1405  * Unless hard_spaces is FALSE, of course (code paragraphs break the above
1406  * rule).
1407  *
1408  * If `result' is non-NULL, mallocs the resulting string and stores a pointer to
1409  * it in `*result'. If `result' is NULL, merely checks whether all
1410  * characters in the string are feasible.
1411  *
1412  * Return is nonzero if all characters are OK. If not all
1413  * characters are OK but `result' is non-NULL, a result _will_
1414  * still be generated!
1415  */
1416 static int xhtml_convert(wchar_t *s, int maxlen, char **result,
1417                          int hard_spaces) {
1418     int doing = (result != 0);
1419     int ok = TRUE;
1420     char *p = NULL;
1421     int plen = 0, psize = 0;
1422
1423     if (maxlen <= 0)
1424         maxlen = -1;
1425
1426     for (; *s && maxlen != 0; s++, maxlen--) {
1427         wchar_t c = *s;
1428
1429 #define ensure_size(i) if (i>=psize) { psize = i+256; p = resize(p, psize); }
1430
1431         if (((c == 32 && !hard_spaces) || (c > 32 && c <= 126 && !xhtml_reservedchar(c)))) {
1432             /* Char is OK. */
1433             if (doing)
1434             {
1435               ensure_size(plen);
1436               p[plen++] = (char)c;
1437             }
1438         } else {
1439             /* Char needs fixing up. */
1440             /* ok = FALSE; -- currently we never return FALSE; we
1441              * might want to when considering a character set for the
1442              * encoded document.
1443              */
1444             if (doing)
1445             {
1446               if (c==32) { /* a space in a word is a hard space */
1447                 ensure_size(plen+6); /* includes space for the NUL, which is subsequently stomped on */
1448                 sprintf(p+plen, "&nbsp;");
1449                 plen+=6;
1450               } else {
1451                 /* FIXME: entity names! */
1452                 ensure_size(plen+8); /* includes space for the NUL, which is subsequently stomped on */
1453                 plen+=sprintf(p+plen, "&#%04i;", (int)c);
1454               }
1455             }
1456         }
1457     }
1458     if (doing) {
1459         p = resize(p, plen+1);
1460         p[plen] = '\0';
1461         *result = p;
1462     }
1463     return ok;
1464 }
1465
1466 /*
1467  * This formats the given words as XHTML.
1468  *
1469  * `indexable', if FALSE, prohibits adding any index references.
1470  * You might use this, for example, if an index reference occurred
1471  * in a section title, to prevent phony index references when the
1472  * section title is processed in strange places such as contents
1473  * sections.
1474  */
1475 static void xhtml_rdaddwc(rdstringc *rs, word *text, word *end, int indexable) {
1476     char *c;
1477     keyword *kwl;
1478     xhtmlsection *sect;
1479     indextag *itag;
1480     int ti;
1481
1482     for (; text && text != end; text = text->next) {
1483       switch (text->type) {
1484       case word_HyperLink:
1485         xhtml_utostr(text->text, &c);
1486         rdaddsc(rs, "<a href=\"");
1487         rdaddsc(rs, c);
1488         rdaddsc(rs, "\">");
1489         sfree(c);
1490         break;
1491
1492       case word_UpperXref:
1493       case word_LowerXref:
1494         kwl = kw_lookup(keywords, text->text);
1495         if (kwl) {
1496           sect=xhtml_find_section(kwl->para);
1497           if (sect) {
1498             rdaddsc(rs, "<a href=\"");
1499             rdaddsc(rs, sect->file->filename);
1500             rdaddc(rs, '#');
1501             rdaddsc(rs, sect->fragment);
1502             rdaddsc(rs, "\">");
1503           } else {
1504             rdaddsc(rs, "<a href=\"Apologies.html\"><!-- probably a bibliography cross reference -->");
1505             error(err_whatever, "Couldn't locate cross-reference! (Probably a bibliography entry.)");
1506           }
1507         } else {
1508           rdaddsc(rs, "<a href=\"Apologies.html\"><!-- unknown cross-reference -->");
1509           error(err_whatever, "Couldn't locate cross-reference! (Wasn't in source file.)");
1510         }
1511         break;
1512
1513       case word_IndexRef: /* in theory we could make an index target here */
1514 /*        rdaddsc(rs, "<a name=\"idx-");
1515         xhtml_utostr(text->text, &c);
1516         rdaddsc(rs, c);
1517         sfree(c);
1518         rdaddsc(rs, "\"></a>");*/
1519         /* what we _do_ need to do is to fix up the backend data
1520          * for any indexentry this points to.
1521          */
1522         if (!indexable)
1523           break;
1524
1525         for (ti=0; (itag = (indextag *)index234(idx->tags, ti))!=NULL; ti++) {
1526           /* FIXME: really ustricmp() and not ustrcmp()? */
1527           if (ustricmp(itag->name, text->text)==0) {
1528             break;
1529           }
1530         }
1531         if (itag!=NULL) {
1532           if (itag->refs!=NULL) {
1533             int i;
1534             for (i=0; i<itag->nrefs; i++) {
1535               xhtmlindex *idx_ref;
1536               indexentry *ientry;
1537
1538               ientry = itag->refs[i];
1539               if (ientry->backend_data==NULL) {
1540                 idx_ref = (xhtmlindex*) smalloc(sizeof(xhtmlindex));
1541                 if (idx_ref==NULL)
1542                   fatal(err_nomemory);
1543                 idx_ref->nsection = 0;
1544                 idx_ref->size = 4;
1545                 idx_ref->sections = (xhtmlsection**) smalloc(idx_ref->size * sizeof(xhtmlsection*));
1546                 if (idx_ref->sections==NULL)
1547                   fatal(err_nomemory);
1548                 ientry->backend_data = idx_ref;
1549               } else {
1550                 idx_ref = ientry->backend_data;
1551                 if (idx_ref->nsection+1 > idx_ref->size) {
1552                   int new_size = idx_ref->size * 2;
1553                   idx_ref->sections = srealloc(idx_ref->sections, new_size * sizeof(xhtmlsection));
1554                   if (idx_ref->sections==NULL) {
1555                     fatal(err_nomemory);
1556                   }
1557                   idx_ref->size = new_size;
1558                 }
1559               }
1560               idx_ref->sections[idx_ref->nsection++] = currentsection;
1561 #if 0
1562 #endif
1563             }
1564           } else {
1565             fatal(err_whatever, "Index tag had no entries!");
1566           }
1567         } else {
1568           fprintf(stderr, "Looking for index entry '%ls'\n", text->text);
1569           fatal(err_whatever, "Couldn't locate index entry! (Wasn't in index.)");
1570         }
1571         break;
1572
1573       case word_HyperEnd:
1574       case word_XrefEnd:
1575         rdaddsc(rs, "</a>");
1576         break;
1577
1578       case word_Normal:
1579       case word_Emph:
1580       case word_Code:
1581       case word_WeakCode:
1582       case word_WhiteSpace:
1583       case word_EmphSpace:
1584       case word_CodeSpace:
1585       case word_WkCodeSpace:
1586       case word_Quote:
1587       case word_EmphQuote:
1588       case word_CodeQuote:
1589       case word_WkCodeQuote:
1590         assert(text->type != word_CodeQuote &&
1591                text->type != word_WkCodeQuote);
1592         if (towordstyle(text->type) == word_Emph &&
1593             (attraux(text->aux) == attr_First ||
1594              attraux(text->aux) == attr_Only))
1595             rdaddsc(rs, "<em>");
1596         else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) &&
1597                  (attraux(text->aux) == attr_First ||
1598                   attraux(text->aux) == attr_Only))
1599             rdaddsc(rs, "<code>");
1600
1601         if (removeattr(text->type) == word_Normal) {
1602           if (xhtml_convert(text->text, 0, &c, TRUE)) /* spaces in the word are hard */
1603             rdaddsc(rs, c);
1604           else
1605             xhtml_rdaddwc(rs, text->alt, NULL, indexable);
1606           sfree(c);
1607         } else if (removeattr(text->type) == word_WhiteSpace) {
1608           rdaddc(rs, ' ');
1609         } else if (removeattr(text->type) == word_Quote) {
1610           rdaddsc(rs, "&quot;");
1611         }
1612
1613         if (towordstyle(text->type) == word_Emph &&
1614             (attraux(text->aux) == attr_Last ||
1615              attraux(text->aux) == attr_Only))
1616             rdaddsc(rs, "</em>");
1617         else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) &&
1618                  (attraux(text->aux) == attr_Last ||
1619                   attraux(text->aux) == attr_Only))
1620             rdaddsc(rs, "</code>");
1621         break;
1622       }
1623     }
1624 }
1625
1626 /* Output a heading, formatted as XHTML.
1627  */
1628 static void xhtml_heading(FILE *fp, paragraph *p, int indexable)
1629 {
1630     rdstringc t = { 0, 0, NULL };
1631     word *tprefix = p->kwtext;
1632     word *nprefix = p->kwtext2;
1633     word *text = p->words;
1634     int level = xhtml_para_level(p);
1635     xhtmlsection *sect = xhtml_find_section(p);
1636     xhtmlheadfmt *fmt;
1637     char *fragment;
1638     if (sect) {
1639       fragment = sect->fragment;
1640     } else {
1641       if (p->type == para_Title)
1642         fragment = "title";
1643       else {
1644         fragment = ""; /* FIXME: what else can we do? */
1645         error(err_whatever, "Couldn't locate heading cross-reference!");
1646       }
1647     }
1648
1649     if (p->type == para_Title)
1650         fmt = NULL;
1651     else if (level == 1)
1652         fmt = &conf.fchapter;
1653     else if (level-1 < conf.nfsect)
1654         fmt = &conf.fsect[level-1];
1655     else
1656         fmt = &conf.fsect[conf.nfsect-1];
1657
1658     if (fmt && fmt->just_numbers && nprefix) {
1659         xhtml_rdaddwc(&t, nprefix, NULL, indexable);
1660         if (fmt) {
1661             char *c;
1662             if (xhtml_convert(fmt->number_suffix, 0, &c, FALSE)) {
1663                 rdaddsc(&t, c);
1664                 sfree(c);
1665             }
1666         }
1667     } else if (fmt && !fmt->just_numbers && tprefix) {
1668         xhtml_rdaddwc(&t, tprefix, NULL, indexable);
1669         if (fmt) {
1670             char *c;
1671             if (xhtml_convert(fmt->number_suffix, 0, &c, FALSE)) {
1672                 rdaddsc(&t, c);
1673                 sfree(c);
1674             }
1675         }
1676     }
1677     xhtml_rdaddwc(&t, text, NULL, indexable);
1678     /*
1679      * If we're outputting in single-file mode, we need to lower
1680      * the level of each heading by one, because the overall
1681      * document title will be sitting right at the top as an <h1>
1682      * and so chapters and sections should start at <h2>.
1683      *
1684      * Even if not, the document title will come back from
1685      * xhtml_para_level() as level zero, so we must increment that
1686      * no matter what leaf_level is set to.
1687      */
1688     if (conf.leaf_level == 0 || level == 0)
1689         level++;
1690     fprintf(fp, "<a name=\"%s\"></a><h%i>%s</h%i>\n", fragment, level, t.text, level);
1691     sfree(t.text);
1692 }
1693
1694 /* Output a paragraph. Styles are handled by xhtml_rdaddwc().
1695  * This looks pretty simple; I may have missed something ...
1696  */
1697 static void xhtml_para(FILE *fp, word *text, int indexable)
1698 {
1699   rdstringc out = { 0, 0, NULL };
1700   xhtml_rdaddwc(&out, text, NULL, indexable);
1701   fprintf(fp, "%s", out.text);
1702   sfree(out.text);
1703 }
1704
1705 /* Output a code paragraph. I'm treating this as preformatted, which
1706  * may not be entirely correct. See xhtml_para() for my worries about
1707  * this being overly-simple; however I think that most of the complexity
1708  * of the text backend came entirely out of word wrapping anyway.
1709  */
1710 static void xhtml_codepara(FILE *fp, word *text)
1711 {
1712   fprintf(fp, "<pre>");
1713     for (; text; text = text->next) if (text->type == word_WeakCode) {
1714         word *here, *next;
1715         char *c;
1716
1717         /*
1718          * See if this WeakCode is followed by an Emph to indicate
1719          * emphasis.
1720          */
1721         here = text;
1722         if (text->next && text->next->type == word_Emph) {
1723             next = text = text->next;
1724         } else
1725             next = NULL;
1726
1727         if (next) {
1728             wchar_t *t, *e;
1729             int n;
1730
1731             t = here->text;
1732             e = next->text;
1733
1734             while (*e) {
1735                 int ec = *e;
1736
1737                 for (n = 0; t[n] && e[n] && e[n] == ec; n++);
1738                 xhtml_convert(t, n, &c, FALSE);
1739                 fprintf(fp, "%s%s%s",
1740                         (ec == 'i' ? "<em>" : ec == 'b' ? "<b>" : ""),
1741                         c,
1742                         (ec == 'i' ? "</em>" : ec == 'b' ? "</b>" : ""));
1743                 sfree(c);
1744
1745                 t += n;
1746                 e += n;
1747             }
1748
1749             xhtml_convert(t, 0, &c, FALSE);
1750             fprintf(fp, "%s\n", c);
1751             sfree(c);
1752         } else {
1753             xhtml_convert(here->text, 0, &c, FALSE);
1754             fprintf(fp, "%s\n", c);
1755             sfree(c);
1756         }
1757     }
1758   fprintf(fp, "</pre>\n");
1759 }