mdw@git.distorted.org.uk Git - sgt/halibut/blob - bk_xhtml.c

   1 /*
   2  * xhtml backend for Halibut
   3  * (initial implementation by James Aylett)
   4  *
   5  * Still to do:
   6  *
   7  *  +++ doesn't handle non-breaking hyphens. Not sure how to yet.
   8  *  +++ entity names (from a file -- ideally supply normal SGML files)
   9  *  +++ configuration directive to file split where the current layout
  10  *      code wouldn't. Needs changes to _ponder_layout() and _do_paras(),
  11  *      perhaps others.
  12  *
  13  * Limitations:
  14  *
  15  *  +++ biblio/index references target the nearest section marker, rather
  16  *   than having a dedicated target themselves. In large bibliographies
  17  *   this will cause problems. (The solution is to fake up a response
  18  *   from xhtml_find_section(), probably linking it into the sections
  19  *   chain just in case we need it again, and to make freeing it up
  20  *   easier.) docsrc.pl used to work as we do, however, and SGT agrees that
  21  *   this is acceptable for now.
  22  *  +++ can't cope with leaf-level == 0. It's all to do with the
  23  *   top-level file not being normal, probably not even having a valid
  24  *   section level, and stuff like that. I question whether this is an
  25  *   issue, frankly; small manuals that fit on one page should probably
  26  *   not be written in halibut at all.
  27  */
  28
  29 #include <stdio.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32 #include <assert.h>
  33 #include "halibut.h"
  34
  35 /*
  36  * FILENAME_TEMPLATE (overridable in config of course) allows you
  37  * to choose the general form for your HTML file names. It is
  38  * slightly printf-styled (% followed by a single character is a
  39  * formatting directive, %% is a literal %). Formatting directives
  40  * are:
  41  *
  42  *  - %n is the section type-plus-number, minus whitespace (`Chapter1.2').
  43  *  - %b is the section number on its own (`1.2').
  44  *  - %k is the section's _internal_ keyword.
  45  *  - %N is the section's visible title in the output, again minus
  46  *    whitespace.
  47  *
  48  * %n, %b and %k will all default to %N if the section is
  49  * unnumbered (`Bibliography' is often a good example).
  50  *
  51  * FRAGMENT_TEMPLATE is the same, but defines the <a name="foo">
  52  * markers used to cross-reference to particular subsections of a
  53  * file.
  54  */
  55
/* Default output name when the whole manual goes into one file
 * (overridden by \cfg{xhtml-single-filename}). */
#define FILENAME_SINGLE "Manual.html"
/* Default name of the top-level contents file
 * (overridden by \cfg{xhtml-contents-filename}). */
#define FILENAME_CONTENTS "Contents.html"
/* Default name of the index file
 * (overridden by \cfg{xhtml-index-filename}). */
#define FILENAME_INDEX "IndexPage.html"
/* Default template for per-section file names
 * (overridden by \cfg{xhtml-template-filename}). */
#define FILENAME_TEMPLATE "%n.html"
/* Default template for fragment identifiers inside a file
 * (overridden by \cfg{xhtml-template-fragment}). */
#define FRAGMENT_TEMPLATE "%b"
  61
/*
 * One section (heading) of the document as laid out for output.
 *
 * Sections that share an output file are linked into a tree through
 * next/child/parent. Independently of that tree, every section is
 * threaded onto one list through `chain'; that list is what
 * xhtml_find_section() searches and what xhtml_backend() walks to
 * free everything.
 */
struct xhtmlsection_Struct {
    struct xhtmlsection_Struct *next; /* next sibling (NULL if split across files) */
    struct xhtmlsection_Struct *child; /* NULL if split across files */
    struct xhtmlsection_Struct *parent; /* NULL if split across files */
    struct xhtmlsection_Struct *chain; /* single structure independent of weird trees */
    paragraph *para; /* source paragraph for this heading; NULL for the placeholder top section */
    struct xhtmlfile_Struct *file; /* which file is this a part of? */
    char *fragment; /* fragment id within the file */
    int level; /* result of xhtml_para_level() for para; -1 until assigned */
};
  72
/*
 * One output file. Files form a tree through next/child/parent,
 * rooted at the file-global `topfile'.
 */
struct xhtmlfile_Struct {
    struct xhtmlfile_Struct *next; /* next sibling file */
    struct xhtmlfile_Struct *child; /* first file nested below this one */
    struct xhtmlfile_Struct *parent; /* enclosing file; NULL for the top file */
    char *filename; /* heap-allocated output name, released by xhtml_free_file() */
    struct xhtmlsection_Struct *sections; /* sections within this file (only one for non-leaf) */
    int is_leaf; /* is this file a leaf file, ie does it not have any children? */
};
  81
typedef struct xhtmlsection_Struct xhtmlsection;
typedef struct xhtmlfile_Struct xhtmlfile;
typedef struct xhtmlindex_Struct xhtmlindex;

/*
 * Per-index-entry data hung off indexentry.backend_data; it holds a
 * heap-allocated array of section pointers that xhtml_backend()
 * frees when it finishes.
 * NOTE(review): nsection and size look like the used count and the
 * allocated capacity of `sections', but the code that fills them is
 * not in this part of the file -- confirm.
 */
struct xhtmlindex_Struct {
  int nsection;
  int size;
  xhtmlsection **sections;
};
  91
/*
 * Heading-number format for one heading level, as set by the
 * xhtml-chapter-* and xhtml-section-* configuration directives.
 */
typedef struct {
    int just_numbers; /* boolean from the ...-numeric directive; presumably "print the bare number only" -- confirm in xhtml_heading() */
    wchar_t *number_suffix; /* text from the ...-suffix directive (defaults are L": " and L" ") */
} xhtmlheadfmt;
  96
/*
 * All configuration for the XHTML backend, filled in by
 * xhtml_configure() from defaults and \cfg{xhtml-...} directives.
 */
typedef struct {
  int contents_depth[6]; /* \cfg{xhtml-contents-depth-N} for N = 0..5 */
  int leaf_contains_contents; /* \cfg{xhtml-leaf-contains-contents} */
  int leaf_level; /* \cfg{xhtml-leaf-level}: section level at which files stop splitting */
  int leaf_smallest_contents; /* \cfg{xhtml-leaf-smallest-contents} */
  int include_version_id; /* \cfg{xhtml-versionid} */
  wchar_t *author, *description; /* \cfg{xhtml-author}, \cfg{xhtml-description}; NULL if unset */
  /* Caller-supplied text from the matching \cfg{xhtml-...} directives;
   * each is NULL if unset. These point into the source paragraphs'
   * keyword buffers and are not owned by this structure. */
  wchar_t *head_end, *body, *body_start, *body_end, *address_start, *address_end, *nav_attrs;
  int suppress_address; /* \cfg{xhtml-suppress-address} */
  xhtmlheadfmt fchapter, *fsect; /* chapter format; heap array of per-level section formats */
  int nfsect; /* number of entries in fsect */
  char *contents_filename, *index_filename; /* heap-allocated file names */
  char *single_filename, *template_filename, *template_fragment; /* heap-allocated name / templates */
} xhtmlconfig;
 111
 112 /*static void xhtml_level(paragraph *, int);
 113 static void xhtml_level_0(paragraph *);
 114 static void xhtml_docontents(FILE *, paragraph *, int);
 115 static void xhtml_dosections(FILE *, paragraph *, int);
 116 static void xhtml_dobody(FILE *, paragraph *, int);*/
 117
/* Forward declarations of static helpers that are used before they
 * are defined. */

/* Page furniture. */
static void xhtml_doheader(FILE *, word *);
static void xhtml_dofooter(FILE *);
static void xhtml_versionid(FILE *, word *, int);

/* Small utilities. */
static void xhtml_utostr(wchar_t *, char **);
static int xhtml_para_level(paragraph *);
static int xhtml_reservedchar(int);

/* Text conversion and paragraph output. */
static int xhtml_convert(wchar_t *, int, char **, int);
static void xhtml_rdaddwc(rdstringc *, word *, word *, int);
static void xhtml_para(FILE *, word *, int);
static void xhtml_codepara(FILE *, word *);
static void xhtml_heading(FILE *, paragraph *, int);
 131
 132 /* File-global variables are much easier than passing these things
 133  * all over the place. Evil, but easier. We can replace this with a single
 134  * structure at some point.
 135  */
static xhtmlconfig conf; /* set from xhtml_configure() at the start of xhtml_backend() */
static keywordlist *keywords; /* keyword list passed to xhtml_backend() */
static indexdata *idx; /* index data passed to xhtml_backend() */
static xhtmlfile *topfile; /* root of the file tree built by xhtml_ponder_layout() */
static xhtmlsection *topsection; /* head of the section `chain' list (the last section created, once layout is done) */
static paragraph *sourceparas; /* the document's paragraph list, as passed to xhtml_backend() */
static xhtmlfile *lastfile; /* reset to NULL by xhtml_ponder_layout(); other uses not visible here */
static xhtmlfile *xhtml_last_file = NULL; /* target of the "Previous" nav link; NULL means no link */
static int last_level=-1, start_level; /* NOTE(review): users of these are outside this part of the file */
static xhtmlsection *currentsection; /* NOTE(review): users of this are outside this part of the file */
 146
 147 static xhtmlconfig xhtml_configure(paragraph *source)
 148 {
 149   xhtmlconfig ret;
 150
 151   /*
 152    * Defaults.
 153    */
 154   ret.contents_depth[0] = 2;
 155   ret.contents_depth[1] = 3;
 156   ret.contents_depth[2] = 4;
 157   ret.contents_depth[3] = 5;
 158   ret.contents_depth[4] = 6;
 159   ret.contents_depth[5] = 7;
 160   ret.leaf_level = 2;
 161   ret.leaf_smallest_contents = 4;
 162   ret.leaf_contains_contents = FALSE;
 163   ret.include_version_id = TRUE;
 164   ret.author = NULL;
 165   ret.description = NULL;
 166   ret.head_end = NULL;
 167   ret.body = NULL;
 168   ret.body_start = NULL;
 169   ret.body_end = NULL;
 170   ret.address_start = NULL;
 171   ret.address_end = NULL;
 172   ret.nav_attrs = NULL;
 173   ret.suppress_address = FALSE;
 174
 175   ret.fchapter.just_numbers = FALSE;
 176   ret.fchapter.number_suffix = L": ";
 177   ret.nfsect = 2;
 178   ret.fsect = mknewa(xhtmlheadfmt, ret.nfsect);
 179   ret.fsect[0].just_numbers = FALSE;
 180   ret.fsect[0].number_suffix = L": ";
 181   ret.fsect[1].just_numbers = TRUE;
 182   ret.fsect[1].number_suffix = L" ";
 183   ret.contents_filename = strdup(FILENAME_CONTENTS);
 184   ret.single_filename = strdup(FILENAME_SINGLE);
 185   ret.index_filename = strdup(FILENAME_INDEX);
 186   ret.template_filename = strdup(FILENAME_TEMPLATE);
 187   ret.template_fragment = strdup(FRAGMENT_TEMPLATE);
 188
 189   for (; source; source = source->next)
 190   {
 191     if (source->type == para_Config)
 192     {
 193       if (!ustricmp(source->keyword, L"xhtml-contents-filename")) {
 194         sfree(ret.contents_filename);
 195         ret.contents_filename = utoa_dup(uadv(source->keyword));
 196       } else if (!ustricmp(source->keyword, L"xhtml-single-filename")) {
 197         sfree(ret.single_filename);
 198         ret.single_filename = utoa_dup(uadv(source->keyword));
 199       } else if (!ustricmp(source->keyword, L"xhtml-index-filename")) {
 200         sfree(ret.index_filename);
 201         ret.index_filename = utoa_dup(uadv(source->keyword));
 202       } else if (!ustricmp(source->keyword, L"xhtml-template-filename")) {
 203         sfree(ret.template_filename);
 204         ret.template_filename = utoa_dup(uadv(source->keyword));
 205       } else if (!ustricmp(source->keyword, L"xhtml-template-fragment")) {
 206         sfree(ret.template_fragment);
 207         ret.template_fragment = utoa_dup(uadv(source->keyword));
 208       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-0")) {
 209         ret.contents_depth[0] = utoi(uadv(source->keyword));
 210       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-1")) {
 211         ret.contents_depth[1] = utoi(uadv(source->keyword));
 212       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-2")) {
 213         ret.contents_depth[2] = utoi(uadv(source->keyword));
 214       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-3")) {
 215         ret.contents_depth[3] = utoi(uadv(source->keyword));
 216       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-4")) {
 217         ret.contents_depth[4] = utoi(uadv(source->keyword));
 218       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-5")) {
 219         ret.contents_depth[5] = utoi(uadv(source->keyword));
 220       } else if (!ustricmp(source->keyword, L"xhtml-leaf-level")) {
 221         ret.leaf_level = utoi(uadv(source->keyword));
 222       } else if (!ustricmp(source->keyword, L"xhtml-leaf-smallest-contents")) {
 223         ret.leaf_smallest_contents = utoi(uadv(source->keyword));
 224       } else if (!ustricmp(source->keyword, L"xhtml-versionid")) {
 225         ret.include_version_id = utob(uadv(source->keyword));
 226       } else if (!ustricmp(source->keyword, L"xhtml-leaf-contains-contents")) {
 227         ret.leaf_contains_contents = utob(uadv(source->keyword));
 228       } else if (!ustricmp(source->keyword, L"xhtml-suppress-address")) {
 229         ret.suppress_address = utob(uadv(source->keyword));
 230       } else if (!ustricmp(source->keyword, L"xhtml-author")) {
 231         ret.author = uadv(source->keyword);
 232       } else if (!ustricmp(source->keyword, L"xhtml-description")) {
 233         ret.description = uadv(source->keyword);
 234       } else if (!ustricmp(source->keyword, L"xhtml-head-end")) {
 235         ret.head_end = uadv(source->keyword);
 236       } else if (!ustricmp(source->keyword, L"xhtml-body-start")) {
 237         ret.body_start = uadv(source->keyword);
 238       } else if (!ustricmp(source->keyword, L"xhtml-body-tag")) {
 239         ret.body = uadv(source->keyword);
 240       } else if (!ustricmp(source->keyword, L"xhtml-body-end")) {
 241         ret.body_end = uadv(source->keyword);
 242       } else if (!ustricmp(source->keyword, L"xhtml-address-start")) {
 243         ret.address_start = uadv(source->keyword);
 244       } else if (!ustricmp(source->keyword, L"xhtml-address-end")) {
 245         ret.address_end = uadv(source->keyword);
 246       } else if (!ustricmp(source->keyword, L"xhtml-navigation-attributes")) {
 247         ret.nav_attrs = uadv(source->keyword);
 248       } else if (!ustricmp(source->keyword, L"xhtml-chapter-numeric")) {
 249         ret.fchapter.just_numbers = utob(uadv(source->keyword));
 250       } else if (!ustricmp(source->keyword, L"xhtml-chapter-suffix")) {
 251         ret.fchapter.number_suffix = uadv(source->keyword);
 252       } else if (!ustricmp(source->keyword, L"xhtml-section-numeric")) {
 253         wchar_t *p = uadv(source->keyword);
 254         int n = 0;
 255         if (uisdigit(*p)) {
 256           n = utoi(p);
 257           p = uadv(p);
 258         }
 259         if (n >= ret.nfsect) {
 260           int i;
 261           ret.fsect = resize(ret.fsect, n+1);
 262           for (i = ret.nfsect; i <= n; i++)
 263             ret.fsect[i] = ret.fsect[ret.nfsect-1];
 264           ret.nfsect = n+1;
 265         }
 266         ret.fsect[n].just_numbers = utob(p);
 267       } else if (!ustricmp(source->keyword, L"xhtml-section-suffix")) {
 268         wchar_t *p = uadv(source->keyword);
 269         int n = 0;
 270         if (uisdigit(*p)) {
 271           n = utoi(p);
 272           p = uadv(p);
 273         }
 274         if (n >= ret.nfsect) {
 275           int i;
 276           ret.fsect = resize(ret.fsect, n+1);
 277           for (i = ret.nfsect; i <= n; i++)
 278             ret.fsect[i] = ret.fsect[ret.nfsect-1];
 279           ret.nfsect = n+1;
 280         }
 281         ret.fsect[n].number_suffix = p;
 282       }
 283     }
 284   }
 285
 286   /*  printf(" !!! leaf_level = %i\n", ret.leaf_level);
 287   printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]);
 288   printf(" !!! contentdepth-1 = %i\n", ret.contents_depth[1]);
 289   printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]);
 290   printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]);
 291   printf(" !!! contentdepth-4 = %i\n", ret.contents_depth[4]);
 292   printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]);
 293   printf(" !!! leaf_contains_contents = %i\n", ret.leaf_contains_contents);*/
 294   return ret;
 295 }
 296
 297 paragraph *xhtml_config_filename(char *filename)
 298 {
 299     /*
 300      * If the user passes in a single filename as a parameter to
 301      * the `--html' command-line option, then we should assume it
 302      * to imply _two_ config directives:
 303      * \cfg{xhtml-single-filename}{whatever} and
 304      * \cfg{xhtml-leaf-level}{0}; the rationale being that the user
 305      * wants their output _in that file_.
 306      */
 307
 308     paragraph *p[2];
 309     int i, len;
 310     wchar_t *ufilename, *up;
 311
 312     for (i = 0; i < 2; i++) {
 313         p[i] = mknew(paragraph);
 314         memset(p[i], 0, sizeof(*p[i]));
 315         p[i]->type = para_Config;
 316         p[i]->next = NULL;
 317         p[i]->fpos.filename = "<command line>";
 318         p[i]->fpos.line = p[i]->fpos.col = -1;
 319     }
 320
 321     ufilename = ufroma_dup(filename);
 322     len = ustrlen(ufilename) + 2 + lenof(L"xhtml-single-filename");
 323     p[0]->keyword = mknewa(wchar_t, len);
 324     up = p[0]->keyword;
 325     ustrcpy(up, L"xhtml-single-filename");
 326     up = uadv(up);
 327     ustrcpy(up, ufilename);
 328     up = uadv(up);
 329     *up = L'\0';
 330     assert(up - p[0]->keyword < len);
 331     sfree(ufilename);
 332
 333     len = lenof(L"xhtml-leaf-level") + lenof(L"0") + 1;
 334     p[1]->keyword = mknewa(wchar_t, len);
 335     up = p[1]->keyword;
 336     ustrcpy(up, L"xhtml-leaf-level");
 337     up = uadv(up);
 338     ustrcpy(up, L"0");
 339     up = uadv(up);
 340     *up = L'\0';
 341     assert(up - p[1]->keyword < len);
 342
 343     p[0]->next = p[1];
 344
 345     return p[0];
 346 }
 347
 348 static xhtmlsection *xhtml_new_section(xhtmlsection *last)
 349 {
 350   xhtmlsection *ret = mknew(xhtmlsection);
 351   ret->next=NULL;
 352   ret->child=NULL;
 353   ret->parent=NULL;
 354   ret->chain=last;
 355   ret->para=NULL;
 356   ret->file=NULL;
 357   ret->fragment=NULL;
 358   ret->level=-1; /* marker: end of chain */
 359   return ret;
 360 }
 361
 362 /* Returns NULL or the section that marks that paragraph */
 363 static xhtmlsection *xhtml_find_section(paragraph *p)
 364 {
 365   xhtmlsection *ret = topsection;
 366   if (xhtml_para_level(p)==-1) { /* first, we back-track to a section paragraph */
 367     paragraph *p2 = sourceparas;
 368     paragraph *p3 = NULL;
 369     while (p2 && p2!=p) {
 370       if (xhtml_para_level(p2)!=-1) {
 371         p3 = p2;
 372       }
 373       p2=p2->next;
 374     }
 375     if (p3==NULL) { /* for some reason, we couldn't find a section before this paragraph ... ? */
 376       /* Note that this can happen, if you have a cross-reference to before the first chapter starts.
 377        * So don't do that, then.
 378        */
 379       return NULL;
 380     }
 381     p=p3;
 382   }
 383   while (ret && ret->para != p) {
 384 /*    printf(" xhtml_find_section(): checking %s for para @ %p\n", ret->fragment, p);*/
 385     ret=ret->chain;
 386   }
 387   return ret;
 388 }
 389
 390 static void xhtml_format(paragraph *p, char *template_string, rdstringc *r)
 391 {
 392     char *c, *t;
 393     word *w;
 394     wchar_t *ws;
 395
 396     t = template_string;
 397     while (*t) {
 398         if (*t == '%' && t[1]) {
 399             int fmt;
 400
 401             t++;
 402             fmt = *t++;
 403
 404             if (fmt == '%') {
 405                 rdaddc(r, fmt);
 406                 continue;
 407             }
 408
 409             w = NULL;
 410             ws = NULL;
 411
 412             if (p->kwtext && fmt == 'n')
 413                 w = p->kwtext;
 414             else if (p->kwtext2 && fmt == 'b')
 415                 w = p->kwtext2;
 416             else if (p->keyword && *p->keyword && fmt == 'k')
 417                 ws = p->keyword;
 418             else
 419                 w = p->words;
 420
 421             while (w) {
 422                 switch (removeattr(w->type))
 423                 {
 424                   case word_Normal:
 425                     /*case word_Emph:
 426                      case word_Code:
 427                      case word_WeakCode:*/
 428                     xhtml_utostr(w->text, &c);
 429                     rdaddsc(r,c);
 430                     sfree(c);
 431                     break;
 432                 }
 433                 w = w->next;
 434             }
 435             if (ws) {
 436                 xhtml_utostr(ws, &c);
 437                 rdaddsc(r,c);
 438                 sfree(c);
 439             }
 440         } else {
 441             rdaddc(r, *t++);
 442         }
 443     }
 444 }
 445
 446 static xhtmlfile *xhtml_new_file(xhtmlsection *sect)
 447 {
 448   xhtmlfile *ret = mknew(xhtmlfile);
 449
 450   ret->next=NULL;
 451   ret->child=NULL;
 452   ret->parent=NULL;
 453   ret->filename=NULL;
 454   ret->sections=sect;
 455   ret->is_leaf=(sect!=NULL && sect->level==conf.leaf_level);
 456   if (sect==NULL) {
 457     if (conf.leaf_level==0) { /* currently unused */
 458       ret->filename = smalloc(strlen(conf.single_filename)+1);
 459       sprintf(ret->filename, conf.single_filename);
 460     } else {
 461       ret->filename = smalloc(strlen(conf.contents_filename)+1);
 462       sprintf(ret->filename, conf.contents_filename);
 463     }
 464   } else {
 465     paragraph *p = sect->para;
 466     rdstringc fname_c = { 0, 0, NULL };
 467     xhtml_format(p, conf.template_filename, &fname_c);
 468     ret->filename = rdtrimc(&fname_c);
 469   }
 470   /*  printf(" ! new file '%s', is_leaf == %s\n", ret->filename, (ret->is_leaf)?("true"):("false"));*/
 471   return ret;
 472 }
 473
 474 /*
 475  * Walk the tree fixing up files which are actually leaf (ie
 476  * have no children) but aren't at leaf level, so they have the
 477  * leaf flag set.
 478  */
 479 void xhtml_fixup_layout(xhtmlfile* file)
 480 {
 481   if (file->child==NULL) {
 482     file->is_leaf = TRUE;
 483   } else {
 484     xhtml_fixup_layout(file->child);
 485   }
 486   if (file->next)
 487     xhtml_fixup_layout(file->next);
 488 }
 489
 490 /*
 491  * Create the tree structure so we know where everything goes.
 492  * Method:
 493  *
 494  * Ignoring file splitting, we have three choices with each new section:
 495  *
 496  * +-----------------+-----------------+
 497  * |                 |                 |
 498  * X            +----X----+           (1)
 499  *              |         |
 500  *              Y        (2)
 501  *              |
 502  *             (3)
 503  *
 504  * Y is the last section we added (currentsect).
 505  * If sect is the section we want to add, then:
 506  *
 507  * (1) if sect->level < currentsect->level
 508  * (2) if sect->level == currentsect->level
 509  * (3) if sect->level > currentsect->level
 510  *
 511  * This requires the constraint that you never skip section numbers
 512  * (so you can't have a.b.c.d without all of a, a.b and a.b.c existing).
 513  *
 514  * Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change
 515  * more than one level at a time. Lots of asserts, and probably part of
 516  * the algorithm here, rely on this being true. (It currently isn't
 517  * enforced by halibut, however.)
 518  *
 519  * File splitting makes this harder. For instance, say we added at (3)
 520  * above and now need to add another section. We are splitting at level
 521  * 2, ie the level of Y. Z is the last section we added:
 522  *
 523  * +-----------------+-----------------+
 524  * |                 |                 |
 525  * X            +----X----+           (1)
 526  *              |         |
 527  *         +----Y----+   (1)
 528  *         |         |
 529  *         Z        (2)
 530  *         |
 531  *        (3)
 532  *
 533  * The (1) case is now split; we need to search upwards to find where
 534  * to actually link in. The other two cases remain the same (and will
 535  * always be like this).
 536  *
 537  * File splitting makes this harder, however. The decision of whether
 538  * to split to a new file is always on the same condition, however (is
 539  * the level of this section higher than the leaf_level configuration
 540  * value or not).
 541  *
 542  * Treating the cases backwards:
 543  *
 544  * (3) same file if sect->level > conf.leaf_level, otherwise new file
 545  *
 546  *     if in the same file, currentsect->child points to sect
 547  *     otherwise the linking is done through the file tree (which works
 548  *     in more or less the same way, ie currentfile->child points to
 549  *     the new file)
 550  *
 551  * (2) same file if sect->level > conf.leaf_level, otherwise new file
 552  *
 553  *     if in the same file, currentsect->next points to sect
 554  *     otherwise file linking and currentfile->next points to the new
 555  *     file (we know that Z must have caused a new file to be created)
 556  *
 557  * (1) same file if sect->level > conf.leaf_level, otherwise new file
 558  *
 559  *     this is actually effectively the same case as (2) here,
 560  *     except that we first have to travel up the sections to figure
 561  *     out which section this new one will be a sibling of. In doing
 562  *     so, we may disappear off the top of a file and have to go up
 563  *     to its parent in the file tree.
 564  *
 565  */
/*
 * Build the file tree (rooted at `topfile') and the section chain
 * (headed by `topsection') for the paragraph list `p'.
 *
 * Every paragraph whose xhtml_para_level() is greater than zero
 * becomes a section; its fragment id is the expansion of the
 * configured fragment template. Each section is then attached either
 * to the current file or to a newly created one, depending on how
 * its level compares with the previous section's level and with
 * conf.leaf_level.
 */
static void xhtml_ponder_layout(paragraph *p)
{
  xhtmlsection *lastsection; /* most recently created section: head of the chain */
  xhtmlsection *currentsect; /* section we are currently attaching relative to */
  xhtmlfile *currentfile;    /* file we are currently attaching relative to */

  lastfile = NULL;
  /* Placeholder section (level -1, no paragraph) and the top file. */
  topsection = xhtml_new_section(NULL);
  topfile = xhtml_new_file(NULL);
  lastsection = topsection;
  currentfile = topfile;
  currentsect = topsection;

  /* Single-file output: the top file itself holds all the sections. */
  if (conf.leaf_level == 0) {
    topfile->is_leaf = 1;
    topfile->sections = topsection;
    topsection->file = topfile;
  }

  for (; p; p=p->next)
  {
    int level = xhtml_para_level(p);
    if (level>0) /* actually a section */
    {
      xhtmlsection *sect;
      rdstringc frag_c = { 0, 0, NULL };

      sect = xhtml_new_section(lastsection);
      lastsection = sect;
      sect->para = p;

      xhtml_format(p, conf.template_fragment, &frag_c);
      sect->fragment = rdtrimc(&frag_c);
      sect->level = level;
      /*      printf(" ! adding para @ %p as sect %s, level %i\n", sect->para, sect->fragment, level);*/

      if (level>currentsect->level) { /* case (3) */
        if (level>conf.leaf_level) { /* same file */
          assert(currentfile->is_leaf);
          currentsect->child = sect;
          sect->parent=currentsect;
          sect->file=currentfile;
          /*          printf("connected '%s' to existing file '%s' [I]\n", sect->fragment, currentfile->filename);*/
          currentsect=sect;
        } else { /* new file */
          xhtmlfile *file = xhtml_new_file(sect);
          assert(!currentfile->is_leaf);
          currentfile->child=file;
          sect->file=file;
          file->parent=currentfile;
          /*          printf("connected '%s' to new file '%s' [I]\n", sect->fragment, file->filename);*/
          currentfile=file;
          currentsect=sect;
        }
      } else if (level >= currentsect->file->sections->level) {
        /* Case (1) or (2) *AND* still under the section that starts
         * the current file.
         *
         * I'm not convinced that this couldn't be rolled in with the
         * final else {} leg further down. It seems a lot of effort
         * this way.
         */
        if (level>conf.leaf_level) { /* stick within the same file */
          assert(currentfile->is_leaf);
          sect->file = currentfile;
          /* Climb within this file until we reach the section that
           * the new one should follow as a sibling. */
          while (currentsect && currentsect->level > level &&
                 currentsect->file==currentsect->parent->file) {
            currentsect = currentsect->parent;
          }
          assert(currentsect);
          currentsect->next = sect;
          assert(currentsect->level == sect->level);
          sect->parent = currentsect->parent;
          currentsect = sect;
          /*          printf("connected '%s' to existing file '%s' [II]\n", sect->fragment, currentfile->filename);*/
        } else { /* new file */
          xhtmlfile *file = xhtml_new_file(sect);
          sect->file=file;
          currentfile->next=file;
          file->parent=currentfile->parent;
          file->is_leaf=(level==conf.leaf_level);
          file->sections=sect;
          /*          printf("connected '%s' to new file '%s' [II]\n", sect->fragment, file->filename);*/
          currentfile=file;
          currentsect=sect;
        }
      } else { /* Case (1) or (2) and we must move up the file tree first */
        /* this loop is now probably irrelevant - we know we can't connect
         * to anything in the current file */
        while (currentsect && level<currentsect->level) {
          currentsect=currentsect->parent;
          if (currentsect) {
            /*            printf(" * up one level to '%s'\n", currentsect->fragment);*/
          } else {
            /*            printf(" * up one level (off top of current file)\n");*/
          }
        }
        if (currentsect) {
          /* I'm pretty sure this can now never fire */
          assert(currentfile->is_leaf);
          /*          printf("connected '%s' to existing file '%s' [III]\n", sect->fragment, currentfile->filename);*/
          sect->file = currentfile;
          currentsect->next=sect;
          currentsect=sect;
        } else { /* find a file we can attach to */
          /* Climb the file tree until the file's first section is no
           * deeper than the new section. */
          while (currentfile && currentfile->sections && level<currentfile->sections->level) {
            currentfile=currentfile->parent;
            if (currentfile) {
              /*              printf(" * up one file level to '%s'\n", currentfile->filename);*/
            } else {
              /*              printf(" * up one file level (off top of tree)\n");*/
            }
          }
          if (currentfile) { /* new file (we had to skip up a file to
                                get here, so we must be dealing with a
                                level no lower than the configured
                                leaf_level */
            xhtmlfile *file = xhtml_new_file(sect);
            currentfile->next=file;
            sect->file=file;
            file->parent=currentfile->parent;
            file->is_leaf=(level==conf.leaf_level);
            file->sections=sect;
            /*            printf("connected '%s' to new file '%s' [III]\n", sect->fragment, file->filename);*/
            currentfile=file;
            currentsect=sect;
          } else {
            fatal(err_whatever, "Ran off the top trying to connect sibling: strange document.");
          }
        }
      }
    }
  }
  topsection = lastsection; /* get correct end of the chain */
  xhtml_fixup_layout(topfile); /* leaf files not at leaf level marked as such */
}
 702
 703 static void xhtml_do_index();
 704 static void xhtml_do_file(xhtmlfile *file);
 705 static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform);
 706 static void xhtml_do_paras(FILE *fp, paragraph *p, paragraph *end, int indexable);
 707 static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit);
 708 static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit);
 709 static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit);
 710 static int xhtml_do_contents(FILE *fp, xhtmlfile *file);
 711 static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file);
 712 static void xhtml_do_sections(FILE *fp, xhtmlsection *sections);
 713
 714 /*
 715  * Do all the files in this structure.
 716  */
 717 static void xhtml_do_files(xhtmlfile *file)
 718 {
 719   xhtml_do_file(file);
 720   if (file->child)
 721     xhtml_do_files(file->child);
 722   if (file->next)
 723     xhtml_do_files(file->next);
 724 }
 725
 726 /*
 727  * Free up all memory used by the file tree from 'xfile' downwards
 728  */
 729 static void xhtml_free_file(xhtmlfile* xfile)
 730 {
 731   if (xfile==NULL) {
 732     return;
 733   }
 734
 735   if (xfile->filename) {
 736     sfree(xfile->filename);
 737   }
 738   xhtml_free_file(xfile->child);
 739   xhtml_free_file(xfile->next);
 740   sfree(xfile);
 741 }
 742
 743 /*
 744  * Main function.
 745  */
 746 void xhtml_backend(paragraph *sourceform, keywordlist *in_keywords,
 747                    indexdata *in_idx)
 748 {
 749 /*  int i;*/
 750   indexentry *ientry;
 751   int ti;
 752   xhtmlsection *xsect;
 753
 754   sourceparas = sourceform;
 755   conf = xhtml_configure(sourceform);
 756   keywords = in_keywords;
 757   idx = in_idx;
 758
 759   /* Clear up the index entries backend data pointers */
 760   for (ti=0; (ientry = (indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
 761     ientry->backend_data=NULL;
 762   }
 763
 764   xhtml_ponder_layout(sourceform);
 765
 766   /* old system ... (writes to *.alt, but gets some stuff wrong and is ugly) */
 767 /*  xhtml_level_0(sourceform);
 768   for (i=1; i<=conf.leaf_level; i++)
 769   {
 770     xhtml_level(sourceform, i);
 771   }*/
 772
 773   /* new system ... (writes to *.html, but isn't fully trusted) */
 774   xhtml_do_top_file(topfile, sourceform);
 775   assert(!topfile->next); /* shouldn't have a sibling at all */
 776   if (topfile->child) {
 777     xhtml_do_files(topfile->child);
 778     xhtml_do_index();
 779   }
 780
 781   /* release file, section, index data structures */
 782   xsect = topsection;
 783   while (xsect) {
 784     xhtmlsection *tmp = xsect->chain;
 785     if (xsect->fragment) {
 786       sfree(xsect->fragment);
 787     }
 788     sfree(xsect);
 789     xsect = tmp;
 790   }
 791   xhtml_free_file(topfile);
 792   for (ti = 0; (ientry=(indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
 793     if (ientry->backend_data!=NULL) {
 794       xhtmlindex *xi = (xhtmlindex*) ientry->backend_data;
 795       if (xi->sections!=NULL) {
 796         sfree(xi->sections);
 797       }
 798       sfree(xi);
 799     }
 800     ientry->backend_data = NULL;
 801   }
 802   sfree(conf.fsect);
 803 }
 804
 805 static int xhtml_para_level(paragraph *p)
 806 {
 807   switch (p->type)
 808   {
 809   case para_Title:
 810     return 0;
 811     break;
 812   case para_UnnumberedChapter:
 813   case para_Chapter:
 814   case para_Appendix:
 815     return 1;
 816     break;
 817 /*  case para_BiblioCited:
 818     return 2;
 819     break;*/
 820   case para_Heading:
 821   case para_Subsect:
 822     return p->aux+2;
 823     break;
 824   default:
 825     return -1;
 826     break;
 827   }
 828 }
 829
 830 /* Output the nav links for the current file.
 831  * file == NULL means we're doing the index
 832  */
 833 static void xhtml_donavlinks(FILE *fp, xhtmlfile *file)
 834 {
 835   xhtmlfile *xhtml_next_file = NULL;
 836   fprintf(fp, "<p");
 837   if (conf.nav_attrs!=NULL) {
 838     fprintf(fp, " %ls>", conf.nav_attrs);
 839   } else {
 840     fprintf(fp, ">");
 841   }
 842   if (xhtml_last_file==NULL) {
 843     fprintf(fp, "Previous | ");
 844   } else {
 845     fprintf(fp, "<a href='%s'>Previous</a> | ", xhtml_last_file->filename);
 846   }
 847   fprintf(fp, "<a href='%s'>Contents</a> | ", conf.contents_filename);
 848   if (file == NULL) {
 849     fprintf(fp, "Index | ");
 850   } else {
 851     fprintf(fp, "<a href='%s'>Index</a> | ", conf.index_filename);
 852   }
 853   if (file != NULL) { /* otherwise we're doing nav links for the index */
 854     if (xhtml_next_file==NULL)
 855       xhtml_next_file = file->child;
 856     if (xhtml_next_file==NULL)
 857       xhtml_next_file = file->next;
 858     if (xhtml_next_file==NULL)
 859       xhtml_next_file = file->parent->next;
 860   }
 861   if (xhtml_next_file==NULL) {
 862     if (file==NULL) { /* index, so no next file */
 863       fprintf(fp, "Next ");
 864     } else {
 865       fprintf(fp, "<a href='%s'>Next</a>", conf.index_filename);
 866     }
 867   } else {
 868     fprintf(fp, "<a href='%s'>Next</a>", xhtml_next_file->filename);
 869   }
 870   fprintf(fp, "</p>\n");
 871 }
 872
 873 /* Write out the index file */
 874 static void xhtml_do_index_body(FILE *fp)
 875 {
 876   indexentry *y;
 877   int ti;
 878
 879   if (count234(idx->entries) == 0)
 880     return;                            /* don't write anything at all */
 881
 882   fprintf(fp, "<dl>\n");
 883   /* iterate over idx->entries using the tree functions and display everything */
 884   for (ti = 0; (y = (indexentry *)index234(idx->entries, ti)) != NULL; ti++) {
 885     if (y->backend_data) {
 886       int i;
 887       xhtmlindex *xi;
 888
 889       fprintf(fp, "<dt>");
 890       xhtml_para(fp, y->text, FALSE);
 891       fprintf(fp, "</dt>\n<dd>");
 892
 893       xi = (xhtmlindex*) y->backend_data;
 894       for (i=0; i<xi->nsection; i++) {
 895         xhtmlsection *sect = xi->sections[i];
 896         if (sect) {
 897           fprintf(fp, "<a href='%s#%s'>", sect->file->filename, sect->fragment);
 898           if (sect->para->kwtext) {
 899             xhtml_para(fp, sect->para->kwtext, FALSE);
 900           } else if (sect->para->words) {
 901             xhtml_para(fp, sect->para->words, FALSE);
 902           }
 903           fprintf(fp, "</a>");
 904           if (i+1<xi->nsection) {
 905             fprintf(fp, ", ");
 906           }
 907         }
 908       }
 909       fprintf(fp, "</dd>\n");
 910     }
 911   }
 912   fprintf(fp, "</dl>\n");
 913 }
 914 static void xhtml_do_index()
 915 {
 916   word temp_word = { NULL, NULL, word_Normal, 0, 0, L"Index",
 917       { NULL, 0, 0}, NULL };
 918   FILE *fp = fopen(conf.index_filename, "w");
 919
 920   if (fp==NULL)
 921     fatal(err_cantopenw, conf.index_filename);
 922   xhtml_doheader(fp, &temp_word);
 923   xhtml_donavlinks(fp, NULL);
 924
 925   xhtml_do_index_body(fp);
 926
 927   xhtml_donavlinks(fp, NULL);
 928   xhtml_dofooter(fp);
 929   fclose(fp);
 930 }
 931
 932 /* Output the given file. This includes whatever contents at beginning and end, etc. etc. */
 933 static void xhtml_do_file(xhtmlfile *file)
 934 {
 935   FILE *fp = fopen(file->filename, "w");
 936   if (fp==NULL)
 937     fatal(err_cantopenw, file->filename);
 938
 939   if (file->sections->para->words) {
 940     xhtml_doheader(fp, file->sections->para->words);
 941   } else if (file->sections->para->kwtext) {
 942     xhtml_doheader(fp, file->sections->para->kwtext);
 943   } else {
 944     xhtml_doheader(fp, NULL);
 945   }
 946
 947   xhtml_donavlinks(fp, file);
 948
 949   if (file->is_leaf && conf.leaf_contains_contents &&
 950       xhtml_do_contents(NULL, file)>=conf.leaf_smallest_contents)
 951     xhtml_do_contents(fp, file);
 952   xhtml_do_sections(fp, file->sections);
 953   if (!file->is_leaf)
 954     xhtml_do_naked_contents(fp, file);
 955
 956   xhtml_donavlinks(fp, file);
 957
 958   xhtml_dofooter(fp);
 959   fclose(fp);
 960
 961   xhtml_last_file = file;
 962 }
 963
 964 /* Output the top-level file. */
 965 static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform)
 966 {
 967   paragraph *p;
 968   int done=FALSE;
 969   FILE *fp = fopen(file->filename, "w");
 970   if (fp==NULL)
 971     fatal(err_cantopenw, file->filename);
 972
 973   /* Do the title -- only one allowed */
 974   for (p = sourceform; p && !done; p = p->next)
 975   {
 976     if (p->type == para_Title)
 977     {
 978       xhtml_doheader(fp, p->words);
 979       done=TRUE;
 980     }
 981   }
 982   if (!done)
 983     xhtml_doheader(fp, NULL /* Eek! */);
 984
 985   /*
 986    * Display the title.
 987    */
 988   for (p = sourceform; p; p = p->next)
 989   {
 990     if (p->type == para_Title) {
 991       xhtml_heading(fp, p, FALSE);
 992       break;
 993     }
 994   }
 995
 996   /* Do the preamble */
 997   for (p = sourceform; p; p = p->next)
 998   {
 999     if (p->type == para_Chapter || p->type == para_Heading ||
1000         p->type == para_Subsect || p->type == para_Appendix ||
1001         p->type == para_UnnumberedChapter) {
1002         /*
1003          * We've found the end of the preamble. Do every normal
1004          * paragraph up to there.
1005          */
1006         xhtml_do_paras(fp, sourceform, p, FALSE);
1007         break;
1008     }
1009   }
1010
1011   xhtml_do_contents(fp, file);
1012   xhtml_do_sections(fp, file->sections);
1013
1014   /*
1015    * Put the index in the top file if we're in single-file mode
1016    * (leaf-level 0).
1017    */
1018   if (conf.leaf_level == 0 && count234(idx->entries) > 0) {
1019     fprintf(fp, "<a name=\"index\"></a><h1>Index</h1>\n");
1020     xhtml_do_index_body(fp);
1021   }
1022
1023   xhtml_dofooter(fp);
1024   fclose(fp);
1025 }
1026
/*
 * Convert a wide string to a freshly allocated ASCII string stored
 * in *out. Printable ASCII (32..126) is copied through; every other
 * character becomes '?'.
 */
static void xhtml_utostr(wchar_t *in, char **out)
{
  int len = ustrlen(in);
  char *buf = smalloc(len + 1);
  int k;

  for (k = 0; k < len; k++)
    buf[k] = (in[k] >= 32 && in[k] <= 126) ? (char)in[k] : '?';
  buf[len] = 0;

  *out = buf;
}
1044
1045 /*
1046  * Write contents for the given file, and subfiles, down to
1047  * the appropriate contents depth. Returns the number of
1048  * entries written.
1049  */
1050 static int xhtml_do_contents(FILE *fp, xhtmlfile *file)
1051 {
1052   int level, limit, count = 0;
1053   if (!file)
1054     return 0;
1055
1056   level = (file->sections)?(file->sections->level):(0);
1057   limit = conf.contents_depth[(level>5)?(5):(level)];
1058   start_level = (file->is_leaf) ? (level-1) : (level);
1059   last_level = start_level;
1060
1061   count += xhtml_do_contents_section_limit(fp, file->sections, limit);
1062   count += xhtml_do_contents_limit(fp, file->child, limit);
1063   if (fp!=NULL) {
1064     while (last_level > start_level) {
1065       last_level--;
1066       fprintf(fp, "</li></ul>\n");
1067     }
1068   }
1069   return count;
1070 }
1071
1072 /* As above, but doesn't do anything in the current file */
1073 static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file)
1074 {
1075   int level, limit, start_level, count = 0;
1076   if (!file)
1077     return 0;
1078
1079   level = (file->sections)?(file->sections->level):(0);
1080   limit = conf.contents_depth[(level>5)?(5):(level)];
1081   start_level = (file->is_leaf) ? (level-1) : (level);
1082   last_level = start_level;
1083
1084   count = xhtml_do_contents_limit(fp, file->child, limit);
1085   if (fp!=NULL) {
1086     while (last_level > start_level) {
1087       last_level--;
1088       fprintf(fp, "</li></ul>\n");
1089     }
1090   }
1091   return count;
1092 }
1093
1094 /*
1095  * Write contents for the given file, children, and siblings, down to
1096  * given limit contents depth.
1097  */
1098 static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit)
1099 {
1100   int count = 0;
1101   while (file) {
1102     count += xhtml_do_contents_section_limit(fp, file->sections, limit);
1103     count += xhtml_do_contents_limit(fp, file->child, limit);
1104     file = file->next;
1105   }
1106   return count;
1107 }
1108
1109 /*
1110  * Write contents entries for the given section tree, down to the
1111  * limit contents depth.
1112  */
1113 static int xhtml_do_contents_section_deep_limit(FILE *fp, xhtmlsection *section, int limit)
1114 {
1115   int count = 0;
1116   while (section) {
1117     if (!xhtml_add_contents_entry(fp, section, limit))
1118       return 0;
1119     else
1120       count++;
1121     count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
1122     section = section->next;
1123   }
1124   return count;
1125 }
1126
1127 /*
1128  * Write contents entries for the given section tree, down to the
1129  * limit contents depth.
1130  */
1131 static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit)
1132 {
1133   int count = 0;
1134   if (!section)
1135     return 0;
1136   xhtml_add_contents_entry(fp, section, limit);
1137   count=1;
1138   count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
1139   /*  section=section->child;
1140   while (section && xhtml_add_contents_entry(fp, section, limit)) {
1141     section = section->next;
1142     }*/
1143   return count;
1144 }
1145
1146 /*
1147  * Add a section entry, unless we're exceeding the limit, in which
1148  * case return FALSE (otherwise return TRUE).
1149  */
1150 static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit)
1151 {
1152   if (!section || section->level > limit)
1153     return FALSE;
1154   if (fp==NULL || section->level < 0)
1155     return TRUE;
1156   if (last_level > section->level) {
1157     while (last_level > section->level) {
1158       last_level--;
1159       fprintf(fp, "</li></ul>\n");
1160     }
1161     fprintf(fp, "</li>\n");
1162   } else if (last_level < section->level) {
1163     assert(last_level == section->level - 1);
1164     last_level++;
1165     fprintf(fp, "<ul>\n");
1166   } else {
1167     fprintf(fp, "</li>\n");
1168   }
1169   fprintf(fp, "<li><a href=\"%s#%s\">", section->file->filename, section->fragment);
1170   if (section->para->kwtext) {
1171     xhtml_para(fp, section->para->kwtext, FALSE);
1172     if (section->para->words) {
1173       fprintf(fp, ": ");
1174     }
1175   }
1176   if (section->para->words) {
1177     xhtml_para(fp, section->para->words, FALSE);
1178   }
1179   fprintf(fp, "</a>\n");
1180   return TRUE;
1181 }
1182
1183 /*
1184  * Write all the sections in this file. Do all paragraphs in this section, then all
1185  * children (recursively), then go on to the next one (tail recursively).
1186  */
1187 static void xhtml_do_sections(FILE *fp, xhtmlsection *sections)
1188 {
1189   while (sections) {
1190     currentsection = sections;
1191     xhtml_do_paras(fp, sections->para, NULL, TRUE);
1192     xhtml_do_sections(fp, sections->child);
1193     sections = sections->next;
1194   }
1195 }
1196
1197 /* Write this list of paragraphs. Close off all lists at the end. */
1198 static void xhtml_do_paras(FILE *fp, paragraph *p, paragraph *end,
1199                            int indexable)
1200 {
1201   int last_type = -1, ptype, first=TRUE;
1202   stack lcont_stack = stk_new();
1203   if (!p)
1204     return;
1205
1206 /*  for (; p && (xhtml_para_level(p)>limit || xhtml_para_level(p)==-1 || first); p=p->next) {*/
1207   for (; p && p != end && (xhtml_para_level(p)==-1 || first); p=p->next) {
1208     first=FALSE;
1209     switch (ptype = p->type)
1210     {
1211       /*
1212        * Things we ignore because we've already processed them or
1213        * aren't going to touch them in this pass.
1214        */
1215      case para_IM:
1216      case para_BR:
1217      case para_Biblio:                 /* only touch BiblioCited */
1218      case para_VersionID:
1219      case para_NoCite:
1220      case para_Title:
1221        break;
1222
1223        /*
1224         * Chapter titles.
1225         */
1226       case para_Chapter:
1227       case para_Appendix:
1228       case para_UnnumberedChapter:
1229         xhtml_heading(fp, p, indexable);
1230         break;
1231
1232       case para_Heading:
1233       case para_Subsect:
1234         xhtml_heading(fp, p, indexable);
1235         break;
1236
1237       case para_Rule:
1238         fprintf(fp, "\n<hr />\n");
1239         break;
1240
1241       case para_Normal:
1242       case para_Copyright:
1243         fprintf(fp, "\n<p>");
1244         xhtml_para(fp, p->words, indexable);
1245         fprintf(fp, "</p>\n");
1246         break;
1247
1248       case para_LcontPush:
1249         {
1250             int *p;
1251             p = mknew(int);
1252             *p = last_type;
1253             stk_push(lcont_stack, p);
1254             last_type = para_Normal;
1255         }
1256         break;
1257       case para_LcontPop:
1258         {
1259             int *p = stk_pop(lcont_stack);
1260             assert(p);
1261             ptype = last_type = *p;
1262             sfree(p);
1263             goto closeofflist;         /* ick */
1264         }
1265         break;
1266       case para_QuotePush:
1267         fprintf(fp, "<blockquote>\n");
1268         break;
1269       case para_QuotePop:
1270         fprintf(fp, "</blockquote>\n");
1271         break;
1272
1273       case para_Bullet:
1274       case para_NumberedList:
1275       case para_Description:
1276       case para_DescribedThing:
1277       case para_BiblioCited:
1278         if (last_type!=p->type &&
1279             !(last_type==para_DescribedThing && p->type==para_Description) &&
1280             !(last_type==para_Description && p->type==para_DescribedThing)) {
1281           /* start up list if necessary */
1282           if (p->type == para_Bullet) {
1283             fprintf(fp, "<ul>\n");
1284           } else if (p->type == para_NumberedList) {
1285             fprintf(fp, "<ol>\n");
1286           } else if (p->type == para_BiblioCited ||
1287                      p->type == para_DescribedThing ||
1288                      p->type == para_Description) {
1289             fprintf(fp, "<dl>\n");
1290           }
1291         }
1292         if (p->type == para_Bullet || p->type == para_NumberedList) {
1293           fprintf(fp, "<li>");
1294         } else if (p->type == para_DescribedThing) {
1295           fprintf(fp, "<dt>");
1296         } else if (p->type == para_Description) {
1297           fprintf(fp, "<dd>");
1298         } else if (p->type == para_BiblioCited) {
1299           fprintf(fp, "<dt>");
1300           xhtml_para(fp, p->kwtext, indexable);
1301           fprintf(fp, "</dt>\n<dd>");
1302         }
1303         xhtml_para(fp, p->words, indexable);
1304         {
1305           paragraph *p2 = p->next;
1306           if (p2 && xhtml_para_level(p2)==-1 && p2->type == para_LcontPush)
1307             break;
1308         }
1309
1310         closeofflist:
1311         if (ptype == para_BiblioCited) {
1312           fprintf(fp, "</dd>\n");
1313         } else if (ptype == para_DescribedThing) {
1314           fprintf(fp, "</dt>");
1315         } else if (ptype == para_Description) {
1316           fprintf(fp, "</dd>");
1317         } else if (ptype == para_Bullet || ptype == para_NumberedList) {
1318           fprintf(fp, "</li>");
1319         }
1320         if (ptype == para_Bullet || ptype == para_NumberedList ||
1321             ptype == para_BiblioCited || ptype == para_Description ||
1322             ptype == para_DescribedThing)
1323           /* close off list if necessary */
1324         {
1325           paragraph *p2 = p->next;
1326           int close_off=FALSE;
1327 /*          if (p2 && (xhtml_para_level(p2)>limit || xhtml_para_level(p2)==-1)) {*/
1328           if (p2 && xhtml_para_level(p2)==-1) {
1329             if (p2->type != ptype &&
1330                 !(p2->type==para_DescribedThing && ptype==para_Description) &&
1331                 !(p2->type==para_Description && ptype==para_DescribedThing) &&
1332                 p2->type != para_LcontPush)
1333               close_off=TRUE;
1334           } else {
1335             close_off=TRUE;
1336           }
1337           if (close_off) {
1338             if (ptype == para_Bullet) {
1339               fprintf(fp, "</ul>\n");
1340             } else if (ptype == para_NumberedList) {
1341               fprintf(fp, "</ol>\n");
1342             } else if (ptype == para_BiblioCited ||
1343                        ptype == para_Description ||
1344                        ptype == para_DescribedThing) {
1345               fprintf(fp, "</dl>\n");
1346             }
1347           }
1348         }
1349         break;
1350
1351       case para_Code:
1352         xhtml_codepara(fp, p->words);
1353         break;
1354     }
1355     last_type = ptype;
1356   }
1357
1358   stk_free(lcont_stack);
1359 }
1360
1361 /*
1362  * Output a header for this XHTML file.
1363  */
1364 static void xhtml_doheader(FILE *fp, word *title)
1365 {
1366   fprintf(fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n");
1367   fprintf(fp, "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n");
1368   fprintf(fp, "<html xmlns='http://www.w3.org/1999/xhtml'>\n\n<head>\n<title>");
1369   if (title==NULL)
1370     fprintf(fp, "The thing with no name!");
1371   else
1372     xhtml_para(fp, title, FALSE);
1373   fprintf(fp, "</title>\n");
1374   fprintf(fp, "<meta name=\"generator\" content=\"Halibut %s xhtml-backend\" />\n", version);
1375   if (conf.author)
1376     fprintf(fp, "<meta name=\"author\" content=\"%ls\" />\n", conf.author);
1377   if (conf.description)
1378     fprintf(fp, "<meta name=\"description\" content=\"%ls\" />\n", conf.description);
1379   if (conf.head_end)
1380     fprintf(fp, "%ls\n", conf.head_end);
1381   fprintf(fp, "</head>\n\n");
1382   if (conf.body)
1383     fprintf(fp, "%ls\n", conf.body);
1384   else
1385     fprintf(fp, "<body>\n");
1386   if (conf.body_start)
1387     fprintf(fp, "%ls\n", conf.body_start);
1388 }
1389
1390 /*
1391  * Output a footer for this XHTML file.
1392  */
1393 static void xhtml_dofooter(FILE *fp)
1394 {
1395   fprintf(fp, "\n<hr />\n\n");
1396   if (conf.body_end)
1397     fprintf(fp, "%ls\n", conf.body_end);
1398   if (!conf.suppress_address) {
1399     fprintf(fp,"<address>\n");
1400     if (conf.address_start)
1401       fprintf(fp, "%ls\n", conf.address_start);
1402     /* Do the version ID */
1403     if (conf.include_version_id) {
1404       paragraph *p;
1405       int started = 0;
1406       for (p = sourceparas; p; p = p->next)
1407         if (p->type == para_VersionID) {
1408           xhtml_versionid(fp, p->words, started);
1409           started = 1;
1410         }
1411     }
1412     if (conf.address_end)
1413       fprintf(fp, "%ls\n", conf.address_end);
1414     fprintf(fp, "</address>\n");
1415   }
1416   fprintf(fp, "</body>\n\n</html>\n");
1417 }
1418
1419 /*
1420  * Output the versionid paragraph. Typically this is a version control
1421  * ID string (such as $Id...$ in RCS).
1422  */
1423 static void xhtml_versionid(FILE *fp, word *text, int started)
1424 {
1425   rdstringc t = { 0, 0, NULL };
1426
1427   rdaddc(&t, '[');                     /* FIXME: configurability */
1428   xhtml_rdaddwc(&t, text, NULL, FALSE);
1429   rdaddc(&t, ']');                     /* FIXME: configurability */
1430
1431   if (started)
1432     fprintf(fp, "<br />\n");
1433   fprintf(fp, "%s\n", t.text);
1434   sfree(t.text);
1435 }
1436
/*
 * Is this an XHTML reserved character? Returns nonzero for
 * '&', '<', '>' and '"', and zero for anything else.
 */
static int xhtml_reservedchar(int c)
{
  switch (c) {
  case '&':
  case '<':
  case '>':
  case '"':
    return 1;
  default:
    return 0;
  }
}
1445
1446 /*
1447  * Convert a wide string into valid XHTML: Anything outside ASCII will
1448  * be fixed up as an entity. Currently we don't worry about constraining the
1449  * encoded character set, which we should probably do at some point (we can
1450  * still fix up and return FALSE - see the last comment here). We also don't
1451  * currently
1452  *
1453  * Because this is only used for words, spaces are HARD spaces (any other
1454  * spaces will be word_Whitespace not word_Normal). So they become &nbsp;
1455  * Unless hard_spaces is FALSE, of course (code paragraphs break the above
1456  * rule).
1457  *
1458  * If `result' is non-NULL, mallocs the resulting string and stores a pointer to
1459  * it in `*result'. If `result' is NULL, merely checks whether all
1460  * characters in the string are feasible.
1461  *
1462  * Return is nonzero if all characters are OK. If not all
1463  * characters are OK but `result' is non-NULL, a result _will_
1464  * still be generated!
1465  */
1466 static int xhtml_convert(wchar_t *s, int maxlen, char **result,
1467                          int hard_spaces) {
1468     int doing = (result != 0);
1469     int ok = TRUE;
1470     char *p = NULL;
1471     int plen = 0, psize = 0;
1472
1473     if (maxlen <= 0)
1474         maxlen = -1;
1475
1476     for (; *s && maxlen != 0; s++, maxlen--) {
1477         wchar_t c = *s;
1478
1479 #define ensure_size(i) if (i>=psize) { psize = i+256; p = resize(p, psize); }
1480
1481         if (((c == 32 && !hard_spaces) || (c > 32 && c <= 126 && !xhtml_reservedchar(c)))) {
1482             /* Char is OK. */
1483             if (doing)
1484             {
1485               ensure_size(plen);
1486               p[plen++] = (char)c;
1487             }
1488         } else {
1489             /* Char needs fixing up. */
1490             /* ok = FALSE; -- currently we never return FALSE; we
1491              * might want to when considering a character set for the
1492              * encoded document.
1493              */
1494             if (doing)
1495             {
1496               if (c==32) { /* a space in a word is a hard space */
1497                 ensure_size(plen+6); /* includes space for the NUL, which is subsequently stomped on */
1498                 sprintf(p+plen, "&nbsp;");
1499                 plen+=6;
1500               } else {
1501                 /* FIXME: entity names! */
1502                 ensure_size(plen+8); /* includes space for the NUL, which is subsequently stomped on */
1503                 plen+=sprintf(p+plen, "&#%04i;", (int)c);
1504               }
1505             }
1506         }
1507     }
1508     if (doing) {
1509         p = resize(p, plen+1);
1510         p[plen] = '\0';
1511         *result = p;
1512     }
1513     return ok;
1514 }
1515
1516 /*
1517  * This formats the given words as XHTML.
1518  *
1519  * `indexable', if FALSE, prohibits adding any index references.
1520  * You might use this, for example, if an index reference occurred
1521  * in a section title, to prevent phony index references when the
1522  * section title is processed in strange places such as contents
1523  * sections.
1524  */
1525 static void xhtml_rdaddwc(rdstringc *rs, word *text, word *end, int indexable) {
1526     char *c;
1527     keyword *kwl;
1528     xhtmlsection *sect;
1529     indextag *itag;
1530     int ti;
1531
1532     for (; text && text != end; text = text->next) {
1533       switch (text->type) {
1534       case word_HyperLink:
1535         xhtml_utostr(text->text, &c);
1536         rdaddsc(rs, "<a href=\"");
1537         rdaddsc(rs, c);
1538         rdaddsc(rs, "\">");
1539         sfree(c);
1540         break;
1541
1542       case word_UpperXref:
1543       case word_LowerXref:
1544         kwl = kw_lookup(keywords, text->text);
1545         if (kwl) {
1546           sect=xhtml_find_section(kwl->para);
1547           if (sect) {
1548             rdaddsc(rs, "<a href=\"");
1549             rdaddsc(rs, sect->file->filename);
1550             rdaddc(rs, '#');
1551             rdaddsc(rs, sect->fragment);
1552             rdaddsc(rs, "\">");
1553           } else {
1554             rdaddsc(rs, "<a href=\"Apologies.html\"><!-- probably a bibliography cross reference -->");
1555             error(err_whatever, "Couldn't locate cross-reference! (Probably a bibliography entry.)");
1556           }
1557         } else {
1558           rdaddsc(rs, "<a href=\"Apologies.html\"><!-- unknown cross-reference -->");
1559           error(err_whatever, "Couldn't locate cross-reference! (Wasn't in source file.)");
1560         }
1561         break;
1562
1563       case word_IndexRef: /* in theory we could make an index target here */
1564 /*        rdaddsc(rs, "<a name=\"idx-");
1565         xhtml_utostr(text->text, &c);
1566         rdaddsc(rs, c);
1567         sfree(c);
1568         rdaddsc(rs, "\"></a>");*/
1569         /* what we _do_ need to do is to fix up the backend data
1570          * for any indexentry this points to.
1571          */
1572         if (!indexable)
1573           break;
1574
1575         for (ti=0; (itag = (indextag *)index234(idx->tags, ti))!=NULL; ti++) {
1576           /* FIXME: really ustricmp() and not ustrcmp()? */
1577           if (ustricmp(itag->name, text->text)==0) {
1578             break;
1579           }
1580         }
1581         if (itag!=NULL) {
1582           if (itag->refs!=NULL) {
1583             int i;
1584             for (i=0; i<itag->nrefs; i++) {
1585               xhtmlindex *idx_ref;
1586               indexentry *ientry;
1587
1588               ientry = itag->refs[i];
1589               if (ientry->backend_data==NULL) {
1590                 idx_ref = (xhtmlindex*) smalloc(sizeof(xhtmlindex));
1591                 if (idx_ref==NULL)
1592                   fatal(err_nomemory);
1593                 idx_ref->nsection = 0;
1594                 idx_ref->size = 4;
1595                 idx_ref->sections = (xhtmlsection**) smalloc(idx_ref->size * sizeof(xhtmlsection*));
1596                 if (idx_ref->sections==NULL)
1597                   fatal(err_nomemory);
1598                 ientry->backend_data = idx_ref;
1599               } else {
1600                 idx_ref = ientry->backend_data;
1601                 if (idx_ref->nsection+1 > idx_ref->size) {
1602                   int new_size = idx_ref->size * 2;
1603                   idx_ref->sections = srealloc(idx_ref->sections, new_size * sizeof(xhtmlsection));
1604                   if (idx_ref->sections==NULL) {
1605                     fatal(err_nomemory);
1606                   }
1607                   idx_ref->size = new_size;
1608                 }
1609               }
1610               idx_ref->sections[idx_ref->nsection++] = currentsection;
1611 #if 0
1612 #endif
1613             }
1614           } else {
1615             fatal(err_whatever, "Index tag had no entries!");
1616           }
1617         } else {
1618           fprintf(stderr, "Looking for index entry '%ls'\n", text->text);
1619           fatal(err_whatever, "Couldn't locate index entry! (Wasn't in index.)");
1620         }
1621         break;
1622
1623       case word_HyperEnd:
1624       case word_XrefEnd:
1625         rdaddsc(rs, "</a>");
1626         break;
1627
1628       case word_Normal:
1629       case word_Emph:
1630       case word_Code:
1631       case word_WeakCode:
1632       case word_WhiteSpace:
1633       case word_EmphSpace:
1634       case word_CodeSpace:
1635       case word_WkCodeSpace:
1636       case word_Quote:
1637       case word_EmphQuote:
1638       case word_CodeQuote:
1639       case word_WkCodeQuote:
1640         assert(text->type != word_CodeQuote &&
1641                text->type != word_WkCodeQuote);
1642         if (towordstyle(text->type) == word_Emph &&
1643             (attraux(text->aux) == attr_First ||
1644              attraux(text->aux) == attr_Only))
1645             rdaddsc(rs, "<em>");
1646         else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) &&
1647                  (attraux(text->aux) == attr_First ||
1648                   attraux(text->aux) == attr_Only))
1649             rdaddsc(rs, "<code>");
1650
1651         if (removeattr(text->type) == word_Normal) {
1652           if (xhtml_convert(text->text, 0, &c, TRUE)) /* spaces in the word are hard */
1653             rdaddsc(rs, c);
1654           else
1655             xhtml_rdaddwc(rs, text->alt, NULL, indexable);
1656           sfree(c);
1657         } else if (removeattr(text->type) == word_WhiteSpace) {
1658           rdaddc(rs, ' ');
1659         } else if (removeattr(text->type) == word_Quote) {
1660           rdaddsc(rs, "&quot;");
1661         }
1662
1663         if (towordstyle(text->type) == word_Emph &&
1664             (attraux(text->aux) == attr_Last ||
1665              attraux(text->aux) == attr_Only))
1666             rdaddsc(rs, "</em>");
1667         else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) &&
1668                  (attraux(text->aux) == attr_Last ||
1669                   attraux(text->aux) == attr_Only))
1670             rdaddsc(rs, "</code>");
1671         break;
1672       }
1673     }
1674 }
1675
1676 /* Output a heading, formatted as XHTML.
1677  */
1678 static void xhtml_heading(FILE *fp, paragraph *p, int indexable)
1679 {
1680     rdstringc t = { 0, 0, NULL };
1681     word *tprefix = p->kwtext;
1682     word *nprefix = p->kwtext2;
1683     word *text = p->words;
1684     int level = xhtml_para_level(p);
1685     xhtmlsection *sect = xhtml_find_section(p);
1686     xhtmlheadfmt *fmt;
1687     char *fragment;
1688     if (sect) {
1689       fragment = sect->fragment;
1690     } else {
1691       if (p->type == para_Title)
1692         fragment = "title";
1693       else {
1694         fragment = ""; /* FIXME: what else can we do? */
1695         error(err_whatever, "Couldn't locate heading cross-reference!");
1696       }
1697     }
1698
1699     if (p->type == para_Title)
1700         fmt = NULL;
1701     else if (level == 1)
1702         fmt = &conf.fchapter;
1703     else if (level-1 < conf.nfsect)
1704         fmt = &conf.fsect[level-1];
1705     else
1706         fmt = &conf.fsect[conf.nfsect-1];
1707
1708     if (fmt && fmt->just_numbers && nprefix) {
1709         xhtml_rdaddwc(&t, nprefix, NULL, indexable);
1710         if (fmt) {
1711             char *c;
1712             if (xhtml_convert(fmt->number_suffix, 0, &c, FALSE)) {
1713                 rdaddsc(&t, c);
1714                 sfree(c);
1715             }
1716         }
1717     } else if (fmt && !fmt->just_numbers && tprefix) {
1718         xhtml_rdaddwc(&t, tprefix, NULL, indexable);
1719         if (fmt) {
1720             char *c;
1721             if (xhtml_convert(fmt->number_suffix, 0, &c, FALSE)) {
1722                 rdaddsc(&t, c);
1723                 sfree(c);
1724             }
1725         }
1726     }
1727     xhtml_rdaddwc(&t, text, NULL, indexable);
1728     /*
1729      * If we're outputting in single-file mode, we need to lower
1730      * the level of each heading by one, because the overall
1731      * document title will be sitting right at the top as an <h1>
1732      * and so chapters and sections should start at <h2>.
1733      *
1734      * Even if not, the document title will come back from
1735      * xhtml_para_level() as level zero, so we must increment that
1736      * no matter what leaf_level is set to.
1737      */
1738     if (conf.leaf_level == 0 || level == 0)
1739         level++;
1740     fprintf(fp, "<a name=\"%s\"></a><h%i>%s</h%i>\n", fragment, level, t.text, level);
1741     sfree(t.text);
1742 }
1743
1744 /* Output a paragraph. Styles are handled by xhtml_rdaddwc().
1745  * This looks pretty simple; I may have missed something ...
1746  */
1747 static void xhtml_para(FILE *fp, word *text, int indexable)
1748 {
1749   rdstringc out = { 0, 0, NULL };
1750   xhtml_rdaddwc(&out, text, NULL, indexable);
1751   fprintf(fp, "%s", out.text);
1752   sfree(out.text);
1753 }
1754
1755 /* Output a code paragraph. I'm treating this as preformatted, which
1756  * may not be entirely correct. See xhtml_para() for my worries about
1757  * this being overly-simple; however I think that most of the complexity
1758  * of the text backend came entirely out of word wrapping anyway.
1759  */
1760 static void xhtml_codepara(FILE *fp, word *text)
1761 {
1762   fprintf(fp, "<pre>");
1763     for (; text; text = text->next) if (text->type == word_WeakCode) {
1764         word *here, *next;
1765         char *c;
1766
1767         /*
1768          * See if this WeakCode is followed by an Emph to indicate
1769          * emphasis.
1770          */
1771         here = text;
1772         if (text->next && text->next->type == word_Emph) {
1773             next = text = text->next;
1774         } else
1775             next = NULL;
1776
1777         if (next) {
1778             wchar_t *t, *e;
1779             int n;
1780
1781             t = here->text;
1782             e = next->text;
1783
1784             while (*e) {
1785                 int ec = *e;
1786
1787                 for (n = 0; t[n] && e[n] && e[n] == ec; n++);
1788                 xhtml_convert(t, n, &c, FALSE);
1789                 fprintf(fp, "%s%s%s",
1790                         (ec == 'i' ? "<em>" : ec == 'b' ? "<b>" : ""),
1791                         c,
1792                         (ec == 'i' ? "</em>" : ec == 'b' ? "</b>" : ""));
1793                 sfree(c);
1794
1795                 t += n;
1796                 e += n;
1797             }
1798
1799             xhtml_convert(t, 0, &c, FALSE);
1800             fprintf(fp, "%s\n", c);
1801             sfree(c);
1802         } else {
1803             xhtml_convert(here->text, 0, &c, FALSE);
1804             fprintf(fp, "%s\n", c);
1805             sfree(c);
1806         }
1807     }
1808   fprintf(fp, "</pre>\n");
1809 }