mdw@git.distorted.org.uk Git - sgt/halibut/blob - bk_xhtml.c

   1 /*
   2  * xhtml backend for Halibut
   3  * (initial implementation by James Aylett)
   4  *
   5  * Still to do:
   6  *
   7  *  +++ doesn't handle non-breaking hyphens. Not sure how to yet.
   8  *  +++ entity names (from a file -- ideally supply normal SGML files)
   9  *  +++ configuration directive to file split where the current layout
  10  *      code wouldn't. Needs changes to _ponder_layout() and _do_paras(),
  11  *      perhaps others.
  12  *
  13  * Limitations:
  14  *
  15  *  +++ biblio/index references target the nearest section marker, rather
  16  *   than having a dedicated target themselves. In large bibliographies
  17  *   this will cause problems. (The solution is to fake up a response
  18  *   from xhtml_find_section(), probably linking it into the sections
  19  *   chain just in case we need it again, and to make freeing it up
  20  *   easier.) docsrc.pl used to work as we do, however, and SGT agrees that
  21  *   this is acceptable for now.
  22  *  +++ can't cope with leaf-level == 0. It's all to do with the
  23  *   top-level file not being normal, probably not even having a valid
  24  *   section level, and stuff like that. I question whether this is an
  25  *   issue, frankly; small manuals that fit on one page should probably
  26  *   not be written in halibut at all.
  27  */
  28
  29 #include <stdio.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32 #include <assert.h>
  33 #include "halibut.h"
  34
  35 struct xhtmlsection_Struct {
  36     struct xhtmlsection_Struct *next; /* next sibling (NULL if split across files) */
  37     struct xhtmlsection_Struct *child; /* NULL if split across files */
  38     struct xhtmlsection_Struct *parent; /* NULL if split across files */
  39     struct xhtmlsection_Struct *chain; /* single structure independent of weird trees */
  40     paragraph *para;
  41     struct xhtmlfile_Struct *file; /* which file is this a part of? */
  42     char *fragment; /* fragment id within the file */
  43     int level;
  44 };
  45
  46 struct xhtmlfile_Struct {
  47     struct xhtmlfile_Struct *next;
  48     struct xhtmlfile_Struct *child;
  49     struct xhtmlfile_Struct *parent;
  50     char *filename;
  51     struct xhtmlsection_Struct *sections; /* sections within this file (only one for non-leaf) */
  52     int is_leaf; /* is this file a leaf file, ie does it not have any children? */
  53 };
  54
  55 typedef struct xhtmlsection_Struct xhtmlsection;
  56 typedef struct xhtmlfile_Struct xhtmlfile;
  57 typedef struct xhtmlindex_Struct xhtmlindex;
  58
  59 struct xhtmlindex_Struct {
  60   int nsection;
  61   int size;
  62   xhtmlsection **sections;
  63 };
  64
  65 typedef struct {
  66     int just_numbers;
  67     wchar_t *number_suffix;
  68 } xhtmlheadfmt;
  69
  70 typedef struct {
  71   int contents_depth[6];
  72   int leaf_contains_contents;
  73   int leaf_level;
  74   int leaf_smallest_contents;
  75   int include_version_id;
  76   wchar_t *author, *description;
  77   wchar_t *head_end, *body, *body_start, *body_end, *address_start, *address_end, *nav_attrs;
  78   int suppress_address;
  79   xhtmlheadfmt fchapter, *fsect;
  80   int nfsect;
  81 } xhtmlconfig;
  82
  83 /*static void xhtml_level(paragraph *, int);
  84 static void xhtml_level_0(paragraph *);
  85 static void xhtml_docontents(FILE *, paragraph *, int);
  86 static void xhtml_dosections(FILE *, paragraph *, int);
  87 static void xhtml_dobody(FILE *, paragraph *, int);*/
  88
  89 static void xhtml_doheader(FILE *, word *);
  90 static void xhtml_dofooter(FILE *);
  91 static void xhtml_versionid(FILE *, word *, int);
  92
  93 static void xhtml_utostr(wchar_t *, char **);
  94 static int xhtml_para_level(paragraph *);
  95 static int xhtml_reservedchar(int);
  96
  97 static int xhtml_convert(wchar_t *, int, char **, int);
  98 static void xhtml_rdaddwc(rdstringc *, word *, word *, int);
  99 static void xhtml_para(FILE *, word *, int);
 100 static void xhtml_codepara(FILE *, word *);
 101 static void xhtml_heading(FILE *, paragraph *, int);
 102
 103 /* File-global variables are much easier than passing these things
 104  * all over the place. Evil, but easier. We can replace this with a single
 105  * structure at some point.
 106  */
 107 static xhtmlconfig conf;
 108 static keywordlist *keywords;
 109 static indexdata *idx;
 110 static xhtmlfile *topfile;
 111 static xhtmlsection *topsection;
 112 static paragraph *sourceparas;
 113 static xhtmlfile *lastfile;
 114 static xhtmlfile *xhtml_last_file = NULL;
 115 static int last_level=-1, start_level;
 116 static xhtmlsection *currentsection;
 117
 118 static xhtmlconfig xhtml_configure(paragraph *source)
 119 {
 120   xhtmlconfig ret;
 121
 122   /*
 123    * Defaults.
 124    */
 125   ret.contents_depth[0] = 2;
 126   ret.contents_depth[1] = 3;
 127   ret.contents_depth[2] = 4;
 128   ret.contents_depth[3] = 5;
 129   ret.contents_depth[4] = 6;
 130   ret.contents_depth[5] = 7;
 131   ret.leaf_level = 2;
 132   ret.leaf_smallest_contents = 4;
 133   ret.leaf_contains_contents = FALSE;
 134   ret.include_version_id = TRUE;
 135   ret.author = NULL;
 136   ret.description = NULL;
 137   ret.head_end = NULL;
 138   ret.body = NULL;
 139   ret.body_start = NULL;
 140   ret.body_end = NULL;
 141   ret.address_start = NULL;
 142   ret.address_end = NULL;
 143   ret.nav_attrs = NULL;
 144   ret.suppress_address = FALSE;
 145
 146   ret.fchapter.just_numbers = FALSE;
 147   ret.fchapter.number_suffix = ustrdup(L": ");
 148   ret.nfsect = 2;
 149   ret.fsect = mknewa(xhtmlheadfmt, ret.nfsect);
 150   ret.fsect[0].just_numbers = FALSE;
 151   ret.fsect[0].number_suffix = ustrdup(L": ");
 152   ret.fsect[1].just_numbers = TRUE;
 153   ret.fsect[1].number_suffix = ustrdup(L" ");
 154
 155   for (; source; source = source->next)
 156   {
 157     if (source->type == para_Config)
 158     {
 159              if (!ustricmp(source->keyword, L"xhtml-contents-depth-0")) {
 160         ret.contents_depth[0] = utoi(uadv(source->keyword));
 161       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-1")) {
 162         ret.contents_depth[1] = utoi(uadv(source->keyword));
 163       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-2")) {
 164         ret.contents_depth[2] = utoi(uadv(source->keyword));
 165       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-3")) {
 166         ret.contents_depth[3] = utoi(uadv(source->keyword));
 167       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-4")) {
 168         ret.contents_depth[4] = utoi(uadv(source->keyword));
 169       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-5")) {
 170         ret.contents_depth[5] = utoi(uadv(source->keyword));
 171       } else if (!ustricmp(source->keyword, L"xhtml-leaf-level")) {
 172         ret.leaf_level = utoi(uadv(source->keyword));
 173       } else if (!ustricmp(source->keyword, L"xhtml-leaf-smallest-contents")) {
 174         ret.leaf_smallest_contents = utoi(uadv(source->keyword));
 175       } else if (!ustricmp(source->keyword, L"xhtml-versionid")) {
 176         ret.include_version_id = utob(uadv(source->keyword));
 177       } else if (!ustricmp(source->keyword, L"xhtml-leaf-contains-contents")) {
 178         ret.leaf_contains_contents = utob(uadv(source->keyword));
 179       } else if (!ustricmp(source->keyword, L"xhtml-suppress-address")) {
 180         ret.suppress_address = utob(uadv(source->keyword));
 181       } else if (!ustricmp(source->keyword, L"xhtml-author")) {
 182         ret.author = uadv(source->keyword);
 183       } else if (!ustricmp(source->keyword, L"xhtml-description")) {
 184         ret.description = uadv(source->keyword);
 185       } else if (!ustricmp(source->keyword, L"xhtml-head-end")) {
 186         ret.head_end = uadv(source->keyword);
 187       } else if (!ustricmp(source->keyword, L"xhtml-body-start")) {
 188         ret.body_start = uadv(source->keyword);
 189       } else if (!ustricmp(source->keyword, L"xhtml-body-tag")) {
 190         ret.body = uadv(source->keyword);
 191       } else if (!ustricmp(source->keyword, L"xhtml-body-end")) {
 192         ret.body_end = uadv(source->keyword);
 193       } else if (!ustricmp(source->keyword, L"xhtml-address-start")) {
 194         ret.address_start = uadv(source->keyword);
 195       } else if (!ustricmp(source->keyword, L"xhtml-address-end")) {
 196         ret.address_end = uadv(source->keyword);
 197       } else if (!ustricmp(source->keyword, L"xhtml-navigation-attributes")) {
 198         ret.nav_attrs = uadv(source->keyword);
 199       } else if (!ustricmp(source->keyword, L"xhtml-chapter-numeric")) {
 200         ret.fchapter.just_numbers = utob(uadv(source->keyword));
 201       } else if (!ustricmp(source->keyword, L"xhtml-chapter-suffix")) {
 202         ret.fchapter.number_suffix = ustrdup(uadv(source->keyword));
 203       } else if (!ustricmp(source->keyword, L"xhtml-section-numeric")) {
 204         wchar_t *p = uadv(source->keyword);
 205         int n = 0;
 206         if (uisdigit(*p)) {
 207           n = utoi(p);
 208           p = uadv(p);
 209         }
 210         if (n >= ret.nfsect) {
 211           int i;
 212           ret.fsect = resize(ret.fsect, n+1);
 213           for (i = ret.nfsect; i <= n; i++)
 214             ret.fsect[i] = ret.fsect[ret.nfsect-1];
 215           ret.nfsect = n+1;
 216         }
 217         ret.fsect[n].just_numbers = utob(p);
 218       } else if (!ustricmp(source->keyword, L"xhtml-section-suffix")) {
 219         wchar_t *p = uadv(source->keyword);
 220         int n = 0;
 221         if (uisdigit(*p)) {
 222           n = utoi(p);
 223           p = uadv(p);
 224         }
 225         if (n >= ret.nfsect) {
 226           int i;
 227           ret.fsect = resize(ret.fsect, n+1);
 228           for (i = ret.nfsect; i <= n; i++)
 229             ret.fsect[i] = ret.fsect[ret.nfsect-1];
 230           ret.nfsect = n+1;
 231         }
 232         ret.fsect[n].number_suffix = ustrdup(p);
 233       }
 234     }
 235   }
 236
 237   /*  printf(" !!! leaf_level = %i\n", ret.leaf_level);
 238   printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]);
 239   printf(" !!! contentdepth-1 = %i\n", ret.contents_depth[1]);
 240   printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]);
 241   printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]);
 242   printf(" !!! contentdepth-4 = %i\n", ret.contents_depth[4]);
 243   printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]);
 244   printf(" !!! leaf_contains_contents = %i\n", ret.leaf_contains_contents);*/
 245   return ret;
 246 }
 247
 248 static xhtmlsection *xhtml_new_section(xhtmlsection *last)
 249 {
 250   xhtmlsection *ret = mknew(xhtmlsection);
 251   ret->next=NULL;
 252   ret->child=NULL;
 253   ret->parent=NULL;
 254   ret->chain=last;
 255   ret->para=NULL;
 256   ret->file=NULL;
 257   ret->fragment=NULL;
 258   ret->level=-1; /* marker: end of chain */
 259   return ret;
 260 }
 261
 262 /* Returns NULL or the section that marks that paragraph */
 263 static xhtmlsection *xhtml_find_section(paragraph *p)
 264 {
 265   xhtmlsection *ret = topsection;
 266   if (xhtml_para_level(p)==-1) { /* first, we back-track to a section paragraph */
 267     paragraph *p2 = sourceparas;
 268     paragraph *p3 = NULL;
 269     while (p2 && p2!=p) {
 270       if (xhtml_para_level(p2)!=-1) {
 271         p3 = p2;
 272       }
 273       p2=p2->next;
 274     }
 275     if (p3==NULL) { /* for some reason, we couldn't find a section before this paragraph ... ? */
 276       /* Note that this can happen, if you have a cross-reference to before the first chapter starts.
 277        * So don't do that, then.
 278        */
 279       return NULL;
 280     }
 281     p=p3;
 282   }
 283   while (ret && ret->para != p) {
 284 /*    printf(" xhtml_find_section(): checking %s for para @ %p\n", ret->fragment, p);*/
 285     ret=ret->chain;
 286   }
 287   return ret;
 288 }
 289
 290 static xhtmlfile *xhtml_new_file(xhtmlsection *sect)
 291 {
 292   xhtmlfile *ret = mknew(xhtmlfile);
 293
 294   ret->next=NULL;
 295   ret->child=NULL;
 296   ret->parent=NULL;
 297   ret->filename=NULL;
 298   ret->sections=sect;
 299   ret->is_leaf=(sect!=NULL && sect->level==conf.leaf_level);
 300   if (sect==NULL) {
 301     if (conf.leaf_level==0) { /* currently unused */
 302 #define FILENAME_MANUAL "Manual.html"
 303 #define FILENAME_CONTENTS "Contents.html"
 304       ret->filename = smalloc(strlen(FILENAME_MANUAL)+1);
 305       sprintf(ret->filename, FILENAME_MANUAL);
 306     } else {
 307       ret->filename = smalloc(strlen(FILENAME_CONTENTS)+1);
 308       sprintf(ret->filename, FILENAME_CONTENTS);
 309     }
 310   } else {
 311     paragraph *p = sect->para;
 312     rdstringc fname_c = { 0, 0, NULL };
 313     char *c;
 314     word *w;
 315     for (w=(p->kwtext)?(p->kwtext):(p->words); w; w=w->next)
 316     {
 317       switch (removeattr(w->type))
 318       {
 319       case word_Normal:
 320         /*case word_Emph:
 321         case word_Code:
 322         case word_WeakCode:*/
 323         xhtml_utostr(w->text, &c);
 324         rdaddsc(&fname_c,c);
 325         sfree(c);
 326         break;
 327       }
 328     }
 329     rdaddsc(&fname_c, ".html");
 330     ret->filename = rdtrimc(&fname_c);
 331   }
 332   /*  printf(" ! new file '%s', is_leaf == %s\n", ret->filename, (ret->is_leaf)?("true"):("false"));*/
 333   return ret;
 334 }
 335
 336 /*
 337  * Walk the tree fixing up files which are actually leaf (ie
 338  * have no children) but aren't at leaf level, so they have the
 339  * leaf flag set.
 340  */
 341 void xhtml_fixup_layout(xhtmlfile* file)
 342 {
 343   if (file->child==NULL) {
 344     file->is_leaf = TRUE;
 345   } else {
 346     xhtml_fixup_layout(file->child);
 347   }
 348   if (file->next)
 349     xhtml_fixup_layout(file->next);
 350 }
 351
 352 /*
 353  * Create the tree structure so we know where everything goes.
 354  * Method:
 355  *
 356  * Ignoring file splitting, we have three choices with each new section:
 357  *
 358  * +-----------------+-----------------+
 359  * |                 |                 |
 360  * X            +----X----+           (1)
 361  *              |         |
 362  *              Y        (2)
 363  *              |
 364  *             (3)
 365  *
 366  * Y is the last section we added (currentsect).
 367  * If sect is the section we want to add, then:
 368  *
 369  * (1) if sect->level < currentsect->level
 370  * (2) if sect->level == currentsect->level
 371  * (3) if sect->level > currentsect->level
 372  *
 373  * This requires the constraint that you never skip section numbers
 374  * (so you can't have a.b.c.d without all of a, a.b and a.b.c existing).
 375  *
 376  * Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change
 377  * more than one level at a time. Lots of asserts, and probably part of
 378  * the algorithm here, rely on this being true. (It currently isn't
 379  * enforced by halibut, however.)
 380  *
 381  * File splitting makes this harder. For instance, say we added at (3)
 382  * above and now need to add another section. We are splitting at level
 383  * 2, ie the level of Y. Z is the last section we added:
 384  *
 385  * +-----------------+-----------------+
 386  * |                 |                 |
 387  * X            +----X----+           (1)
 388  *              |         |
 389  *         +----Y----+   (1)
 390  *         |         |
 391  *         Z        (2)
 392  *         |
 393  *        (3)
 394  *
 395  * The (1) case is now split; we need to search upwards to find where
 396  * to actually link in. The other two cases remain the same (and will
 397  * always be like this).
 398  *
 399  * File splitting makes this harder, however. The decision of whether
 400  * to split to a new file is always on the same condition, however (is
 401  * the level of this section higher than the leaf_level configuration
 402  * value or not).
 403  *
 404  * Treating the cases backwards:
 405  *
 406  * (3) same file if sect->level > conf.leaf_level, otherwise new file
 407  *
 408  *     if in the same file, currentsect->child points to sect
 409  *     otherwise the linking is done through the file tree (which works
 410  *     in more or less the same way, ie currentfile->child points to
 411  *     the new file)
 412  *
 413  * (2) same file if sect->level > conf.leaf_level, otherwise new file
 414  *
 415  *     if in the same file, currentsect->next points to sect
 416  *     otherwise file linking and currentfile->next points to the new
 417  *     file (we know that Z must have caused a new file to be created)
 418  *
 419  * (1) same file if sect->level > conf.leaf_level, otherwise new file
 420  *
 421  *     this is actually effectively the same case as (2) here,
 422  *     except that we first have to travel up the sections to figure
 423  *     out which section this new one will be a sibling of. In doing
 424  *     so, we may disappear off the top of a file and have to go up
 425  *     to its parent in the file tree.
 426  *
 427  */
 428 static void xhtml_ponder_layout(paragraph *p)
 429 {
 430   xhtmlsection *lastsection;
 431   xhtmlsection *currentsect;
 432   xhtmlfile *currentfile;
 433
 434   lastfile = NULL;
 435   topsection = xhtml_new_section(NULL);
 436   topfile = xhtml_new_file(NULL);
 437   lastsection = topsection;
 438   currentfile = topfile;
 439   currentsect = topsection;
 440
 441   if (conf.leaf_level == 0) {
 442     topfile->is_leaf = 1;
 443     topfile->sections = topsection;
 444     topsection->file = topfile;
 445   }
 446
 447   for (; p; p=p->next)
 448   {
 449     int level = xhtml_para_level(p);
 450     if (level>0) /* actually a section */
 451     {
 452       xhtmlsection *sect;
 453       word *w;
 454       char *c;
 455       rdstringc fname_c = { 0, 0, NULL };
 456
 457       sect = xhtml_new_section(lastsection);
 458       lastsection = sect;
 459       sect->para = p;
 460       for (w=(p->kwtext2)?(p->kwtext2):(p->words); w; w=w->next) /* kwtext2 because we want numbers only! */
 461       {
 462         switch (removeattr(w->type))
 463         {
 464         case word_Normal:
 465          /*case word_Emph:
 466          case word_Code:
 467          case word_WeakCode:*/
 468           xhtml_utostr(w->text, &c);
 469           rdaddsc(&fname_c,c);
 470           sfree(c);
 471           break;
 472         }
 473       }
 474 /*      rdaddsc(&fname_c, ".html");*/
 475       sect->fragment = rdtrimc(&fname_c);
 476       sect->level = level;
 477       /*      printf(" ! adding para @ %p as sect %s, level %i\n", sect->para, sect->fragment, level);*/
 478
 479       if (level>currentsect->level) { /* case (3) */
 480         if (level>conf.leaf_level) { /* same file */
 481           assert(currentfile->is_leaf);
 482           currentsect->child = sect;
 483           sect->parent=currentsect;
 484           sect->file=currentfile;
 485           /*          printf("connected '%s' to existing file '%s' [I]\n", sect->fragment, currentfile->filename);*/
 486           currentsect=sect;
 487         } else { /* new file */
 488           xhtmlfile *file = xhtml_new_file(sect);
 489           assert(!currentfile->is_leaf);
 490           currentfile->child=file;
 491           sect->file=file;
 492           file->parent=currentfile;
 493           /*          printf("connected '%s' to new file '%s' [I]\n", sect->fragment, file->filename);*/
 494           currentfile=file;
 495           currentsect=sect;
 496         }
 497       } else if (level >= currentsect->file->sections->level) {
 498         /* Case (1) or (2) *AND* still under the section that starts
 499          * the current file.
 500          *
 501          * I'm not convinced that this couldn't be rolled in with the
 502          * final else {} leg further down. It seems a lot of effort
 503          * this way.
 504          */
 505         if (level>conf.leaf_level) { /* stick within the same file */
 506           assert(currentfile->is_leaf);
 507           sect->file = currentfile;
 508           while (currentsect && currentsect->level > level &&
 509                  currentsect->file==currentsect->parent->file) {
 510             currentsect = currentsect->parent;
 511           }
 512           assert(currentsect);
 513           currentsect->next = sect;
 514           assert(currentsect->level == sect->level);
 515           sect->parent = currentsect->parent;
 516           currentsect = sect;
 517           /*          printf("connected '%s' to existing file '%s' [II]\n", sect->fragment, currentfile->filename);*/
 518         } else { /* new file */
 519           xhtmlfile *file = xhtml_new_file(sect);
 520           sect->file=file;
 521           currentfile->next=file;
 522           file->parent=currentfile->parent;
 523           file->is_leaf=(level==conf.leaf_level);
 524           file->sections=sect;
 525           /*          printf("connected '%s' to new file '%s' [II]\n", sect->fragment, file->filename);*/
 526           currentfile=file;
 527           currentsect=sect;
 528         }
 529       } else { /* Case (1) or (2) and we must move up the file tree first */
 530         /* this loop is now probably irrelevant - we know we can't connect
 531          * to anything in the current file */
 532         while (currentsect && level<currentsect->level) {
 533           currentsect=currentsect->parent;
 534           if (currentsect) {
 535             /*            printf(" * up one level to '%s'\n", currentsect->fragment);*/
 536           } else {
 537             /*            printf(" * up one level (off top of current file)\n");*/
 538           }
 539         }
 540         if (currentsect) {
 541           /* I'm pretty sure this can now never fire */
 542           assert(currentfile->is_leaf);
 543           /*          printf("connected '%s' to existing file '%s' [III]\n", sect->fragment, currentfile->filename);*/
 544           sect->file = currentfile;
 545           currentsect->next=sect;
 546           currentsect=sect;
 547         } else { /* find a file we can attach to */
 548           while (currentfile && currentfile->sections && level<currentfile->sections->level) {
 549             currentfile=currentfile->parent;
 550             if (currentfile) {
 551               /*              printf(" * up one file level to '%s'\n", currentfile->filename);*/
 552             } else {
 553               /*              printf(" * up one file level (off top of tree)\n");*/
 554             }
 555           }
 556           if (currentfile) { /* new file (we had to skip up a file to
 557                                 get here, so we must be dealing with a
 558                                 level no lower than the configured
 559                                 leaf_level */
 560             xhtmlfile *file = xhtml_new_file(sect);
 561             currentfile->next=file;
 562             sect->file=file;
 563             file->parent=currentfile->parent;
 564             file->is_leaf=(level==conf.leaf_level);
 565             file->sections=sect;
 566             /*            printf("connected '%s' to new file '%s' [III]\n", sect->fragment, file->filename);*/
 567             currentfile=file;
 568             currentsect=sect;
 569           } else {
 570             fatal(err_whatever, "Ran off the top trying to connect sibling: strange document.");
 571           }
 572         }
 573       }
 574     }
 575   }
 576   topsection = lastsection; /* get correct end of the chain */
 577   xhtml_fixup_layout(topfile); /* leaf files not at leaf level marked as such */
 578 }
 579
 580 static void xhtml_do_index();
 581 static void xhtml_do_file(xhtmlfile *file);
 582 static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform);
 583 static void xhtml_do_paras(FILE *fp, paragraph *p, paragraph *end, int indexable);
 584 static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit);
 585 static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit);
 586 static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit);
 587 static int xhtml_do_contents(FILE *fp, xhtmlfile *file);
 588 static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file);
 589 static void xhtml_do_sections(FILE *fp, xhtmlsection *sections);
 590
 591 /*
 592  * Do all the files in this structure.
 593  */
 594 static void xhtml_do_files(xhtmlfile *file)
 595 {
 596   xhtml_do_file(file);
 597   if (file->child)
 598     xhtml_do_files(file->child);
 599   if (file->next)
 600     xhtml_do_files(file->next);
 601 }
 602
 603 /*
 604  * Free up all memory used by the file tree from 'xfile' downwards
 605  */
 606 static void xhtml_free_file(xhtmlfile* xfile)
 607 {
 608   if (xfile==NULL) {
 609     return;
 610   }
 611
 612   if (xfile->filename) {
 613     sfree(xfile->filename);
 614   }
 615   xhtml_free_file(xfile->child);
 616   xhtml_free_file(xfile->next);
 617   sfree(xfile);
 618 }
 619
 620 /*
 621  * Main function.
 622  */
 623 void xhtml_backend(paragraph *sourceform, keywordlist *in_keywords,
 624                    indexdata *in_idx)
 625 {
 626 /*  int i;*/
 627   indexentry *ientry;
 628   int ti;
 629   xhtmlsection *xsect;
 630
 631   sourceparas = sourceform;
 632   conf = xhtml_configure(sourceform);
 633   keywords = in_keywords;
 634   idx = in_idx;
 635
 636   /* Clear up the index entries backend data pointers */
 637   for (ti=0; (ientry = (indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
 638     ientry->backend_data=NULL;
 639   }
 640
 641   xhtml_ponder_layout(sourceform);
 642
 643   /* old system ... (writes to *.alt, but gets some stuff wrong and is ugly) */
 644 /*  xhtml_level_0(sourceform);
 645   for (i=1; i<=conf.leaf_level; i++)
 646   {
 647     xhtml_level(sourceform, i);
 648   }*/
 649
 650   /* new system ... (writes to *.html, but isn't fully trusted) */
 651   xhtml_do_top_file(topfile, sourceform);
 652   assert(!topfile->next); /* shouldn't have a sibling at all */
 653   if (topfile->child) {
 654     xhtml_do_files(topfile->child);
 655     xhtml_do_index();
 656   }
 657
 658   /* release file, section, index data structures */
 659   xsect = topsection;
 660   while (xsect) {
 661     xhtmlsection *tmp = xsect->chain;
 662     if (xsect->fragment) {
 663       sfree(xsect->fragment);
 664     }
 665     sfree(xsect);
 666     xsect = tmp;
 667   }
 668   xhtml_free_file(topfile);
 669   for (ti = 0; (ientry=(indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
 670     if (ientry->backend_data!=NULL) {
 671       xhtmlindex *xi = (xhtmlindex*) ientry->backend_data;
 672       if (xi->sections!=NULL) {
 673         sfree(xi->sections);
 674       }
 675       sfree(xi);
 676     }
 677     ientry->backend_data = NULL;
 678   }
 679   {
 680     int i;
 681     sfree(conf.fchapter.number_suffix);
 682     for (i = 0; i < conf.nfsect; i++)
 683       sfree(conf.fsect[i].number_suffix);
 684     sfree(conf.fsect);
 685   }
 686 }
 687
 688 static int xhtml_para_level(paragraph *p)
 689 {
 690   switch (p->type)
 691   {
 692   case para_Title:
 693     return 0;
 694     break;
 695   case para_UnnumberedChapter:
 696   case para_Chapter:
 697   case para_Appendix:
 698     return 1;
 699     break;
 700 /*  case para_BiblioCited:
 701     return 2;
 702     break;*/
 703   case para_Heading:
 704   case para_Subsect:
 705     return p->aux+2;
 706     break;
 707   default:
 708     return -1;
 709     break;
 710   }
 711 }
 712
 713 static char* xhtml_index_filename = "IndexPage.html";
 714
 715 /* Output the nav links for the current file.
 716  * file == NULL means we're doing the index
 717  */
 718 static void xhtml_donavlinks(FILE *fp, xhtmlfile *file)
 719 {
 720   xhtmlfile *xhtml_next_file = NULL;
 721   fprintf(fp, "<p");
 722   if (conf.nav_attrs!=NULL) {
 723     fprintf(fp, " %ls>", conf.nav_attrs);
 724   } else {
 725     fprintf(fp, ">");
 726   }
 727   if (xhtml_last_file==NULL) {
 728     fprintf(fp, "Previous | ");
 729   } else {
 730     fprintf(fp, "<a href='%s'>Previous</a> | ", xhtml_last_file->filename);
 731   }
 732   fprintf(fp, "<a href='Contents.html'>Contents</a> | ");
 733   if (file == NULL) {
 734     fprintf(fp, "Index | ");
 735   } else {
 736     fprintf(fp, "<a href='%s'>Index</a> | ", xhtml_index_filename);
 737   }
 738   if (file != NULL) { /* otherwise we're doing nav links for the index */
 739     if (xhtml_next_file==NULL)
 740       xhtml_next_file = file->child;
 741     if (xhtml_next_file==NULL)
 742       xhtml_next_file = file->next;
 743     if (xhtml_next_file==NULL)
 744       xhtml_next_file = file->parent->next;
 745   }
 746   if (xhtml_next_file==NULL) {
 747     if (file==NULL) { /* index, so no next file */
 748       fprintf(fp, "Next ");
 749     } else {
 750       fprintf(fp, "<a href='%s'>Next</a>", xhtml_index_filename);
 751     }
 752   } else {
 753     fprintf(fp, "<a href='%s'>Next</a>", xhtml_next_file->filename);
 754   }
 755   fprintf(fp, "</p>\n");
 756 }
 757
 758 /* Write out the index file */
 759 static void xhtml_do_index_body(FILE *fp)
 760 {
 761   indexentry *y;
 762   int ti;
 763
 764   if (count234(idx->entries) == 0)
 765     return;                            /* don't write anything at all */
 766
 767   fprintf(fp, "<dl>\n");
 768   /* iterate over idx->entries using the tree functions and display everything */
 769   for (ti = 0; (y = (indexentry *)index234(idx->entries, ti)) != NULL; ti++) {
 770     if (y->backend_data) {
 771       int i;
 772       xhtmlindex *xi;
 773
 774       fprintf(fp, "<dt>");
 775       xhtml_para(fp, y->text, FALSE);
 776       fprintf(fp, "</dt>\n<dd>");
 777
 778       xi = (xhtmlindex*) y->backend_data;
 779       for (i=0; i<xi->nsection; i++) {
 780         xhtmlsection *sect = xi->sections[i];
 781         if (sect) {
 782           fprintf(fp, "<a href='%s#%s'>", sect->file->filename, sect->fragment);
 783           if (sect->para->kwtext) {
 784             xhtml_para(fp, sect->para->kwtext, FALSE);
 785           } else if (sect->para->words) {
 786             xhtml_para(fp, sect->para->words, FALSE);
 787           }
 788           fprintf(fp, "</a>");
 789           if (i+1<xi->nsection) {
 790             fprintf(fp, ", ");
 791           }
 792         }
 793       }
 794       fprintf(fp, "</dd>\n");
 795     }
 796   }
 797   fprintf(fp, "</dl>\n");
 798 }
 799 static void xhtml_do_index()
 800 {
 801   word temp_word = { NULL, NULL, word_Normal, 0, 0, L"Index", { NULL, 0, 0} };
 802   FILE *fp = fopen(xhtml_index_filename, "w");
 803
 804   if (fp==NULL)
 805     fatal(err_cantopenw, xhtml_index_filename);
 806   xhtml_doheader(fp, &temp_word);
 807   xhtml_donavlinks(fp, NULL);
 808
 809   xhtml_do_index_body(fp);
 810
 811   xhtml_donavlinks(fp, NULL);
 812   xhtml_dofooter(fp);
 813   fclose(fp);
 814 }
 815
 816 /* Output the given file. This includes whatever contents at beginning and end, etc. etc. */
 817 static void xhtml_do_file(xhtmlfile *file)
 818 {
 819   FILE *fp = fopen(file->filename, "w");
 820   if (fp==NULL)
 821     fatal(err_cantopenw, file->filename);
 822
 823   if (file->sections->para->words) {
 824     xhtml_doheader(fp, file->sections->para->words);
 825   } else if (file->sections->para->kwtext) {
 826     xhtml_doheader(fp, file->sections->para->kwtext);
 827   } else {
 828     xhtml_doheader(fp, NULL);
 829   }
 830
 831   xhtml_donavlinks(fp, file);
 832
 833   if (file->is_leaf && conf.leaf_contains_contents &&
 834       xhtml_do_contents(NULL, file)>=conf.leaf_smallest_contents)
 835     xhtml_do_contents(fp, file);
 836   xhtml_do_sections(fp, file->sections);
 837   if (!file->is_leaf)
 838     xhtml_do_naked_contents(fp, file);
 839
 840   xhtml_donavlinks(fp, file);
 841
 842   xhtml_dofooter(fp);
 843   fclose(fp);
 844
 845   xhtml_last_file = file;
 846 }
 847
 848 /* Output the top-level file. */
 849 static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform)
 850 {
 851   paragraph *p;
 852   int done=FALSE;
 853   FILE *fp = fopen(file->filename, "w");
 854   if (fp==NULL)
 855     fatal(err_cantopenw, file->filename);
 856
 857   /* Do the title -- only one allowed */
 858   for (p = sourceform; p && !done; p = p->next)
 859   {
 860     if (p->type == para_Title)
 861     {
 862       xhtml_doheader(fp, p->words);
 863       done=TRUE;
 864     }
 865   }
 866   if (!done)
 867     xhtml_doheader(fp, NULL /* Eek! */);
 868
 869   /*
 870    * Display the title.
 871    */
 872   for (p = sourceform; p; p = p->next)
 873   {
 874     if (p->type == para_Title) {
 875       xhtml_heading(fp, p, FALSE);
 876       break;
 877     }
 878   }
 879
 880   /* Do the preamble */
 881   for (p = sourceform; p; p = p->next)
 882   {
 883     if (p->type == para_Chapter || p->type == para_Heading ||
 884         p->type == para_Subsect || p->type == para_Appendix ||
 885         p->type == para_UnnumberedChapter) {
 886         /*
 887          * We've found the end of the preamble. Do every normal
 888          * paragraph up to there.
 889          */
 890         xhtml_do_paras(fp, sourceform, p, FALSE);
 891         break;
 892     }
 893   }
 894
 895   xhtml_do_contents(fp, file);
 896   xhtml_do_sections(fp, file->sections);
 897
 898   /*
 899    * Put the index in the top file if we're in single-file mode
 900    * (leaf-level 0).
 901    */
 902   if (conf.leaf_level == 0 && count234(idx->entries) > 0) {
 903     fprintf(fp, "<a name=\"index\"></a><h1>Index</h1>\n");
 904     xhtml_do_index_body(fp);
 905   }
 906
 907   xhtml_dofooter(fp);
 908   fclose(fp);
 909 }
 910
 911 /* Convert a Unicode string to an ASCII one. '?' is
 912  * used for unmappable characters.
 913  */
 914 static void xhtml_utostr(wchar_t *in, char **out)
 915 {
 916   int l = ustrlen(in);
 917   int i;
 918   *out = smalloc(l+1);
 919   for (i=0; i<l; i++)
 920   {
 921     if (in[i]>=32 && in[i]<=126)
 922       (*out)[i]=(char)in[i];
 923     else
 924       (*out)[i]='?';
 925   }
 926   (*out)[i]=0;
 927 }
 928
 929 /*
 930  * Write contents for the given file, and subfiles, down to
 931  * the appropriate contents depth. Returns the number of
 932  * entries written.
 933  */
 934 static int xhtml_do_contents(FILE *fp, xhtmlfile *file)
 935 {
 936   int level, limit, count = 0;
 937   if (!file)
 938     return 0;
 939
 940   level = (file->sections)?(file->sections->level):(0);
 941   limit = conf.contents_depth[(level>5)?(5):(level)];
 942   start_level = (file->is_leaf) ? (level-1) : (level);
 943   last_level = start_level;
 944
 945   count += xhtml_do_contents_section_limit(fp, file->sections, limit);
 946   count += xhtml_do_contents_limit(fp, file->child, limit);
 947   if (fp!=NULL) {
 948     while (last_level > start_level) {
 949       last_level--;
 950       fprintf(fp, "</li></ul>\n");
 951     }
 952   }
 953   return count;
 954 }
 955
 956 /* As above, but doesn't do anything in the current file */
 957 static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file)
 958 {
 959   int level, limit, start_level, count = 0;
 960   if (!file)
 961     return 0;
 962
 963   level = (file->sections)?(file->sections->level):(0);
 964   limit = conf.contents_depth[(level>5)?(5):(level)];
 965   start_level = (file->is_leaf) ? (level-1) : (level);
 966   last_level = start_level;
 967
 968   count = xhtml_do_contents_limit(fp, file->child, limit);
 969   if (fp!=NULL) {
 970     while (last_level > start_level) {
 971       last_level--;
 972       fprintf(fp, "</li></ul>\n");
 973     }
 974   }
 975   return count;
 976 }
 977
 978 /*
 979  * Write contents for the given file, children, and siblings, down to
 980  * given limit contents depth.
 981  */
 982 static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit)
 983 {
 984   int count = 0;
 985   while (file) {
 986     count += xhtml_do_contents_section_limit(fp, file->sections, limit);
 987     count += xhtml_do_contents_limit(fp, file->child, limit);
 988     file = file->next;
 989   }
 990   return count;
 991 }
 992
 993 /*
 994  * Write contents entries for the given section tree, down to the
 995  * limit contents depth.
 996  */
 997 static int xhtml_do_contents_section_deep_limit(FILE *fp, xhtmlsection *section, int limit)
 998 {
 999   int count = 0;
1000   while (section) {
1001     if (!xhtml_add_contents_entry(fp, section, limit))
1002       return 0;
1003     else
1004       count++;
1005     count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
1006     section = section->next;
1007   }
1008   return count;
1009 }
1010
1011 /*
1012  * Write contents entries for the given section tree, down to the
1013  * limit contents depth.
1014  */
1015 static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit)
1016 {
1017   int count = 0;
1018   if (!section)
1019     return 0;
1020   xhtml_add_contents_entry(fp, section, limit);
1021   count=1;
1022   count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
1023   /*  section=section->child;
1024   while (section && xhtml_add_contents_entry(fp, section, limit)) {
1025     section = section->next;
1026     }*/
1027   return count;
1028 }
1029
1030 /*
1031  * Add a section entry, unless we're exceeding the limit, in which
1032  * case return FALSE (otherwise return TRUE).
1033  */
1034 static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit)
1035 {
1036   if (!section || section->level > limit)
1037     return FALSE;
1038   if (fp==NULL || section->level < 0)
1039     return TRUE;
1040   if (last_level > section->level) {
1041     while (last_level > section->level) {
1042       last_level--;
1043       fprintf(fp, "</li></ul>\n");
1044     }
1045     fprintf(fp, "</li>\n");
1046   } else if (last_level < section->level) {
1047     assert(last_level == section->level - 1);
1048     last_level++;
1049     fprintf(fp, "<ul>\n");
1050   } else {
1051     fprintf(fp, "</li>\n");
1052   }
1053   fprintf(fp, "<li><a href=\"%s#%s\">", section->file->filename, section->fragment);
1054   if (section->para->kwtext) {
1055     xhtml_para(fp, section->para->kwtext, FALSE);
1056     if (section->para->words) {
1057       fprintf(fp, ": ");
1058     }
1059   }
1060   if (section->para->words) {
1061     xhtml_para(fp, section->para->words, FALSE);
1062   }
1063   fprintf(fp, "</a>\n");
1064   return TRUE;
1065 }
1066
1067 /*
1068  * Write all the sections in this file. Do all paragraphs in this section, then all
1069  * children (recursively), then go on to the next one (tail recursively).
1070  */
1071 static void xhtml_do_sections(FILE *fp, xhtmlsection *sections)
1072 {
1073   while (sections) {
1074     currentsection = sections;
1075     xhtml_do_paras(fp, sections->para, NULL, TRUE);
1076     xhtml_do_sections(fp, sections->child);
1077     sections = sections->next;
1078   }
1079 }
1080
1081 /* Write this list of paragraphs. Close off all lists at the end. */
1082 static void xhtml_do_paras(FILE *fp, paragraph *p, paragraph *end,
1083                            int indexable)
1084 {
1085   int last_type = -1, ptype, first=TRUE;
1086   stack lcont_stack = stk_new();
1087   if (!p)
1088     return;
1089
1090 /*  for (; p && (xhtml_para_level(p)>limit || xhtml_para_level(p)==-1 || first); p=p->next) {*/
1091   for (; p && p != end && (xhtml_para_level(p)==-1 || first); p=p->next) {
1092     first=FALSE;
1093     switch (ptype = p->type)
1094     {
1095       /*
1096        * Things we ignore because we've already processed them or
1097        * aren't going to touch them in this pass.
1098        */
1099      case para_IM:
1100      case para_BR:
1101      case para_Biblio:                 /* only touch BiblioCited */
1102      case para_VersionID:
1103      case para_NoCite:
1104      case para_Title:
1105        break;
1106
1107        /*
1108         * Chapter titles.
1109         */
1110       case para_Chapter:
1111       case para_Appendix:
1112       case para_UnnumberedChapter:
1113         xhtml_heading(fp, p, indexable);
1114         break;
1115
1116       case para_Heading:
1117       case para_Subsect:
1118         xhtml_heading(fp, p, indexable);
1119         break;
1120
1121       case para_Rule:
1122         fprintf(fp, "\n<hr />\n");
1123         break;
1124
1125       case para_Normal:
1126       case para_Copyright:
1127         fprintf(fp, "\n<p>");
1128         xhtml_para(fp, p->words, indexable);
1129         fprintf(fp, "</p>\n");
1130         break;
1131
1132       case para_LcontPush:
1133         {
1134             int *p;
1135             p = mknew(int);
1136             *p = last_type;
1137             stk_push(lcont_stack, p);
1138             last_type = para_Normal;
1139         }
1140         break;
1141       case para_LcontPop:
1142         {
1143             int *p = stk_pop(lcont_stack);
1144             assert(p);
1145             ptype = last_type = *p;
1146             sfree(p);
1147             goto closeofflist;         /* ick */
1148         }
1149         break;
1150       case para_QuotePush:
1151         fprintf(fp, "<blockquote>\n");
1152         break;
1153       case para_QuotePop:
1154         fprintf(fp, "</blockquote>\n");
1155         break;
1156
1157       case para_Bullet:
1158       case para_NumberedList:
1159       case para_Description:
1160       case para_DescribedThing:
1161       case para_BiblioCited:
1162         if (last_type!=p->type &&
1163             !(last_type==para_DescribedThing && p->type==para_Description) &&
1164             !(last_type==para_Description && p->type==para_DescribedThing)) {
1165           /* start up list if necessary */
1166           if (p->type == para_Bullet) {
1167             fprintf(fp, "<ul>\n");
1168           } else if (p->type == para_NumberedList) {
1169             fprintf(fp, "<ol>\n");
1170           } else if (p->type == para_BiblioCited ||
1171                      p->type == para_DescribedThing ||
1172                      p->type == para_Description) {
1173             fprintf(fp, "<dl>\n");
1174           }
1175         }
1176         if (p->type == para_Bullet || p->type == para_NumberedList) {
1177           fprintf(fp, "<li>");
1178         } else if (p->type == para_DescribedThing) {
1179           fprintf(fp, "<dt>");
1180         } else if (p->type == para_Description) {
1181           fprintf(fp, "<dd>");
1182         } else if (p->type == para_BiblioCited) {
1183           fprintf(fp, "<dt>");
1184           xhtml_para(fp, p->kwtext, indexable);
1185           fprintf(fp, "</dt>\n<dd>");
1186         }
1187         xhtml_para(fp, p->words, indexable);
1188         {
1189           paragraph *p2 = p->next;
1190           if (p2 && xhtml_para_level(p2)==-1 && p2->type == para_LcontPush)
1191             break;
1192         }
1193
1194         closeofflist:
1195         if (ptype == para_BiblioCited) {
1196           fprintf(fp, "</dd>\n");
1197         } else if (ptype == para_DescribedThing) {
1198           fprintf(fp, "</dt>");
1199         } else if (ptype == para_Description) {
1200           fprintf(fp, "</dd>");
1201         } else if (ptype == para_Bullet || ptype == para_NumberedList) {
1202           fprintf(fp, "</li>");
1203         }
1204         if (ptype == para_Bullet || ptype == para_NumberedList ||
1205             ptype == para_BiblioCited || ptype == para_Description ||
1206             ptype == para_DescribedThing)
1207           /* close off list if necessary */
1208         {
1209           paragraph *p2 = p->next;
1210           int close_off=FALSE;
1211 /*          if (p2 && (xhtml_para_level(p2)>limit || xhtml_para_level(p2)==-1)) {*/
1212           if (p2 && xhtml_para_level(p2)==-1) {
1213             if (p2->type != ptype &&
1214                 !(p2->type==para_DescribedThing && ptype==para_Description) &&
1215                 !(p2->type==para_Description && ptype==para_DescribedThing) &&
1216                 p2->type != para_LcontPush)
1217               close_off=TRUE;
1218           } else {
1219             close_off=TRUE;
1220           }
1221           if (close_off) {
1222             if (ptype == para_Bullet) {
1223               fprintf(fp, "</ul>\n");
1224             } else if (ptype == para_NumberedList) {
1225               fprintf(fp, "</ol>\n");
1226             } else if (ptype == para_BiblioCited ||
1227                        ptype == para_Description ||
1228                        ptype == para_DescribedThing) {
1229               fprintf(fp, "</dl>\n");
1230             }
1231           }
1232         }
1233         break;
1234
1235       case para_Code:
1236         xhtml_codepara(fp, p->words);
1237         break;
1238     }
1239     last_type = ptype;
1240   }
1241
1242   stk_free(lcont_stack);
1243 }
1244
1245 /*
1246  * Output a header for this XHTML file.
1247  */
1248 static void xhtml_doheader(FILE *fp, word *title)
1249 {
1250   fprintf(fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n");
1251   fprintf(fp, "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n");
1252   fprintf(fp, "<html xmlns='http://www.w3.org/1999/xhtml'>\n\n<head>\n<title>");
1253   if (title==NULL)
1254     fprintf(fp, "The thing with no name!");
1255   else
1256     xhtml_para(fp, title, FALSE);
1257   fprintf(fp, "</title>\n");
1258   fprintf(fp, "<meta name=\"generator\" content=\"Halibut %s xhtml-backend\" />\n", version);
1259   if (conf.author)
1260     fprintf(fp, "<meta name=\"author\" content=\"%ls\" />\n", conf.author);
1261   if (conf.description)
1262     fprintf(fp, "<meta name=\"description\" content=\"%ls\" />\n", conf.description);
1263   if (conf.head_end)
1264     fprintf(fp, "%ls\n", conf.head_end);
1265   fprintf(fp, "</head>\n\n");
1266   if (conf.body)
1267     fprintf(fp, "%ls\n", conf.body);
1268   else
1269     fprintf(fp, "<body>\n");
1270   if (conf.body_start)
1271     fprintf(fp, "%ls\n", conf.body_start);
1272 }
1273
1274 /*
1275  * Output a footer for this XHTML file.
1276  */
1277 static void xhtml_dofooter(FILE *fp)
1278 {
1279   fprintf(fp, "\n<hr />\n\n");
1280   if (conf.body_end)
1281     fprintf(fp, "%ls\n", conf.body_end);
1282   if (!conf.suppress_address) {
1283     fprintf(fp,"<address>\n");
1284     if (conf.address_start)
1285       fprintf(fp, "%ls\n", conf.address_start);
1286     /* Do the version ID */
1287     if (conf.include_version_id) {
1288       paragraph *p;
1289       int started = 0;
1290       for (p = sourceparas; p; p = p->next)
1291         if (p->type == para_VersionID) {
1292           xhtml_versionid(fp, p->words, started);
1293           started = 1;
1294         }
1295     }
1296     if (conf.address_end)
1297       fprintf(fp, "%ls\n", conf.address_end);
1298     fprintf(fp, "</address>\n");
1299   }
1300   fprintf(fp, "</body>\n\n</html>\n");
1301 }
1302
1303 /*
1304  * Output the versionid paragraph. Typically this is a version control
1305  * ID string (such as $Id...$ in RCS).
1306  */
1307 static void xhtml_versionid(FILE *fp, word *text, int started)
1308 {
1309   rdstringc t = { 0, 0, NULL };
1310
1311   rdaddc(&t, '[');                     /* FIXME: configurability */
1312   xhtml_rdaddwc(&t, text, NULL, FALSE);
1313   rdaddc(&t, ']');                     /* FIXME: configurability */
1314
1315   if (started)
1316     fprintf(fp, "<br />\n");
1317   fprintf(fp, "%s\n", t.text);
1318   sfree(t.text);
1319 }
1320
1321 /* Is this an XHTML reserved character? */
1322 static int xhtml_reservedchar(int c)
1323 {
1324   if (c=='&' || c=='<' || c=='>' || c=='"')
1325     return TRUE;
1326   else
1327     return FALSE;
1328 }
1329
1330 /*
1331  * Convert a wide string into valid XHTML: Anything outside ASCII will
1332  * be fixed up as an entity. Currently we don't worry about constraining the
1333  * encoded character set, which we should probably do at some point (we can
1334  * still fix up and return FALSE - see the last comment here). We also don't
1335  * currently
1336  *
1337  * Because this is only used for words, spaces are HARD spaces (any other
1338  * spaces will be word_Whitespace not word_Normal). So they become &nbsp;
1339  * Unless hard_spaces is FALSE, of course (code paragraphs break the above
1340  * rule).
1341  *
1342  * If `result' is non-NULL, mallocs the resulting string and stores a pointer to
1343  * it in `*result'. If `result' is NULL, merely checks whether all
1344  * characters in the string are feasible.
1345  *
1346  * Return is nonzero if all characters are OK. If not all
1347  * characters are OK but `result' is non-NULL, a result _will_
1348  * still be generated!
1349  */
1350 static int xhtml_convert(wchar_t *s, int maxlen, char **result,
1351                          int hard_spaces) {
1352     int doing = (result != 0);
1353     int ok = TRUE;
1354     char *p = NULL;
1355     int plen = 0, psize = 0;
1356
1357     if (maxlen <= 0)
1358         maxlen = -1;
1359
1360     for (; *s && maxlen != 0; s++, maxlen--) {
1361         wchar_t c = *s;
1362
1363 #define ensure_size(i) if (i>=psize) { psize = i+256; p = resize(p, psize); }
1364
1365         if (((c == 32 && !hard_spaces) || (c > 32 && c <= 126 && !xhtml_reservedchar(c)))) {
1366             /* Char is OK. */
1367             if (doing)
1368             {
1369               ensure_size(plen);
1370               p[plen++] = (char)c;
1371             }
1372         } else {
1373             /* Char needs fixing up. */
1374             /* ok = FALSE; -- currently we never return FALSE; we
1375              * might want to when considering a character set for the
1376              * encoded document.
1377              */
1378             if (doing)
1379             {
1380               if (c==32) { /* a space in a word is a hard space */
1381                 ensure_size(plen+6); /* includes space for the NUL, which is subsequently stomped on */
1382                 sprintf(p+plen, "&nbsp;");
1383                 plen+=6;
1384               } else {
1385                 /* FIXME: entity names! */
1386                 ensure_size(plen+8); /* includes space for the NUL, which is subsequently stomped on */
1387                 plen+=sprintf(p+plen, "&#%04i;", (int)c);
1388               }
1389             }
1390         }
1391     }
1392     if (doing) {
1393         p = resize(p, plen+1);
1394         p[plen] = '\0';
1395         *result = p;
1396     }
1397     return ok;
1398 }
1399
1400 /*
1401  * This formats the given words as XHTML.
1402  *
1403  * `indexable', if FALSE, prohibits adding any index references.
1404  * You might use this, for example, if an index reference occurred
1405  * in a section title, to prevent phony index references when the
1406  * section title is processed in strange places such as contents
1407  * sections.
1408  */
1409 static void xhtml_rdaddwc(rdstringc *rs, word *text, word *end, int indexable) {
1410     char *c;
1411     keyword *kwl;
1412     xhtmlsection *sect;
1413     indextag *itag;
1414     int ti;
1415
1416     for (; text && text != end; text = text->next) {
1417       switch (text->type) {
1418       case word_HyperLink:
1419         xhtml_utostr(text->text, &c);
1420         rdaddsc(rs, "<a href=\"");
1421         rdaddsc(rs, c);
1422         rdaddsc(rs, "\">");
1423         sfree(c);
1424         break;
1425
1426       case word_UpperXref:
1427       case word_LowerXref:
1428         kwl = kw_lookup(keywords, text->text);
1429         if (kwl) {
1430           sect=xhtml_find_section(kwl->para);
1431           if (sect) {
1432             rdaddsc(rs, "<a href=\"");
1433             rdaddsc(rs, sect->file->filename);
1434             rdaddc(rs, '#');
1435             rdaddsc(rs, sect->fragment);
1436             rdaddsc(rs, "\">");
1437           } else {
1438             rdaddsc(rs, "<a href=\"Apologies.html\"><!-- probably a bibliography cross reference -->");
1439             error(err_whatever, "Couldn't locate cross-reference! (Probably a bibliography entry.)");
1440           }
1441         } else {
1442           rdaddsc(rs, "<a href=\"Apologies.html\"><!-- unknown cross-reference -->");
1443           error(err_whatever, "Couldn't locate cross-reference! (Wasn't in source file.)");
1444         }
1445         break;
1446
1447       case word_IndexRef: /* in theory we could make an index target here */
1448 /*        rdaddsc(rs, "<a name=\"idx-");
1449         xhtml_utostr(text->text, &c);
1450         rdaddsc(rs, c);
1451         sfree(c);
1452         rdaddsc(rs, "\"></a>");*/
1453         /* what we _do_ need to do is to fix up the backend data
1454          * for any indexentry this points to.
1455          */
1456         if (!indexable)
1457           break;
1458
1459         for (ti=0; (itag = (indextag *)index234(idx->tags, ti))!=NULL; ti++) {
1460           /* FIXME: really ustricmp() and not ustrcmp()? */
1461           if (ustricmp(itag->name, text->text)==0) {
1462             break;
1463           }
1464         }
1465         if (itag!=NULL) {
1466           if (itag->refs!=NULL) {
1467             int i;
1468             for (i=0; i<itag->nrefs; i++) {
1469               xhtmlindex *idx_ref;
1470               indexentry *ientry;
1471
1472               ientry = itag->refs[i];
1473               if (ientry->backend_data==NULL) {
1474                 idx_ref = (xhtmlindex*) smalloc(sizeof(xhtmlindex));
1475                 if (idx_ref==NULL)
1476                   fatal(err_nomemory);
1477                 idx_ref->nsection = 0;
1478                 idx_ref->size = 4;
1479                 idx_ref->sections = (xhtmlsection**) smalloc(idx_ref->size * sizeof(xhtmlsection*));
1480                 if (idx_ref->sections==NULL)
1481                   fatal(err_nomemory);
1482                 ientry->backend_data = idx_ref;
1483               } else {
1484                 idx_ref = ientry->backend_data;
1485                 if (idx_ref->nsection+1 > idx_ref->size) {
1486                   int new_size = idx_ref->size * 2;
1487                   idx_ref->sections = srealloc(idx_ref->sections, new_size * sizeof(xhtmlsection));
1488                   if (idx_ref->sections==NULL) {
1489                     fatal(err_nomemory);
1490                   }
1491                   idx_ref->size = new_size;
1492                 }
1493               }
1494               idx_ref->sections[idx_ref->nsection++] = currentsection;
1495 #if 0
1496 #endif
1497             }
1498           } else {
1499             fatal(err_whatever, "Index tag had no entries!");
1500           }
1501         } else {
1502           fprintf(stderr, "Looking for index entry '%ls'\n", text->text);
1503           fatal(err_whatever, "Couldn't locate index entry! (Wasn't in index.)");
1504         }
1505         break;
1506
1507       case word_HyperEnd:
1508       case word_XrefEnd:
1509         rdaddsc(rs, "</a>");
1510         break;
1511
1512       case word_Normal:
1513       case word_Emph:
1514       case word_Code:
1515       case word_WeakCode:
1516       case word_WhiteSpace:
1517       case word_EmphSpace:
1518       case word_CodeSpace:
1519       case word_WkCodeSpace:
1520       case word_Quote:
1521       case word_EmphQuote:
1522       case word_CodeQuote:
1523       case word_WkCodeQuote:
1524         assert(text->type != word_CodeQuote &&
1525                text->type != word_WkCodeQuote);
1526         if (towordstyle(text->type) == word_Emph &&
1527             (attraux(text->aux) == attr_First ||
1528              attraux(text->aux) == attr_Only))
1529             rdaddsc(rs, "<em>");
1530         else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) &&
1531                  (attraux(text->aux) == attr_First ||
1532                   attraux(text->aux) == attr_Only))
1533             rdaddsc(rs, "<code>");
1534
1535         if (removeattr(text->type) == word_Normal) {
1536           if (xhtml_convert(text->text, 0, &c, TRUE)) /* spaces in the word are hard */
1537             rdaddsc(rs, c);
1538           else
1539             xhtml_rdaddwc(rs, text->alt, NULL, indexable);
1540           sfree(c);
1541         } else if (removeattr(text->type) == word_WhiteSpace) {
1542           rdaddc(rs, ' ');
1543         } else if (removeattr(text->type) == word_Quote) {
1544           rdaddsc(rs, "&quot;");
1545         }
1546
1547         if (towordstyle(text->type) == word_Emph &&
1548             (attraux(text->aux) == attr_Last ||
1549              attraux(text->aux) == attr_Only))
1550             rdaddsc(rs, "</em>");
1551         else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) &&
1552                  (attraux(text->aux) == attr_Last ||
1553                   attraux(text->aux) == attr_Only))
1554             rdaddsc(rs, "</code>");
1555         break;
1556       }
1557     }
1558 }
1559
1560 /* Output a heading, formatted as XHTML.
1561  */
1562 static void xhtml_heading(FILE *fp, paragraph *p, int indexable)
1563 {
1564     rdstringc t = { 0, 0, NULL };
1565     word *tprefix = p->kwtext;
1566     word *nprefix = p->kwtext2;
1567     word *text = p->words;
1568     int level = xhtml_para_level(p);
1569     xhtmlsection *sect = xhtml_find_section(p);
1570     xhtmlheadfmt *fmt;
1571     char *fragment;
1572     if (sect) {
1573       fragment = sect->fragment;
1574     } else {
1575       if (p->type == para_Title)
1576         fragment = "title";
1577       else {
1578         fragment = ""; /* FIXME: what else can we do? */
1579         error(err_whatever, "Couldn't locate heading cross-reference!");
1580       }
1581     }
1582
1583     if (p->type == para_Title)
1584         fmt = NULL;
1585     else if (level == 1)
1586         fmt = &conf.fchapter;
1587     else if (level-1 < conf.nfsect)
1588         fmt = &conf.fsect[level-1];
1589     else
1590         fmt = &conf.fsect[conf.nfsect-1];
1591
1592     if (fmt && fmt->just_numbers && nprefix) {
1593         xhtml_rdaddwc(&t, nprefix, NULL, indexable);
1594         if (fmt) {
1595             char *c;
1596             if (xhtml_convert(fmt->number_suffix, 0, &c, FALSE)) {
1597                 rdaddsc(&t, c);
1598                 sfree(c);
1599             }
1600         }
1601     } else if (fmt && !fmt->just_numbers && tprefix) {
1602         xhtml_rdaddwc(&t, tprefix, NULL, indexable);
1603         if (fmt) {
1604             char *c;
1605             if (xhtml_convert(fmt->number_suffix, 0, &c, FALSE)) {
1606                 rdaddsc(&t, c);
1607                 sfree(c);
1608             }
1609         }
1610     }
1611     xhtml_rdaddwc(&t, text, NULL, indexable);
1612     /*
1613      * If we're outputting in single-file mode, we need to lower
1614      * the level of each heading by one, because the overall
1615      * document title will be sitting right at the top as an <h1>
1616      * and so chapters and sections should start at <h2>.
1617      *
1618      * Even if not, the document title will come back from
1619      * xhtml_para_level() as level zero, so we must increment that
1620      * no matter what leaf_level is set to.
1621      */
1622     if (conf.leaf_level == 0 || level == 0)
1623         level++;
1624     fprintf(fp, "<a name=\"%s\"></a><h%i>%s</h%i>\n", fragment, level, t.text, level);
1625     sfree(t.text);
1626 }
1627
1628 /* Output a paragraph. Styles are handled by xhtml_rdaddwc().
1629  * This looks pretty simple; I may have missed something ...
1630  */
1631 static void xhtml_para(FILE *fp, word *text, int indexable)
1632 {
1633   rdstringc out = { 0, 0, NULL };
1634   xhtml_rdaddwc(&out, text, NULL, indexable);
1635   fprintf(fp, "%s", out.text);
1636   sfree(out.text);
1637 }
1638
1639 /* Output a code paragraph. I'm treating this as preformatted, which
1640  * may not be entirely correct. See xhtml_para() for my worries about
1641  * this being overly-simple; however I think that most of the complexity
1642  * of the text backend came entirely out of word wrapping anyway.
1643  */
1644 static void xhtml_codepara(FILE *fp, word *text)
1645 {
1646   fprintf(fp, "<pre>");
1647     for (; text; text = text->next) if (text->type == word_WeakCode) {
1648         word *here, *next;
1649         char *c;
1650
1651         /*
1652          * See if this WeakCode is followed by an Emph to indicate
1653          * emphasis.
1654          */
1655         here = text;
1656         if (text->next && text->next->type == word_Emph) {
1657             next = text = text->next;
1658         } else
1659             next = NULL;
1660
1661         if (next) {
1662             wchar_t *t, *e;
1663             int n;
1664
1665             t = here->text;
1666             e = next->text;
1667
1668             while (*e) {
1669                 int ec = *e;
1670
1671                 for (n = 0; t[n] && e[n] && e[n] == ec; n++);
1672                 xhtml_convert(t, n, &c, FALSE);
1673                 fprintf(fp, "%s%s%s",
1674                         (ec == 'i' ? "<em>" : ec == 'b' ? "<b>" : ""),
1675                         c,
1676                         (ec == 'i' ? "</em>" : ec == 'b' ? "</b>" : ""));
1677                 sfree(c);
1678
1679                 t += n;
1680                 e += n;
1681             }
1682
1683             xhtml_convert(t, 0, &c, FALSE);
1684             fprintf(fp, "%s\n", c);
1685             sfree(c);
1686         } else {
1687             xhtml_convert(here->text, 0, &c, FALSE);
1688             fprintf(fp, "%s\n", c);
1689             sfree(c);
1690         }
1691     }
1692   fprintf(fp, "</pre>\n");
1693 }