mdw@git.distorted.org.uk Git - sgt/halibut/blob - bk_xhtml.c

   1 /*
   2  * xhtml backend for Halibut
   3  * (initial implementation by James Aylett)
   4  *
   5  * Still to do:
   6  *
   7  *  +++ doesn't handle non-breaking hyphens. Not sure how to yet.
   8  *  +++ entity names (from a file -- ideally supply normal SGML files)
   9  *  +++ configuration directive to file split where the current layout
  10  *      code wouldn't. Needs changes to _ponder_layout() and _do_paras(),
  11  *      perhaps others.
  12  *
  13  * Limitations:
  14  *
  15  *  +++ biblio/index references target the nearest section marker, rather
  16  *   than having a dedicated target themselves. In large bibliographies
  17  *   this will cause problems. (The solution is to fake up a response
  18  *   from xhtml_find_section(), probably linking it into the sections
  19  *   chain just in case we need it again, and to make freeing it up
  20  *   easier.) docsrc.pl used to work as we do, however, and SGT agrees that
  21  *   this is acceptable for now.
  22  *  +++ can't cope with leaf-level == 0. It's all to do with the
  23  *   top-level file not being normal, probably not even having a valid
  24  *   section level, and stuff like that. I question whether this is an
  25  *   issue, frankly; small manuals that fit on one page should probably
  26  *   not be written in halibut at all.
  27  */
  28
  29 #include <stdio.h>
  30 #include <stdlib.h>
  31 #include <assert.h>
  32 #include "halibut.h"
  33
/*
 * One section of the document (any paragraph for which
 * xhtml_para_level() returns a value greater than zero), plus the
 * sentinel section created at the start of xhtml_ponder_layout().
 */
struct xhtmlsection_Struct {
    struct xhtmlsection_Struct *next; /* next sibling (NULL if split across files) */
    struct xhtmlsection_Struct *child; /* first child section; NULL if split across files */
    struct xhtmlsection_Struct *parent; /* enclosing section; NULL if split across files */
    struct xhtmlsection_Struct *chain; /* previously created section: a flat list of every
                                        * section, independent of the weird trees */
    paragraph *para; /* the heading paragraph that starts this section (NULL for the sentinel) */
    struct xhtmlfile_Struct *file; /* which file is this a part of? */
    char *fragment; /* fragment id within the file (allocated; freed by xhtml_backend()) */
    int level; /* value of xhtml_para_level() for 'para'; -1 marks the sentinel at the end of the chain */
};
  44
/*
 * One output file. Files form a tree (next/child/parent) that is
 * written out by xhtml_do_files() and released by xhtml_free_file().
 */
struct xhtmlfile_Struct {
    struct xhtmlfile_Struct *next; /* next sibling file */
    struct xhtmlfile_Struct *child; /* first file nested below this one */
    struct xhtmlfile_Struct *parent; /* file this one is nested below (NULL for the top file) */
    char *filename; /* allocated name of the output file */
    struct xhtmlsection_Struct *sections; /* sections within this file (only one for non-leaf) */
    int is_leaf; /* is this file a leaf file, ie does it not have any children? */
};
  53
/* Short names for the structures used throughout this backend. */
typedef struct xhtmlsection_Struct xhtmlsection;
typedef struct xhtmlfile_Struct xhtmlfile;
typedef struct xhtmlindex_Struct xhtmlindex;

/*
 * Per-index-entry data hung off indexentry->backend_data: the list of
 * sections that xhtml_do_index_body() links to for that entry.
 */
struct xhtmlindex_Struct {
  int nsection; /* number of entries of 'sections' in use */
  int size; /* presumably the allocated capacity of 'sections' -- TODO confirm against the code that fills it */
  xhtmlsection **sections; /* array of referenced sections; entries may be NULL */
};
  63
/*
 * Heading format for one kind of heading, as configured by the
 * xhtml-chapter-* and xhtml-section-* directives.
 */
typedef struct {
    int just_numbers; /* boolean from the "...-numeric" directive */
    wchar_t *number_suffix; /* string from the "...-suffix" directive; presumably
                             * printed after the heading number -- TODO confirm */
} xhtmlheadfmt;
  68
/*
 * Backend configuration, filled in by xhtml_configure() from the
 * para_Config paragraphs of the source document.
 */
typedef struct {
  int contents_depth[6]; /* "xhtml-contents-depth-0" .. "-5" */
  int leaf_contains_contents; /* "xhtml-leaf-contains-contents": leaf files may carry a contents list */
  int leaf_level; /* "xhtml-leaf-level": section level at which files stop being split */
  int leaf_smallest_contents; /* "xhtml-leaf-smallest-contents": minimum value returned by
                               * xhtml_do_contents() before a leaf file gets a contents list */
  int include_version_id; /* "xhtml-versionid" */
  wchar_t *author, *description; /* "xhtml-author", "xhtml-description" */
  /* "xhtml-head-end", "xhtml-body-tag", "xhtml-body-start", "xhtml-body-end",
   * "xhtml-address-start", "xhtml-address-end", "xhtml-navigation-attributes" */
  wchar_t *head_end, *body, *body_start, *body_end, *address_start, *address_end, *nav_attrs;
  int suppress_address; /* "xhtml-suppress-address" */
  xhtmlheadfmt fchapter, *fsect; /* chapter format; array of section formats */
  int nfsect; /* number of elements in 'fsect' */
} xhtmlconfig;
  81
  82 /*static void xhtml_level(paragraph *, int);
  83 static void xhtml_level_0(paragraph *);
  84 static void xhtml_docontents(FILE *, paragraph *, int);
  85 static void xhtml_dosections(FILE *, paragraph *, int);
  86 static void xhtml_dobody(FILE *, paragraph *, int);*/
  87
/* Page furniture written at the start and end of every output file. */
static void xhtml_doheader(FILE *, word *);
static void xhtml_dofooter(FILE *);
static void xhtml_versionid(FILE *, word *, int);

/* Small helpers: wide-to-narrow conversion and paragraph classification. */
static void xhtml_utostr(wchar_t *, char **);
static int xhtml_para_level(paragraph *);
static int xhtml_reservedchar(int);

/* Text output routines. */
static int xhtml_convert(wchar_t *, char **, int);
static void xhtml_rdaddwc(rdstringc *, word *, word *);
static void xhtml_para(FILE *, word *);
static void xhtml_codepara(FILE *, word *);
static void xhtml_heading(FILE *, paragraph *);
 101
 102 /* File-global variables are much easier than passing these things
 103  * all over the place. Evil, but easier. We can replace this with a single
 104  * structure at some point.
 105  */
static xhtmlconfig conf; /* configuration; set by xhtml_backend() from xhtml_configure() */
static keywordlist *keywords; /* keyword list passed to xhtml_backend() */
static indexdata *idx; /* index data passed to xhtml_backend() */
static xhtmlfile *topfile; /* root of the file tree built by xhtml_ponder_layout() */
static xhtmlsection *topsection; /* head of the section 'chain' list (most recently created section) */
static paragraph *sourceparas; /* first paragraph of the document being processed */
static xhtmlfile *lastfile; /* reset to NULL by xhtml_ponder_layout() */
static xhtmlfile *xhtml_last_file = NULL; /* file most recently written by xhtml_do_file();
                                           * target of the "Previous" navigation link */
static int last_level=-1; /* NOTE(review): not used in the part of the file reviewed here */
static xhtmlsection *currentsection; /* NOTE(review): not used in the part of the file reviewed here */
 116
 117 static xhtmlconfig xhtml_configure(paragraph *source)
 118 {
 119   xhtmlconfig ret;
 120
 121   /*
 122    * Defaults.
 123    */
 124   ret.contents_depth[0] = 2;
 125   ret.contents_depth[1] = 3;
 126   ret.contents_depth[2] = 4;
 127   ret.contents_depth[3] = 5;
 128   ret.contents_depth[4] = 6;
 129   ret.contents_depth[5] = 7;
 130   ret.leaf_level = 2;
 131   ret.leaf_smallest_contents = 4;
 132   ret.leaf_contains_contents = FALSE;
 133   ret.include_version_id = TRUE;
 134   ret.author = NULL;
 135   ret.description = NULL;
 136   ret.head_end = NULL;
 137   ret.body = NULL;
 138   ret.body_start = NULL;
 139   ret.body_end = NULL;
 140   ret.address_start = NULL;
 141   ret.address_end = NULL;
 142   ret.nav_attrs = NULL;
 143   ret.suppress_address = FALSE;
 144
 145   ret.fchapter.just_numbers = FALSE;
 146   ret.fchapter.number_suffix = ustrdup(L": ");
 147   ret.nfsect = 2;
 148   ret.fsect = mknewa(xhtmlheadfmt, ret.nfsect);
 149   ret.fsect[0].just_numbers = FALSE;
 150   ret.fsect[0].number_suffix = ustrdup(L": ");
 151   ret.fsect[1].just_numbers = TRUE;
 152   ret.fsect[1].number_suffix = ustrdup(L" ");
 153
 154   for (; source; source = source->next)
 155   {
 156     if (source->type == para_Config)
 157     {
 158              if (!ustricmp(source->keyword, L"xhtml-contents-depth-0")) {
 159         ret.contents_depth[0] = utoi(uadv(source->keyword));
 160       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-1")) {
 161         ret.contents_depth[1] = utoi(uadv(source->keyword));
 162       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-2")) {
 163         ret.contents_depth[2] = utoi(uadv(source->keyword));
 164       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-3")) {
 165         ret.contents_depth[3] = utoi(uadv(source->keyword));
 166       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-4")) {
 167         ret.contents_depth[4] = utoi(uadv(source->keyword));
 168       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-5")) {
 169         ret.contents_depth[5] = utoi(uadv(source->keyword));
 170       } else if (!ustricmp(source->keyword, L"xhtml-leaf-level")) {
 171         ret.leaf_level = utoi(uadv(source->keyword));
 172       } else if (!ustricmp(source->keyword, L"xhtml-leaf-smallest-contents")) {
 173         ret.leaf_smallest_contents = utoi(uadv(source->keyword));
 174       } else if (!ustricmp(source->keyword, L"xhtml-versionid")) {
 175         ret.include_version_id = utob(uadv(source->keyword));
 176       } else if (!ustricmp(source->keyword, L"xhtml-leaf-contains-contents")) {
 177         ret.leaf_contains_contents = utob(uadv(source->keyword));
 178       } else if (!ustricmp(source->keyword, L"xhtml-suppress-address")) {
 179         ret.suppress_address = utob(uadv(source->keyword));
 180       } else if (!ustricmp(source->keyword, L"xhtml-author")) {
 181         ret.author = uadv(source->keyword);
 182       } else if (!ustricmp(source->keyword, L"xhtml-description")) {
 183         ret.description = uadv(source->keyword);
 184       } else if (!ustricmp(source->keyword, L"xhtml-head-end")) {
 185         ret.head_end = uadv(source->keyword);
 186       } else if (!ustricmp(source->keyword, L"xhtml-body-start")) {
 187         ret.body_start = uadv(source->keyword);
 188       } else if (!ustricmp(source->keyword, L"xhtml-body-tag")) {
 189         ret.body = uadv(source->keyword);
 190       } else if (!ustricmp(source->keyword, L"xhtml-body-end")) {
 191         ret.body_end = uadv(source->keyword);
 192       } else if (!ustricmp(source->keyword, L"xhtml-address-start")) {
 193         ret.address_start = uadv(source->keyword);
 194       } else if (!ustricmp(source->keyword, L"xhtml-address-end")) {
 195         ret.address_end = uadv(source->keyword);
 196       } else if (!ustricmp(source->keyword, L"xhtml-navigation-attributes")) {
 197         ret.nav_attrs = uadv(source->keyword);
 198       } else if (!ustricmp(source->keyword, L"xhtml-chapter-numeric")) {
 199         ret.fchapter.just_numbers = utob(uadv(source->keyword));
 200       } else if (!ustricmp(source->keyword, L"xhtml-chapter-suffix")) {
 201         ret.fchapter.number_suffix = uadv(source->keyword);
 202       } else if (!ustricmp(source->keyword, L"xhtml-section-numeric")) {
 203         wchar_t *p = uadv(source->keyword);
 204         int n = 0;
 205         if (uisdigit(*p)) {
 206           n = utoi(p);
 207           p = uadv(p);
 208         }
 209         if (n >= ret.nfsect) {
 210           int i;
 211           ret.fsect = resize(ret.fsect, n+1);
 212           for (i = ret.nfsect; i <= n; i++)
 213             ret.fsect[i] = ret.fsect[ret.nfsect-1];
 214           ret.nfsect = n+1;
 215         }
 216         ret.fsect[n].just_numbers = utob(p);
 217       } else if (!ustricmp(source->keyword, L"xhtml-section-suffix")) {
 218         wchar_t *p = uadv(source->keyword);
 219         int n = 0;
 220         if (uisdigit(*p)) {
 221           n = utoi(p);
 222           p = uadv(p);
 223         }
 224         if (n >= ret.nfsect) {
 225           int i;
 226           ret.fsect = resize(ret.fsect, n+1);
 227           for (i = ret.nfsect; i <= n; i++)
 228             ret.fsect[i] = ret.fsect[ret.nfsect-1];
 229           ret.nfsect = n+1;
 230         }
 231         ret.fsect[n].number_suffix = p;
 232       }
 233     }
 234   }
 235
 236   /*  printf(" !!! leaf_level = %i\n", ret.leaf_level);
 237   printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]);
 238   printf(" !!! contentdepth-1 = %i\n", ret.contents_depth[1]);
 239   printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]);
 240   printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]);
 241   printf(" !!! contentdepth-4 = %i\n", ret.contents_depth[4]);
 242   printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]);
 243   printf(" !!! leaf_contains_contents = %i\n", ret.leaf_contains_contents);*/
 244   return ret;
 245 }
 246
 247 static xhtmlsection *xhtml_new_section(xhtmlsection *last)
 248 {
 249   xhtmlsection *ret = mknew(xhtmlsection);
 250   ret->next=NULL;
 251   ret->child=NULL;
 252   ret->parent=NULL;
 253   ret->chain=last;
 254   ret->para=NULL;
 255   ret->file=NULL;
 256   ret->fragment=NULL;
 257   ret->level=-1; /* marker: end of chain */
 258   return ret;
 259 }
 260
 261 /* Returns NULL or the section that marks that paragraph */
 262 static xhtmlsection *xhtml_find_section(paragraph *p)
 263 {
 264   xhtmlsection *ret = topsection;
 265   if (xhtml_para_level(p)==-1) { /* first, we back-track to a section paragraph */
 266     paragraph *p2 = sourceparas;
 267     paragraph *p3 = NULL;
 268     while (p2 && p2!=p) {
 269       if (xhtml_para_level(p2)!=-1) {
 270         p3 = p2;
 271       }
 272       p2=p2->next;
 273     }
 274     if (p3==NULL) { /* for some reason, we couldn't find a section before this paragraph ... ? */
 275       /* Note that this can happen, if you have a cross-reference to before the first chapter starts.
 276        * So don't do that, then.
 277        */
 278       return NULL;
 279     }
 280     p=p3;
 281   }
 282   while (ret && ret->para != p) {
 283 /*    printf(" xhtml_find_section(): checking %s for para @ %p\n", ret->fragment, p);*/
 284     ret=ret->chain;
 285   }
 286   return ret;
 287 }
 288
 289 static xhtmlfile *xhtml_new_file(xhtmlsection *sect)
 290 {
 291   xhtmlfile *ret = mknew(xhtmlfile);
 292
 293   ret->next=NULL;
 294   ret->child=NULL;
 295   ret->parent=NULL;
 296   ret->filename=NULL;
 297   ret->sections=sect;
 298   ret->is_leaf=(sect!=NULL && sect->level==conf.leaf_level);
 299   if (sect==NULL) {
 300     if (conf.leaf_level==0) { /* currently unused */
 301 #define FILENAME_MANUAL "Manual.html"
 302 #define FILENAME_CONTENTS "Contents.html"
 303       ret->filename = smalloc(strlen(FILENAME_MANUAL)+1);
 304       sprintf(ret->filename, FILENAME_MANUAL);
 305     } else {
 306       ret->filename = smalloc(strlen(FILENAME_CONTENTS)+1);
 307       sprintf(ret->filename, FILENAME_CONTENTS);
 308     }
 309   } else {
 310     paragraph *p = sect->para;
 311     rdstringc fname_c = { 0, 0, NULL };
 312     char *c;
 313     word *w;
 314     for (w=(p->kwtext)?(p->kwtext):(p->words); w; w=w->next)
 315     {
 316       switch (removeattr(w->type))
 317       {
 318       case word_Normal:
 319         /*case word_Emph:
 320         case word_Code:
 321         case word_WeakCode:*/
 322         xhtml_utostr(w->text, &c);
 323         rdaddsc(&fname_c,c);
 324         sfree(c);
 325         break;
 326       }
 327     }
 328     rdaddsc(&fname_c, ".html");
 329     ret->filename = rdtrimc(&fname_c);
 330   }
 331   /*  printf(" ! new file '%s', is_leaf == %s\n", ret->filename, (ret->is_leaf)?("true"):("false"));*/
 332   return ret;
 333 }
 334
 335 /*
 336  * Walk the tree fixing up files which are actually leaf (ie
 337  * have no children) but aren't at leaf level, so they have the
 338  * leaf flag set.
 339  */
 340 void xhtml_fixup_layout(xhtmlfile* file)
 341 {
 342   if (file->child==NULL) {
 343     file->is_leaf = TRUE;
 344   } else {
 345     xhtml_fixup_layout(file->child);
 346   }
 347   if (file->next)
 348     xhtml_fixup_layout(file->next);
 349 }
 350
 351 /*
 352  * Create the tree structure so we know where everything goes.
 353  * Method:
 354  *
 355  * Ignoring file splitting, we have three choices with each new section:
 356  *
 357  * +-----------------+-----------------+
 358  * |                 |                 |
 359  * X            +----X----+           (1)
 360  *              |         |
 361  *              Y        (2)
 362  *              |
 363  *             (3)
 364  *
 365  * Y is the last section we added (currentsect).
 366  * If sect is the section we want to add, then:
 367  *
 368  * (1) if sect->level < currentsect->level
 369  * (2) if sect->level == currentsect->level
 370  * (3) if sect->level > currentsect->level
 371  *
 372  * This requires the constraint that you never skip section numbers
 373  * (so you can't have a.b.c.d without all of a, a.b and a.b.c existing).
 374  *
 375  * Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change
 376  * more than one level at a time. Lots of asserts, and probably part of
 377  * the algorithm here, rely on this being true. (It currently isn't
 378  * enforced by halibut, however.)
 379  *
 380  * File splitting makes this harder. For instance, say we added at (3)
 381  * above and now need to add another section. We are splitting at level
 382  * 2, ie the level of Y. Z is the last section we added:
 383  *
 384  * +-----------------+-----------------+
 385  * |                 |                 |
 386  * X            +----X----+           (1)
 387  *              |         |
 388  *         +----Y----+   (1)
 389  *         |         |
 390  *         Z        (2)
 391  *         |
 392  *        (3)
 393  *
 394  * The (1) case is now split; we need to search upwards to find where
 395  * to actually link in. The other two cases remain the same (and will
 396  * always be like this).
 397  *
 398  * File splitting makes this harder, however. The decision of whether
 399  * to split to a new file is always on the same condition, however (is
 400  * the level of this section higher than the leaf_level configuration
 401  * value or not).
 402  *
 403  * Treating the cases backwards:
 404  *
 405  * (3) same file if sect->level > conf.leaf_level, otherwise new file
 406  *
 407  *     if in the same file, currentsect->child points to sect
 408  *     otherwise the linking is done through the file tree (which works
 409  *     in more or less the same way, ie currentfile->child points to
 410  *     the new file)
 411  *
 412  * (2) same file if sect->level > conf.leaf_level, otherwise new file
 413  *
 414  *     if in the same file, currentsect->next points to sect
 415  *     otherwise file linking and currentfile->next points to the new
 416  *     file (we know that Z must have caused a new file to be created)
 417  *
 418  * (1) same file if sect->level > conf.leaf_level, otherwise new file
 419  *
 420  *     this is actually effectively the same case as (2) here,
 421  *     except that we first have to travel up the sections to figure
 422  *     out which section this new one will be a sibling of. In doing
 423  *     so, we may disappear off the top of a file and have to go up
 424  *     to its parent in the file tree.
 425  *
 426  */
/*
 * Build the section chain and the file tree for the paragraph list
 * starting at 'p'.
 *
 * A sentinel section (level -1) and the top-level file are created
 * first. Then every paragraph whose xhtml_para_level() is greater than
 * zero gets a section record, whose fragment id is the concatenated
 * word_Normal text of its kwtext2 (or of its words when there is no
 * kwtext2), and that record is attached either to the current file or
 * to a newly created one.
 *
 * On return, topsection is the most recently created section (the head
 * of the 'chain' list), topfile is the root of the file tree, and
 * xhtml_fixup_layout() has marked every childless file as a leaf.
 * Calls fatal() if a section cannot be attached anywhere.
 */
static void xhtml_ponder_layout(paragraph *p)
{
  xhtmlsection *lastsection; /* most recently created section (head of the chain) */
  xhtmlsection *currentsect; /* section the next one is positioned relative to */
  xhtmlfile *currentfile;    /* file the next section is positioned relative to */

  lastfile = NULL;
  topsection = xhtml_new_section(NULL);
  topfile = xhtml_new_file(NULL);
  lastsection = topsection;
  currentfile = topfile;
  currentsect = topsection;

  /* single-file mode: everything lives in the top file, under the sentinel */
  if (conf.leaf_level == 0) {
    topfile->is_leaf = 1;
    topfile->sections = topsection;
    topsection->file = topfile;
  }

  for (; p; p=p->next)
  {
    int level = xhtml_para_level(p);
    if (level>0) /* actually a section */
    {
      xhtmlsection *sect;
      word *w;
      char *c;
      rdstringc fname_c = { 0, 0, NULL };

      sect = xhtml_new_section(lastsection);
      lastsection = sect;
      sect->para = p;
      for (w=(p->kwtext2)?(p->kwtext2):(p->words); w; w=w->next) /* kwtext2 because we want numbers only! */
      {
        switch (removeattr(w->type))
        {
        case word_Normal:
         /*case word_Emph:
         case word_Code:
         case word_WeakCode:*/
          xhtml_utostr(w->text, &c);
          rdaddsc(&fname_c,c);
          sfree(c);
          break;
        }
      }
/*      rdaddsc(&fname_c, ".html");*/
      sect->fragment = rdtrimc(&fname_c);
      sect->level = level;
      /*      printf(" ! adding para @ %p as sect %s, level %i\n", sect->para, sect->fragment, level);*/

      if (level>currentsect->level) { /* case (3) */
        if (level>conf.leaf_level) { /* same file */
          assert(currentfile->is_leaf);
          currentsect->child = sect;
          sect->parent=currentsect;
          sect->file=currentfile;
          /*          printf("connected '%s' to existing file '%s' [I]\n", sect->fragment, currentfile->filename);*/
          currentsect=sect;
        } else { /* new file */
          xhtmlfile *file = xhtml_new_file(sect);
          assert(!currentfile->is_leaf);
          currentfile->child=file;
          sect->file=file;
          file->parent=currentfile;
          /*          printf("connected '%s' to new file '%s' [I]\n", sect->fragment, file->filename);*/
          currentfile=file;
          currentsect=sect;
        }
      } else if (level >= currentsect->file->sections->level) {
        /* Case (1) or (2) *AND* still under the section that starts
         * the current file.
         *
         * I'm not convinced that this couldn't be rolled in with the
         * final else {} leg further down. It seems a lot of effort
         * this way.
         *
         * NOTE(review): this test dereferences currentsect->file; it
         * is only reached once a first section has gone through case
         * (3), which sets the file of the section it adds.
         */
        if (level>conf.leaf_level) { /* stick within the same file */
          assert(currentfile->is_leaf);
          sect->file = currentfile;
          /* climb to the section at the new section's level.
           * NOTE(review): currentsect->parent is dereferenced without a
           * NULL check; this relies on the "never skip section
           * numbers" constraint -- confirm */
          while (currentsect && currentsect->level > level &&
                 currentsect->file==currentsect->parent->file) {
            currentsect = currentsect->parent;
          }
          assert(currentsect);
          currentsect->next = sect;
          assert(currentsect->level == sect->level);
          sect->parent = currentsect->parent;
          currentsect = sect;
          /*          printf("connected '%s' to existing file '%s' [II]\n", sect->fragment, currentfile->filename);*/
        } else { /* new file */
          xhtmlfile *file = xhtml_new_file(sect);
          sect->file=file;
          currentfile->next=file;
          file->parent=currentfile->parent;
          file->is_leaf=(level==conf.leaf_level);
          file->sections=sect;
          /*          printf("connected '%s' to new file '%s' [II]\n", sect->fragment, file->filename);*/
          currentfile=file;
          currentsect=sect;
        }
      } else { /* Case (1) or (2) and we must move up the file tree first */
        /* this loop is now probably irrelevant - we know we can't connect
         * to anything in the current file */
        while (currentsect && level<currentsect->level) {
          currentsect=currentsect->parent;
          if (currentsect) {
            /*            printf(" * up one level to '%s'\n", currentsect->fragment);*/
          } else {
            /*            printf(" * up one level (off top of current file)\n");*/
          }
        }
        if (currentsect) {
          /* I'm pretty sure this can now never fire */
          assert(currentfile->is_leaf);
          /*          printf("connected '%s' to existing file '%s' [III]\n", sect->fragment, currentfile->filename);*/
          sect->file = currentfile;
          currentsect->next=sect;
          currentsect=sect;
        } else { /* find a file we can attach to */
          /* climb the file tree until we reach a file whose first
           * section is not deeper than the new one */
          while (currentfile && currentfile->sections && level<currentfile->sections->level) {
            currentfile=currentfile->parent;
            if (currentfile) {
              /*              printf(" * up one file level to '%s'\n", currentfile->filename);*/
            } else {
              /*              printf(" * up one file level (off top of tree)\n");*/
            }
          }
          if (currentfile) { /* new file (we had to skip up a file to
                                get here, so we must be dealing with a
                                level no lower than the configured
                                leaf_level */
            xhtmlfile *file = xhtml_new_file(sect);
            currentfile->next=file;
            sect->file=file;
            file->parent=currentfile->parent;
            file->is_leaf=(level==conf.leaf_level);
            file->sections=sect;
            /*            printf("connected '%s' to new file '%s' [III]\n", sect->fragment, file->filename);*/
            currentfile=file;
            currentsect=sect;
          } else {
            fatal(err_whatever, "Ran off the top trying to connect sibling: strange document.");
          }
        }
      }
    }
  }
  topsection = lastsection; /* get correct end of the chain */
  xhtml_fixup_layout(topfile); /* leaf files not at leaf level marked as such */
}
 578
/* Forward declarations for the output routines. */
static void xhtml_do_index(); /* writes the separate index page */
static void xhtml_do_file(xhtmlfile *file); /* writes one non-top file */
static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform); /* writes the top-level file */
static void xhtml_do_paras(FILE *fp, paragraph *p);
static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit);
static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit);
static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit);
static int xhtml_do_contents(FILE *fp, xhtmlfile *file); /* also called with fp == NULL by xhtml_do_file() */
static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file);
static void xhtml_do_sections(FILE *fp, xhtmlsection *sections);
 589
 590 /*
 591  * Do all the files in this structure.
 592  */
 593 static void xhtml_do_files(xhtmlfile *file)
 594 {
 595   xhtml_do_file(file);
 596   if (file->child)
 597     xhtml_do_files(file->child);
 598   if (file->next)
 599     xhtml_do_files(file->next);
 600 }
 601
 602 /*
 603  * Free up all memory used by the file tree from 'xfile' downwards
 604  */
 605 static void xhtml_free_file(xhtmlfile* xfile)
 606 {
 607   if (xfile==NULL) {
 608     return;
 609   }
 610
 611   if (xfile->filename) {
 612     sfree(xfile->filename);
 613   }
 614   xhtml_free_file(xfile->child);
 615   xhtml_free_file(xfile->next);
 616   sfree(xfile);
 617 }
 618
 619 /*
 620  * Main function.
 621  */
 622 void xhtml_backend(paragraph *sourceform, keywordlist *in_keywords,
 623                    indexdata *in_idx)
 624 {
 625 /*  int i;*/
 626   indexentry *ientry;
 627   int ti;
 628   xhtmlsection *xsect;
 629
 630   sourceparas = sourceform;
 631   conf = xhtml_configure(sourceform);
 632   keywords = in_keywords;
 633   idx = in_idx;
 634
 635   /* Clear up the index entries backend data pointers */
 636   for (ti=0; (ientry = (indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
 637     ientry->backend_data=NULL;
 638   }
 639
 640   xhtml_ponder_layout(sourceform);
 641
 642   /* old system ... (writes to *.alt, but gets some stuff wrong and is ugly) */
 643 /*  xhtml_level_0(sourceform);
 644   for (i=1; i<=conf.leaf_level; i++)
 645   {
 646     xhtml_level(sourceform, i);
 647   }*/
 648
 649   /* new system ... (writes to *.html, but isn't fully trusted) */
 650   xhtml_do_top_file(topfile, sourceform);
 651   assert(!topfile->next); /* shouldn't have a sibling at all */
 652   if (topfile->child) {
 653     xhtml_do_files(topfile->child);
 654     xhtml_do_index();
 655   }
 656
 657   /* release file, section, index data structures */
 658   xsect = topsection;
 659   while (xsect) {
 660     xhtmlsection *tmp = xsect->chain;
 661     if (xsect->fragment) {
 662       sfree(xsect->fragment);
 663     }
 664     sfree(xsect);
 665     xsect = tmp;
 666   }
 667   xhtml_free_file(topfile);
 668   for (ti = 0; (ientry=(indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
 669     if (ientry->backend_data!=NULL) {
 670       xhtmlindex *xi = (xhtmlindex*) ientry->backend_data;
 671       if (xi->sections!=NULL) {
 672         sfree(xi->sections);
 673       }
 674       sfree(xi);
 675     }
 676     ientry->backend_data = NULL;
 677   }
 678 }
 679
 680 static int xhtml_para_level(paragraph *p)
 681 {
 682   switch (p->type)
 683   {
 684   case para_Title:
 685     return 0;
 686     break;
 687   case para_UnnumberedChapter:
 688   case para_Chapter:
 689   case para_Appendix:
 690     return 1;
 691     break;
 692 /*  case para_BiblioCited:
 693     return 2;
 694     break;*/
 695   case para_Heading:
 696   case para_Subsect:
 697     return p->aux+2;
 698     break;
 699   default:
 700     return -1;
 701     break;
 702   }
 703 }
 704
/* Name of the separate index page: opened by xhtml_do_index() and used
 * by xhtml_donavlinks() as the "Next" target when no file follows. */
static char* xhtml_index_filename = "IndexPage.html";
 706
 707 /* Output the nav links for the current file.
 708  * file == NULL means we're doing the index
 709  */
 710 static void xhtml_donavlinks(FILE *fp, xhtmlfile *file)
 711 {
 712   xhtmlfile *xhtml_next_file = NULL;
 713   fprintf(fp, "<p");
 714   if (conf.nav_attrs!=NULL) {
 715     fprintf(fp, " %ls>", conf.nav_attrs);
 716   } else {
 717     fprintf(fp, ">");
 718   }
 719   if (xhtml_last_file==NULL) {
 720     fprintf(fp, "Previous | ");
 721   } else {
 722     fprintf(fp, "<a href='%s'>Previous</a> | ", xhtml_last_file->filename);
 723   }
 724   fprintf(fp, "<a href='Contents.html'>Contents</a> | ");
 725   if (file != NULL) { /* otherwise we're doing nav links for the index */
 726     if (xhtml_next_file==NULL)
 727       xhtml_next_file = file->child;
 728     if (xhtml_next_file==NULL)
 729       xhtml_next_file = file->next;
 730     if (xhtml_next_file==NULL)
 731       xhtml_next_file = file->parent->next;
 732   }
 733   if (xhtml_next_file==NULL) {
 734     if (file==NULL) { /* index, so no next file */
 735       fprintf(fp, "Next ");
 736     } else {
 737       fprintf(fp, "<a href='%s'>Next</a>", xhtml_index_filename);
 738     }
 739   } else {
 740     fprintf(fp, "<a href='%s'>Next</a>", xhtml_next_file->filename);
 741   }
 742   fprintf(fp, "</p>\n");
 743 }
 744
 745 /* Write out the index file */
 746 static void xhtml_do_index_body(FILE *fp)
 747 {
 748   indexentry *y;
 749   int ti;
 750
 751   if (count234(idx->entries) == 0)
 752     return;                            /* don't write anything at all */
 753
 754   fprintf(fp, "<dl>\n");
 755   /* iterate over idx->entries using the tree functions and display everything */
 756   for (ti = 0; (y = (indexentry *)index234(idx->entries, ti)) != NULL; ti++) {
 757     if (y->backend_data) {
 758       int i;
 759       xhtmlindex *xi;
 760
 761       fprintf(fp, "<dt>");
 762       xhtml_para(fp, y->text);
 763       fprintf(fp, "</dt>\n<dd>");
 764
 765       xi = (xhtmlindex*) y->backend_data;
 766       for (i=0; i<xi->nsection; i++) {
 767         xhtmlsection *sect = xi->sections[i];
 768         if (sect) {
 769           fprintf(fp, "<a href='%s#%s'>", sect->file->filename, sect->fragment);
 770           if (sect->para->kwtext) {
 771             xhtml_para(fp, sect->para->kwtext);
 772           } else if (sect->para->words) {
 773             xhtml_para(fp, sect->para->words);
 774           }
 775           fprintf(fp, "</a>");
 776           if (i+1<xi->nsection) {
 777             fprintf(fp, ", ");
 778           }
 779         }
 780       }
 781       fprintf(fp, "</dd>\n");
 782     }
 783   }
 784   fprintf(fp, "</dl>\n");
 785 }
 786 static void xhtml_do_index()
 787 {
 788   word temp_word = { NULL, NULL, word_Normal, 0, 0, L"Index", { NULL, 0, 0} };
 789   FILE *fp = fopen(xhtml_index_filename, "w");
 790
 791   if (fp==NULL)
 792     fatal(err_cantopenw, xhtml_index_filename);
 793   xhtml_doheader(fp, &temp_word);
 794   xhtml_donavlinks(fp, NULL);
 795
 796   xhtml_do_index_body(fp);
 797
 798   xhtml_donavlinks(fp, NULL);
 799   xhtml_dofooter(fp);
 800   fclose(fp);
 801 }
 802
 803 /* Output the given file. This includes whatever contents at beginning and end, etc. etc. */
 804 static void xhtml_do_file(xhtmlfile *file)
 805 {
 806   FILE *fp = fopen(file->filename, "w");
 807   if (fp==NULL)
 808     fatal(err_cantopenw, file->filename);
 809
 810   if (file->sections->para->words) {
 811     xhtml_doheader(fp, file->sections->para->words);
 812   } else if (file->sections->para->kwtext) {
 813     xhtml_doheader(fp, file->sections->para->kwtext);
 814   } else {
 815     xhtml_doheader(fp, NULL);
 816   }
 817
 818   xhtml_donavlinks(fp, file);
 819
 820   if (file->is_leaf && conf.leaf_contains_contents &&
 821       xhtml_do_contents(NULL, file)>=conf.leaf_smallest_contents)
 822     xhtml_do_contents(fp, file);
 823   xhtml_do_sections(fp, file->sections);
 824   if (!file->is_leaf)
 825     xhtml_do_naked_contents(fp, file);
 826
 827   xhtml_donavlinks(fp, file);
 828
 829   xhtml_dofooter(fp);
 830   fclose(fp);
 831
 832   xhtml_last_file = file;
 833 }
 834
 835 /* Output the top-level file. */
 836 static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform)
 837 {
 838   paragraph *p;
 839   int done=FALSE;
 840   FILE *fp = fopen(file->filename, "w");
 841   if (fp==NULL)
 842     fatal(err_cantopenw, file->filename);
 843
 844   /* Do the title -- only one allowed */
 845   for (p = sourceform; p && !done; p = p->next)
 846   {
 847     if (p->type == para_Title)
 848     {
 849       xhtml_doheader(fp, p->words);
 850       done=TRUE;
 851     }
 852   }
 853   if (!done)
 854     xhtml_doheader(fp, NULL /* Eek! */);
 855
 856   /*
 857    * Display the title.
 858    */
 859   for (p = sourceform; p; p = p->next)
 860   {
 861     if (p->type == para_Title) {
 862       xhtml_heading(fp, p);
 863       break;
 864     }
 865   }
 866
 867   /* Do the preamble and copyright */
 868   for (p = sourceform; p; p = p->next)
 869   {
 870     if (p->type == para_Preamble)
 871     {
 872       fprintf(fp, "<p>");
 873       xhtml_para(fp, p->words);
 874       fprintf(fp, "</p>\n");
 875     }
 876   }
 877   for (p = sourceform; p; p = p->next)
 878   {
 879     if (p->type == para_Copyright)
 880     {
 881       fprintf(fp, "<p>");
 882       xhtml_para(fp, p->words);
 883       fprintf(fp, "</p>\n");
 884     }
 885   }
 886
 887   xhtml_do_contents(fp, file);
 888   xhtml_do_sections(fp, file->sections);
 889
 890   /*
 891    * Put the index in the top file if we're in single-file mode
 892    * (leaf-level 0).
 893    */
 894   if (conf.leaf_level == 0 && count234(idx->entries) > 0) {
 895     fprintf(fp, "<a name=\"index\"></a><h1>Index</h1>\n");
 896     xhtml_do_index_body(fp);
 897   }
 898
 899   xhtml_dofooter(fp);
 900   fclose(fp);
 901 }
 902
 903 /* Convert a Unicode string to an ASCII one. '?' is
 904  * used for unmappable characters.
 905  */
 906 static void xhtml_utostr(wchar_t *in, char **out)
 907 {
 908   int l = ustrlen(in);
 909   int i;
 910   *out = smalloc(l+1);
 911   for (i=0; i<l; i++)
 912   {
 913     if (in[i]>=32 && in[i]<=126)
 914       (*out)[i]=(char)in[i];
 915     else
 916       (*out)[i]='?';
 917   }
 918   (*out)[i]=0;
 919 }
 920
 921 /*
 922  * Write contents for the given file, and subfiles, down to
 923  * the appropriate contents depth. Returns the number of
 924  * entries written.
 925  */
 926 static int xhtml_do_contents(FILE *fp, xhtmlfile *file)
 927 {
 928   int level, limit, start_level, count = 0;
 929   if (!file)
 930     return 0;
 931
 932   level = (file->sections)?(file->sections->level):(0);
 933   limit = conf.contents_depth[(level>5)?(5):(level)];
 934   start_level = (file->is_leaf) ? (level-1) : (level);
 935   last_level = start_level;
 936
 937   count += xhtml_do_contents_section_limit(fp, file->sections, limit);
 938   count += xhtml_do_contents_limit(fp, file->child, limit);
 939   if (fp!=NULL) {
 940     while (last_level > start_level) {
 941       last_level--;
 942       fprintf(fp, "</ul>\n");
 943     }
 944   }
 945   return count;
 946 }
 947
 948 /* As above, but doesn't do anything in the current file */
 949 static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file)
 950 {
 951   int level, limit, start_level, count = 0;
 952   if (!file)
 953     return 0;
 954
 955   level = (file->sections)?(file->sections->level):(0);
 956   limit = conf.contents_depth[(level>5)?(5):(level)];
 957   start_level = (file->is_leaf) ? (level-1) : (level);
 958   last_level = start_level;
 959
 960   count = xhtml_do_contents_limit(fp, file->child, limit);
 961   if (fp!=NULL) {
 962     while (last_level > start_level) {
 963       last_level--;
 964       fprintf(fp, "</ul>\n");
 965     }
 966   }
 967   return count;
 968 }
 969
 970 /*
 971  * Write contents for the given file, children, and siblings, down to
 972  * given limit contents depth.
 973  */
 974 static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit)
 975 {
 976   int count = 0;
 977   while (file) {
 978     count += xhtml_do_contents_section_limit(fp, file->sections, limit);
 979     count += xhtml_do_contents_limit(fp, file->child, limit);
 980     file = file->next;
 981   }
 982   return count;
 983 }
 984
 985 /*
 986  * Write contents entries for the given section tree, down to the
 987  * limit contents depth.
 988  */
 989 static int xhtml_do_contents_section_deep_limit(FILE *fp, xhtmlsection *section, int limit)
 990 {
 991   int count = 0;
 992   while (section) {
 993     if (!xhtml_add_contents_entry(fp, section, limit))
 994       return 0;
 995     else
 996       count++;
 997     count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
 998     section = section->next;
 999   }
1000   return count;
1001 }
1002
1003 /*
1004  * Write contents entries for the given section tree, down to the
1005  * limit contents depth.
1006  */
1007 static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit)
1008 {
1009   int count = 0;
1010   if (!section)
1011     return 0;
1012   xhtml_add_contents_entry(fp, section, limit);
1013   count=1;
1014   count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
1015   /*  section=section->child;
1016   while (section && xhtml_add_contents_entry(fp, section, limit)) {
1017     section = section->next;
1018     }*/
1019   return count;
1020 }
1021
1022 /*
1023  * Add a section entry, unless we're exceeding the limit, in which
1024  * case return FALSE (otherwise return TRUE).
1025  */
1026 static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit)
1027 {
1028   if (!section || section->level > limit)
1029     return FALSE;
1030   if (fp==NULL || section->level < 0)
1031     return TRUE;
1032   while (last_level > section->level) {
1033     last_level--;
1034     fprintf(fp, "</ul>\n");
1035   }
1036   while (last_level < section->level) {
1037     last_level++;
1038     fprintf(fp, "<ul>\n");
1039   }
1040   fprintf(fp, "<li><a href=\"%s#%s\">", section->file->filename, section->fragment);
1041   if (section->para->kwtext) {
1042     xhtml_para(fp, section->para->kwtext);
1043     if (section->para->words) {
1044       fprintf(fp, ": ");
1045     }
1046   }
1047   if (section->para->words) {
1048     xhtml_para(fp, section->para->words);
1049   }
1050   fprintf(fp, "</a></li>\n");
1051   return TRUE;
1052 }
1053
1054 /*
1055  * Write all the sections in this file. Do all paragraphs in this section, then all
1056  * children (recursively), then go on to the next one (tail recursively).
1057  */
1058 static void xhtml_do_sections(FILE *fp, xhtmlsection *sections)
1059 {
1060   while (sections) {
1061     currentsection = sections;
1062     xhtml_do_paras(fp, sections->para);
1063     xhtml_do_sections(fp, sections->child);
1064     sections = sections->next;
1065   }
1066 }
1067
1068 /* Write this list of paragraphs. Close off all lists at the end. */
1069 static void xhtml_do_paras(FILE *fp, paragraph *p)
1070 {
1071   int last_type = -1, first=TRUE;
1072   if (!p)
1073     return;
1074
1075 /*  for (; p && (xhtml_para_level(p)>limit || xhtml_para_level(p)==-1 || first); p=p->next) {*/
1076   for (; p && (xhtml_para_level(p)==-1 || first); p=p->next) {
1077     first=FALSE;
1078     switch (p->type)
1079     {
1080       /*
1081        * Things we ignore because we've already processed them or
1082        * aren't going to touch them in this pass.
1083        */
1084      case para_IM:
1085      case para_BR:
1086      case para_Biblio:                 /* only touch BiblioCited */
1087      case para_VersionID:
1088      case para_Copyright:
1089      case para_Preamble:
1090      case para_NoCite:
1091      case para_Title:
1092        break;
1093
1094        /*
1095         * Chapter titles.
1096         */
1097       case para_Chapter:
1098       case para_Appendix:
1099       case para_UnnumberedChapter:
1100         xhtml_heading(fp, p);
1101         break;
1102
1103       case para_Heading:
1104       case para_Subsect:
1105         xhtml_heading(fp, p);
1106         break;
1107
1108       case para_Rule:
1109         fprintf(fp, "\n<hr />\n");
1110         break;
1111
1112       case para_Normal:
1113         fprintf(fp, "\n<p>");
1114         xhtml_para(fp, p->words);
1115         fprintf(fp, "</p>\n");
1116         break;
1117
1118       case para_Bullet:
1119       case para_NumberedList:
1120       case para_BiblioCited:
1121         if (last_type!=p->type) {
1122           /* start up list if necessary */
1123           if (p->type == para_Bullet) {
1124             fprintf(fp, "<ul>\n");
1125           } else if (p->type == para_NumberedList) {
1126             fprintf(fp, "<ol>\n");
1127           } else if (p->type == para_BiblioCited) {
1128             fprintf(fp, "<dl>\n");
1129           }
1130         }
1131         if (p->type == para_Bullet || p->type == para_NumberedList)
1132           fprintf(fp, "<li>");
1133         else if (p->type == para_BiblioCited) {
1134           fprintf(fp, "<dt>");
1135           xhtml_para(fp, p->kwtext);
1136           fprintf(fp, "</dt>\n<dd>");
1137         }
1138         xhtml_para(fp, p->words);
1139         if (p->type == para_BiblioCited) {
1140           fprintf(fp, "</dd>\n");
1141         } else if (p->type == para_Bullet || p->type == para_NumberedList) {
1142           fprintf(fp, "</li>");
1143         }
1144         if (p->type == para_Bullet || p->type == para_NumberedList || p->type == para_BiblioCited)
1145           /* close off list if necessary */
1146         {
1147           paragraph *p2 = p->next;
1148           int close_off=FALSE;
1149 /*          if (p2 && (xhtml_para_level(p2)>limit || xhtml_para_level(p2)==-1)) {*/
1150           if (p2 && xhtml_para_level(p2)==-1) {
1151             if (p2->type != p->type)
1152               close_off=TRUE;
1153           } else {
1154             close_off=TRUE;
1155           }
1156           if (close_off) {
1157             if (p->type == para_Bullet) {
1158               fprintf(fp, "</ul>\n");
1159             } else if (p->type == para_NumberedList) {
1160               fprintf(fp, "</ol>\n");
1161             } else if (p->type == para_BiblioCited) {
1162               fprintf(fp, "</dl>\n");
1163             }
1164           }
1165         }
1166         break;
1167
1168       case para_Code:
1169         xhtml_codepara(fp, p->words);
1170         break;
1171     }
1172     last_type = p->type;
1173   }
1174 }
1175
1176 /*
1177  * Output a header for this XHTML file.
1178  */
1179 static void xhtml_doheader(FILE *fp, word *title)
1180 {
1181   fprintf(fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n");
1182   fprintf(fp, "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n");
1183   fprintf(fp, "<html xmlns='http://www.w3.org/1999/xhtml'>\n\n<head>\n<title>");
1184   if (title==NULL)
1185     fprintf(fp, "The thing with no name!");
1186   else
1187     xhtml_para(fp, title);
1188   fprintf(fp, "</title>\n");
1189   fprintf(fp, "<meta name=\"generator\" content=\"Halibut %s xhtml-backend\" />\n", version);
1190   if (conf.author)
1191     fprintf(fp, "<meta name=\"author\" content=\"%ls\" />\n", conf.author);
1192   if (conf.description)
1193     fprintf(fp, "<meta name=\"description\" content=\"%ls\" />\n", conf.description);
1194   if (conf.head_end)
1195     fprintf(fp, "%ls\n", conf.head_end);
1196   fprintf(fp, "</head>\n\n");
1197   if (conf.body)
1198     fprintf(fp, "%ls\n", conf.body);
1199   else
1200     fprintf(fp, "<body>\n");
1201   if (conf.body_start)
1202     fprintf(fp, "%ls\n", conf.body_start);
1203 }
1204
1205 /*
1206  * Output a footer for this XHTML file.
1207  */
1208 static void xhtml_dofooter(FILE *fp)
1209 {
1210   fprintf(fp, "\n<hr />\n\n");
1211   if (conf.body_end)
1212     fprintf(fp, "%ls\n", conf.body_end);
1213   if (!conf.suppress_address) {
1214     fprintf(fp,"<address>\n");
1215     if (conf.address_start)
1216       fprintf(fp, "%ls\n", conf.address_start);
1217     /* Do the version ID */
1218     if (conf.include_version_id) {
1219       paragraph *p;
1220       int started = 0;
1221       for (p = sourceparas; p; p = p->next)
1222         if (p->type == para_VersionID) {
1223           xhtml_versionid(fp, p->words, started);
1224           started = 1;
1225         }
1226     }
1227     if (conf.address_end)
1228       fprintf(fp, "%ls\n", conf.address_end);
1229     fprintf(fp, "</address>\n");
1230   }
1231   fprintf(fp, "</body>\n\n</html>\n");
1232 }
1233
1234 /*
1235  * Output the versionid paragraph. Typically this is a version control
1236  * ID string (such as $Id...$ in RCS).
1237  */
1238 static void xhtml_versionid(FILE *fp, word *text, int started)
1239 {
1240   rdstringc t = { 0, 0, NULL };
1241
1242   rdaddc(&t, '[');                     /* FIXME: configurability */
1243   xhtml_rdaddwc(&t, text, NULL);
1244   rdaddc(&t, ']');                     /* FIXME: configurability */
1245
1246   if (started)
1247     fprintf(fp, "<br>\n");
1248   fprintf(fp, "%s\n", t.text);
1249   sfree(t.text);
1250 }
1251
1252 /* Is this an XHTML reserved character? */
1253 static int xhtml_reservedchar(int c)
1254 {
1255   if (c=='&' || c=='<' || c=='>' || c=='"')
1256     return TRUE;
1257   else
1258     return FALSE;
1259 }
1260
1261 /*
1262  * Convert a wide string into valid XHTML: Anything outside ASCII will
1263  * be fixed up as an entity. Currently we don't worry about constraining the
1264  * encoded character set, which we should probably do at some point (we can
1265  * still fix up and return FALSE - see the last comment here). We also don't
1266  * currently
1267  *
1268  * Because this is only used for words, spaces are HARD spaces (any other
1269  * spaces will be word_Whitespace not word_Normal). So they become &nbsp;
1270  * Unless hard_spaces is FALSE, of course (code paragraphs break the above
1271  * rule).
1272  *
1273  * If `result' is non-NULL, mallocs the resulting string and stores a pointer to
1274  * it in `*result'. If `result' is NULL, merely checks whether all
1275  * characters in the string are feasible.
1276  *
1277  * Return is nonzero if all characters are OK. If not all
1278  * characters are OK but `result' is non-NULL, a result _will_
1279  * still be generated!
1280  */
1281 static int xhtml_convert(wchar_t *s, char **result, int hard_spaces) {
1282     int doing = (result != 0);
1283     int ok = TRUE;
1284     char *p = NULL;
1285     int plen = 0, psize = 0;
1286
1287     for (; *s; s++) {
1288         wchar_t c = *s;
1289
1290 #define ensure_size(i) if (i>=psize) { psize = i+256; p = resize(p, psize); }
1291
1292         if (((c == 32 && !hard_spaces) || (c > 32 && c <= 126 && !xhtml_reservedchar(c)))) {
1293             /* Char is OK. */
1294             if (doing)
1295             {
1296               ensure_size(plen);
1297               p[plen++] = (char)c;
1298             }
1299         } else {
1300             /* Char needs fixing up. */
1301             /* ok = FALSE; -- currently we never return FALSE; we
1302              * might want to when considering a character set for the
1303              * encoded document.
1304              */
1305             if (doing)
1306             {
1307               if (c==32) { /* a space in a word is a hard space */
1308                 ensure_size(plen+6); /* includes space for the NUL, which is subsequently stomped on */
1309                 sprintf(p+plen, "&nbsp;");
1310                 plen+=6;
1311               } else {
1312                 /* FIXME: entity names! */
1313                 ensure_size(plen+8); /* includes space for the NUL, which is subsequently stomped on */
1314                 plen+=sprintf(p+plen, "&#%04i;", (int)c);
1315               }
1316             }
1317         }
1318     }
1319     if (doing) {
1320         p = resize(p, plen+1);
1321         p[plen] = '\0';
1322         *result = p;
1323     }
1324     return ok;
1325 }
1326
1327 /*
1328  * This formats the given words as XHTML.
1329  */
1330 static void xhtml_rdaddwc(rdstringc *rs, word *text, word *end) {
1331     char *c;
1332     keyword *kwl;
1333     xhtmlsection *sect;
1334     indextag *itag;
1335     int ti;
1336
1337     for (; text && text != end; text = text->next) {
1338       switch (text->type) {
1339       case word_HyperLink:
1340         xhtml_utostr(text->text, &c);
1341         rdaddsc(rs, "<a href=\"");
1342         rdaddsc(rs, c);
1343         rdaddsc(rs, "\">");
1344         sfree(c);
1345         break;
1346
1347       case word_UpperXref:
1348       case word_LowerXref:
1349         kwl = kw_lookup(keywords, text->text);
1350         if (kwl) {
1351           sect=xhtml_find_section(kwl->para);
1352           if (sect) {
1353             rdaddsc(rs, "<a href=\"");
1354             rdaddsc(rs, sect->file->filename);
1355             rdaddc(rs, '#');
1356             rdaddsc(rs, sect->fragment);
1357             rdaddsc(rs, "\">");
1358           } else {
1359             rdaddsc(rs, "<a href=\"Apologies.html\"><!-- probably a bibliography cross reference -->");
1360             error(err_whatever, "Couldn't locate cross-reference! (Probably a bibliography entry.)");
1361           }
1362         } else {
1363           rdaddsc(rs, "<a href=\"Apologies.html\"><!-- unknown cross-reference -->");
1364           error(err_whatever, "Couldn't locate cross-reference! (Wasn't in source file.)");
1365         }
1366         break;
1367
1368       case word_IndexRef: /* in theory we could make an index target here */
1369 /*        rdaddsc(rs, "<a name=\"idx-");
1370         xhtml_utostr(text->text, &c);
1371         rdaddsc(rs, c);
1372         sfree(c);
1373         rdaddsc(rs, "\"></a>");*/
1374         /* what we _do_ need to do is to fix up the backend data
1375          * for any indexentry this points to.
1376          */
1377         for (ti=0; (itag = (indextag *)index234(idx->tags, ti))!=NULL; ti++) {
1378           /* FIXME: really ustricmp() and not ustrcmp()? */
1379           if (ustricmp(itag->name, text->text)==0) {
1380             break;
1381           }
1382         }
1383         if (itag!=NULL) {
1384           if (itag->refs!=NULL) {
1385             int i;
1386             for (i=0; i<itag->nrefs; i++) {
1387               xhtmlindex *idx_ref;
1388               indexentry *ientry;
1389
1390               ientry = itag->refs[i];
1391               if (ientry->backend_data==NULL) {
1392                 idx_ref = (xhtmlindex*) smalloc(sizeof(xhtmlindex));
1393                 if (idx_ref==NULL)
1394                   fatal(err_nomemory);
1395                 idx_ref->nsection = 0;
1396                 idx_ref->size = 4;
1397                 idx_ref->sections = (xhtmlsection**) smalloc(idx_ref->size * sizeof(xhtmlsection*));
1398                 if (idx_ref->sections==NULL)
1399                   fatal(err_nomemory);
1400                 ientry->backend_data = idx_ref;
1401               } else {
1402                 idx_ref = ientry->backend_data;
1403                 if (idx_ref->nsection+1 > idx_ref->size) {
1404                   int new_size = idx_ref->size * 2;
1405                   idx_ref->sections = srealloc(idx_ref->sections, new_size * sizeof(xhtmlsection));
1406                   if (idx_ref->sections==NULL) {
1407                     fatal(err_nomemory);
1408                   }
1409                   idx_ref->size = new_size;
1410                 }
1411               }
1412               idx_ref->sections[idx_ref->nsection++] = currentsection;
1413 #if 0
1414 #endif
1415             }
1416           } else {
1417             fatal(err_whatever, "Index tag had no entries!");
1418           }
1419         } else {
1420           fprintf(stderr, "Looking for index entry '%ls'\n", text->text);
1421           fatal(err_whatever, "Couldn't locate index entry! (Wasn't in index.)");
1422         }
1423         break;
1424
1425       case word_HyperEnd:
1426       case word_XrefEnd:
1427         rdaddsc(rs, "</a>");
1428         break;
1429
1430       case word_Normal:
1431       case word_Emph:
1432       case word_Code:
1433       case word_WeakCode:
1434       case word_WhiteSpace:
1435       case word_EmphSpace:
1436       case word_CodeSpace:
1437       case word_WkCodeSpace:
1438       case word_Quote:
1439       case word_EmphQuote:
1440       case word_CodeQuote:
1441       case word_WkCodeQuote:
1442         assert(text->type != word_CodeQuote &&
1443                text->type != word_WkCodeQuote);
1444         if (towordstyle(text->type) == word_Emph &&
1445             (attraux(text->aux) == attr_First ||
1446              attraux(text->aux) == attr_Only))
1447             rdaddsc(rs, "<em>");
1448         else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) &&
1449                  (attraux(text->aux) == attr_First ||
1450                   attraux(text->aux) == attr_Only))
1451             rdaddsc(rs, "<code>");
1452
1453         if (removeattr(text->type) == word_Normal) {
1454           if (xhtml_convert(text->text, &c, TRUE)) /* spaces in the word are hard */
1455             rdaddsc(rs, c);
1456           else
1457             xhtml_rdaddwc(rs, text->alt, NULL);
1458           sfree(c);
1459         } else if (removeattr(text->type) == word_WhiteSpace) {
1460           rdaddc(rs, ' ');
1461         } else if (removeattr(text->type) == word_Quote) {
1462           rdaddsc(rs, "&quot;");
1463         }
1464
1465         if (towordstyle(text->type) == word_Emph &&
1466             (attraux(text->aux) == attr_Last ||
1467              attraux(text->aux) == attr_Only))
1468             rdaddsc(rs, "</em>");
1469         else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) &&
1470                  (attraux(text->aux) == attr_Last ||
1471                   attraux(text->aux) == attr_Only))
1472             rdaddsc(rs, "</code>");
1473         break;
1474       }
1475     }
1476 }
1477
1478 /* Output a heading, formatted as XHTML.
1479  */
1480 static void xhtml_heading(FILE *fp, paragraph *p)
1481 {
1482     rdstringc t = { 0, 0, NULL };
1483     word *tprefix = p->kwtext;
1484     word *nprefix = p->kwtext2;
1485     word *text = p->words;
1486     int level = xhtml_para_level(p);
1487     xhtmlsection *sect = xhtml_find_section(p);
1488     xhtmlheadfmt *fmt;
1489     char *fragment;
1490     if (sect) {
1491       fragment = sect->fragment;
1492     } else {
1493       if (p->type == para_Title)
1494         fragment = "title";
1495       else {
1496         fragment = ""; /* FIXME: what else can we do? */
1497         error(err_whatever, "Couldn't locate heading cross-reference!");
1498       }
1499     }
1500
1501     if (p->type == para_Title)
1502         fmt = NULL;
1503     else if (level == 1)
1504         fmt = &conf.fchapter;
1505     else if (level-1 < conf.nfsect)
1506         fmt = &conf.fsect[level-1];
1507     else
1508         fmt = &conf.fsect[conf.nfsect-1];
1509
1510     if (fmt && fmt->just_numbers && nprefix) {
1511         xhtml_rdaddwc(&t, nprefix, NULL);
1512         if (fmt) {
1513             char *c;
1514             if (xhtml_convert(fmt->number_suffix, &c, FALSE)) {
1515                 rdaddsc(&t, c);
1516                 sfree(c);
1517             }
1518         }
1519     } else if (fmt && !fmt->just_numbers && tprefix) {
1520         xhtml_rdaddwc(&t, tprefix, NULL);
1521         if (fmt) {
1522             char *c;
1523             if (xhtml_convert(fmt->number_suffix, &c, FALSE)) {
1524                 rdaddsc(&t, c);
1525                 sfree(c);
1526             }
1527         }
1528     }
1529     xhtml_rdaddwc(&t, text, NULL);
1530     /*
1531      * If we're outputting in single-file mode, we need to lower
1532      * the level of each heading by one, because the overall
1533      * document title will be sitting right at the top as an <h1>
1534      * and so chapters and sections should start at <h2>.
1535      *
1536      * Even if not, the document title will come back from
1537      * xhtml_para_level() as level zero, so we must increment that
1538      * no matter what leaf_level is set to.
1539      */
1540     if (conf.leaf_level == 0 || level == 0)
1541         level++;
1542     fprintf(fp, "<a name=\"%s\"></a><h%i>%s</h%i>\n", fragment, level, t.text, level);
1543     sfree(t.text);
1544 }
1545
1546 /* Output a paragraph. Styles are handled by xhtml_rdaddwc().
1547  * This looks pretty simple; I may have missed something ...
1548  */
1549 static void xhtml_para(FILE *fp, word *text)
1550 {
1551   rdstringc out = { 0, 0, NULL };
1552   xhtml_rdaddwc(&out, text, NULL);
1553   fprintf(fp, "%s", out.text);
1554   sfree(out.text);
1555 }
1556
1557 /* Output a code paragraph. I'm treating this as preformatted, which
1558  * may not be entirely correct. See xhtml_para() for my worries about
1559  * this being overly-simple; however I think that most of the complexity
1560  * of the text backend came entirely out of word wrapping anyway.
1561  */
1562 static void xhtml_codepara(FILE *fp, word *text)
1563 {
1564   fprintf(fp, "<pre>");
1565     for (; text; text = text->next) if (text->type == word_WeakCode) {
1566         char *c;
1567         xhtml_convert(text->text, &c, FALSE);
1568         fprintf(fp, "%s\n", c);
1569         sfree(c);
1570     }
1571   fprintf(fp, "</pre>\n");
1572 }