mdw@git.distorted.org.uk Git - sgt/halibut/blob - bk_text.c

   1 /*
   2  * text backend for Halibut
   3  */
   4
   5 #include <stdio.h>
   6 #include <stdlib.h>
   7 #include <assert.h>
   8 #include "halibut.h"
   9
  10 typedef enum { LEFT, LEFTPLUS, CENTRE } alignment;
  11 typedef struct {
  12     alignment align;
  13     int just_numbers;
  14     wchar_t underline;
  15     wchar_t *number_suffix;
  16 } alignstruct;
  17
  18 typedef struct {
  19     int indent, indent_code;
  20     int listindentbefore, listindentafter;
  21     int width;
  22     alignstruct atitle, achapter, *asect;
  23     int nasect;
  24     int include_version_id;
  25     int indent_preambles;
  26     word bullet;
  27 } textconfig;
  28
  29 static int text_convert(wchar_t *, char **);
  30
  31 static void text_heading(FILE *, word *, word *, word *, alignstruct, int,int);
  32 static void text_rule(FILE *, int, int);
  33 static void text_para(FILE *, word *, char *, word *, int, int, int);
  34 static void text_codepara(FILE *, word *, int, int);
  35 static void text_versionid(FILE *, word *);
  36
  37 static alignment utoalign(wchar_t *p) {
  38     if (!ustricmp(p, L"centre") || !ustricmp(p, L"center"))
  39         return CENTRE;
  40     if (!ustricmp(p, L"leftplus"))
  41         return LEFTPLUS;
  42     return LEFT;
  43 }
  44
  45 static textconfig text_configure(paragraph *source) {
  46     textconfig ret;
  47
  48     /*
  49      * Non-negotiables.
  50      */
  51     ret.bullet.next = NULL;
  52     ret.bullet.alt = NULL;
  53     ret.bullet.type = word_Normal;
  54     ret.atitle.just_numbers = FALSE;   /* ignored */
  55
  56     /*
  57      * Defaults.
  58      */
  59     ret.indent = 7;
  60     ret.indent_code = 2;
  61     ret.listindentbefore = 1;
  62     ret.listindentafter = 3;
  63     ret.width = 68;
  64     ret.atitle.align = CENTRE;
  65     ret.atitle.underline = L'=';
  66     ret.achapter.align = LEFT;
  67     ret.achapter.just_numbers = FALSE;
  68     ret.achapter.number_suffix = ustrdup(L": ");
  69     ret.achapter.underline = L'-';
  70     ret.nasect = 1;
  71     ret.asect = mknewa(alignstruct, ret.nasect);
  72     ret.asect[0].align = LEFTPLUS;
  73     ret.asect[0].just_numbers = TRUE;
  74     ret.asect[0].number_suffix = ustrdup(L" ");
  75     ret.asect[0].underline = L'\0';
  76     ret.include_version_id = TRUE;
  77     ret.indent_preambles = FALSE;
  78     ret.bullet.text = ustrdup(L"-");
  79
  80     for (; source; source = source->next) {
  81         if (source->type == para_Config) {
  82             if (!ustricmp(source->keyword, L"text-indent")) {
  83                 ret.indent = utoi(uadv(source->keyword));
  84             } else if (!ustricmp(source->keyword, L"text-indent-code")) {
  85                 ret.indent_code = utoi(uadv(source->keyword));
  86             } else if (!ustricmp(source->keyword, L"text-width")) {
  87                 ret.width = utoi(uadv(source->keyword));
  88             } else if (!ustricmp(source->keyword, L"text-list-indent")) {
  89                 ret.listindentbefore = utoi(uadv(source->keyword));
  90             } else if (!ustricmp(source->keyword, L"text-listitem-indent")) {
  91                 ret.listindentafter = utoi(uadv(source->keyword));
  92             } else if (!ustricmp(source->keyword, L"text-chapter-align")) {
  93                 ret.achapter.align = utoalign(uadv(source->keyword));
  94             } else if (!ustricmp(source->keyword, L"text-chapter-underline")) {
  95                 ret.achapter.underline = *uadv(source->keyword);
  96             } else if (!ustricmp(source->keyword, L"text-chapter-numeric")) {
  97                 ret.achapter.just_numbers = utob(uadv(source->keyword));
  98             } else if (!ustricmp(source->keyword, L"text-chapter-suffix")) {
  99                 ret.achapter.number_suffix = ustrdup(uadv(source->keyword));
 100             } else if (!ustricmp(source->keyword, L"text-section-align")) {
 101                 wchar_t *p = uadv(source->keyword);
 102                 int n = 0;
 103                 if (uisdigit(*p)) {
 104                     n = utoi(p);
 105                     p = uadv(p);
 106                 }
 107                 if (n >= ret.nasect) {
 108                     int i;
 109                     ret.asect = resize(ret.asect, n+1);
 110                     for (i = ret.nasect; i <= n; i++)
 111                         ret.asect[i] = ret.asect[ret.nasect-1];
 112                     ret.nasect = n+1;
 113                 }
 114                 ret.asect[n].align = utoalign(p);
 115             } else if (!ustricmp(source->keyword, L"text-section-underline")) {
 116                 wchar_t *p = uadv(source->keyword);
 117                 int n = 0;
 118                 if (uisdigit(*p)) {
 119                     n = utoi(p);
 120                     p = uadv(p);
 121                 }
 122                 if (n >= ret.nasect) {
 123                     int i;
 124                     ret.asect = resize(ret.asect, n+1);
 125                     for (i = ret.nasect; i <= n; i++)
 126                         ret.asect[i] = ret.asect[ret.nasect-1];
 127                     ret.nasect = n+1;
 128                 }
 129                 ret.asect[n].underline = *p;
 130             } else if (!ustricmp(source->keyword, L"text-section-numeric")) {
 131                 wchar_t *p = uadv(source->keyword);
 132                 int n = 0;
 133                 if (uisdigit(*p)) {
 134                     n = utoi(p);
 135                     p = uadv(p);
 136                 }
 137                 if (n >= ret.nasect) {
 138                     int i;
 139                     ret.asect = resize(ret.asect, n+1);
 140                     for (i = ret.nasect; i <= n; i++)
 141                         ret.asect[i] = ret.asect[ret.nasect-1];
 142                     ret.nasect = n+1;
 143                 }
 144                 ret.asect[n].just_numbers = utob(p);
 145             } else if (!ustricmp(source->keyword, L"text-section-suffix")) {
 146                 wchar_t *p = uadv(source->keyword);
 147                 int n = 0;
 148                 if (uisdigit(*p)) {
 149                     n = utoi(p);
 150                     p = uadv(p);
 151                 }
 152                 if (n >= ret.nasect) {
 153                     int i;
 154                     ret.asect = resize(ret.asect, n+1);
 155                     for (i = ret.nasect; i <= n; i++)
 156                         ret.asect[i] = ret.asect[ret.nasect-1];
 157                     ret.nasect = n+1;
 158                 }
 159                 ret.asect[n].number_suffix = ustrdup(p);
 160             } else if (!ustricmp(source->keyword, L"text-title-align")) {
 161                 ret.atitle.align = utoalign(uadv(source->keyword));
 162             } else if (!ustricmp(source->keyword, L"text-title-underline")) {
 163                 ret.atitle.underline = *uadv(source->keyword);
 164             } else if (!ustricmp(source->keyword, L"text-versionid")) {
 165                 ret.include_version_id = utob(uadv(source->keyword));
 166             } else if (!ustricmp(source->keyword, L"text-indent-preamble")) {
 167                 ret.indent_preambles = utob(uadv(source->keyword));
 168             } else if (!ustricmp(source->keyword, L"text-bullet")) {
 169                 ret.bullet.text = uadv(source->keyword);
 170             }
 171         }
 172     }
 173
 174     return ret;
 175 }
 176
 177 void text_backend(paragraph *sourceform, keywordlist *keywords,
 178                   indexdata *idx) {
 179     paragraph *p;
 180     textconfig conf;
 181     word *prefix, *body, *wp;
 182     word spaceword;
 183     FILE *fp;
 184     char *prefixextra;
 185     int nesting, nestindent;
 186     int indentb, indenta;
 187     int done_copyright;
 188
 189     IGNORE(keywords);                  /* we don't happen to need this */
 190     IGNORE(idx);                       /* or this */
 191
 192     conf = text_configure(sourceform);
 193
 194     /*
 195      * Determine the output file name, and open the output file
 196      *
 197      * FIXME: want configurable output file names here. For the
 198      * moment, we'll just call it `output.txt'.
 199      */
 200     fp = fopen("output.txt", "w");
 201     if (!fp) {
 202         error(err_cantopenw, "output.txt");
 203         return;
 204     }
 205
 206     /* Do the title */
 207     for (p = sourceform; p; p = p->next)
 208         if (p->type == para_Title)
 209             text_heading(fp, NULL, NULL, p->words,
 210                          conf.atitle, conf.indent, conf.width);
 211
 212     nestindent = conf.listindentbefore + conf.listindentafter;
 213     nesting = (conf.indent_preambles ? 0 : -conf.indent);
 214     done_copyright = FALSE;
 215
 216     /* Do the main document */
 217     for (p = sourceform; p; p = p->next) switch (p->type) {
 218
 219       case para_QuotePush:
 220         nesting += 2;
 221         break;
 222       case para_QuotePop:
 223         nesting -= 2;
 224         assert(nesting >= 0);
 225         break;
 226
 227       case para_LcontPush:
 228         nesting += nestindent;
 229         break;
 230       case para_LcontPop:
 231         nesting -= nestindent;
 232         assert(nesting >= 0);
 233         break;
 234
 235         /*
 236          * Things we ignore because we've already processed them or
 237          * aren't going to touch them in this pass.
 238          */
 239       case para_IM:
 240       case para_BR:
 241       case para_Biblio:                /* only touch BiblioCited */
 242       case para_VersionID:
 243       case para_Copyright:
 244       case para_NoCite:
 245       case para_Title:
 246         break;
 247
 248         /*
 249          * Chapter titles.
 250          */
 251       case para_Chapter:
 252       case para_Appendix:
 253       case para_UnnumberedChapter:
 254         /*
 255          * The copyright should come after the preamble but before
 256          * the first chapter title.
 257          */
 258         if (!done_copyright) {
 259             paragraph *p;
 260
 261             for (p = sourceform; p; p = p->next)
 262                 if (p->type == para_Copyright)
 263                     text_para(fp, NULL, NULL, p->words,
 264                               conf.indent + nesting, 0, conf.width - nesting);
 265             done_copyright = TRUE;
 266         }
 267         text_heading(fp, p->kwtext, p->kwtext2, p->words,
 268                      conf.achapter, conf.indent, conf.width);
 269         nesting = 0;
 270         break;
 271
 272       case para_Heading:
 273       case para_Subsect:
 274         text_heading(fp, p->kwtext, p->kwtext2, p->words,
 275                      conf.asect[p->aux>=conf.nasect ? conf.nasect-1 : p->aux],
 276                      conf.indent, conf.width);
 277         break;
 278
 279       case para_Rule:
 280         text_rule(fp, conf.indent + nesting, conf.width - nesting);
 281         break;
 282
 283       case para_Normal:
 284       case para_DescribedThing:
 285       case para_Description:
 286       case para_BiblioCited:
 287       case para_Bullet:
 288       case para_NumberedList:
 289         if (p->type == para_Bullet) {
 290             prefix = &conf.bullet;
 291             prefixextra = NULL;
 292             indentb = conf.listindentbefore;
 293             indenta = conf.listindentafter;
 294         } else if (p->type == para_NumberedList) {
 295             prefix = p->kwtext;
 296             prefixextra = ".";         /* FIXME: configurability */
 297             indentb = conf.listindentbefore;
 298             indenta = conf.listindentafter;
 299         } else if (p->type == para_Description) {
 300             prefix = NULL;
 301             prefixextra = NULL;
 302             indentb = conf.listindentbefore;
 303             indenta = conf.listindentafter;
 304         } else {
 305             prefix = NULL;
 306             prefixextra = NULL;
 307             indentb = indenta = 0;
 308         }
 309         if (p->type == para_BiblioCited) {
 310             body = dup_word_list(p->kwtext);
 311             for (wp = body; wp->next; wp = wp->next);
 312             wp->next = &spaceword;
 313             spaceword.next = p->words;
 314             spaceword.alt = NULL;
 315             spaceword.type = word_WhiteSpace;
 316             spaceword.text = NULL;
 317         } else {
 318             wp = NULL;
 319             body = p->words;
 320         }
 321         text_para(fp, prefix, prefixextra, body,
 322                   conf.indent + nesting + indentb, indenta,
 323                   conf.width - nesting - indentb - indenta);
 324         if (wp) {
 325             wp->next = NULL;
 326             free_word_list(body);
 327         }
 328         break;
 329
 330       case para_Code:
 331         text_codepara(fp, p->words,
 332                       conf.indent + nesting + conf.indent_code,
 333                       conf.width - nesting - 2 * conf.indent_code);
 334         break;
 335     }
 336
 337     /* Do the version ID */
 338     if (conf.include_version_id) {
 339         for (p = sourceform; p; p = p->next)
 340             if (p->type == para_VersionID)
 341                 text_versionid(fp, p->words);
 342     }
 343
 344     /*
 345      * Tidy up
 346      */
 347     fclose(fp);
 348     {
 349         int i;
 350         sfree(conf.achapter.number_suffix);
 351         for (i = 0; i < conf.nasect; i++)
 352             sfree(conf.asect[i].number_suffix);
 353         sfree(conf.asect);
 354         sfree(conf.bullet.text);
 355     }
 356 }
 357
 358 /*
 359  * Convert a wide string into a string of chars. If `result' is
 360  * non-NULL, mallocs the resulting string and stores a pointer to
 361  * it in `*result'. If `result' is NULL, merely checks whether all
 362  * characters in the string are feasible for the output character
 363  * set.
 364  *
 365  * Return is nonzero if all characters are OK. If not all
 366  * characters are OK but `result' is non-NULL, a result _will_
 367  * still be generated!
 368  */
 369 static int text_convert(wchar_t *s, char **result) {
 370     /*
 371      * FIXME. Currently this is ISO8859-1 only.
 372      */
 373     int doing = (result != 0);
 374     int ok = TRUE;
 375     char *p = NULL;
 376     int plen = 0, psize = 0;
 377
 378     for (; *s; s++) {
 379         wchar_t c = *s;
 380         char outc;
 381
 382         if ((c >= 32 && c <= 126) ||
 383             (c >= 160 && c <= 255)) {
 384             /* Char is OK. */
 385             outc = (char)c;
 386         } else {
 387             /* Char is not OK. */
 388             ok = FALSE;
 389             outc = 0xBF;               /* approximate the good old DEC `uh?' */
 390         }
 391         if (doing) {
 392             if (plen >= psize) {
 393                 psize = plen + 256;
 394                 p = resize(p, psize);
 395             }
 396             p[plen++] = outc;
 397         }
 398     }
 399     if (doing) {
 400         p = resize(p, plen+1);
 401         p[plen] = '\0';
 402         *result = p;
 403     }
 404     return ok;
 405 }
 406
 407 static void text_rdaddwc(rdstringc *rs, word *text, word *end) {
 408     char *c;
 409
 410     for (; text && text != end; text = text->next) switch (text->type) {
 411       case word_HyperLink:
 412       case word_HyperEnd:
 413       case word_UpperXref:
 414       case word_LowerXref:
 415       case word_XrefEnd:
 416       case word_IndexRef:
 417         break;
 418
 419       case word_Normal:
 420       case word_Emph:
 421       case word_Code:
 422       case word_WeakCode:
 423       case word_WhiteSpace:
 424       case word_EmphSpace:
 425       case word_CodeSpace:
 426       case word_WkCodeSpace:
 427       case word_Quote:
 428       case word_EmphQuote:
 429       case word_CodeQuote:
 430       case word_WkCodeQuote:
 431         assert(text->type != word_CodeQuote &&
 432                text->type != word_WkCodeQuote);
 433         if (towordstyle(text->type) == word_Emph &&
 434             (attraux(text->aux) == attr_First ||
 435              attraux(text->aux) == attr_Only))
 436             rdaddc(rs, '_');           /* FIXME: configurability */
 437         else if (towordstyle(text->type) == word_Code &&
 438                  (attraux(text->aux) == attr_First ||
 439                   attraux(text->aux) == attr_Only))
 440             rdaddc(rs, '`');           /* FIXME: configurability */
 441         if (removeattr(text->type) == word_Normal) {
 442             if (text_convert(text->text, &c))
 443                 rdaddsc(rs, c);
 444             else
 445                 text_rdaddwc(rs, text->alt, NULL);
 446             sfree(c);
 447         } else if (removeattr(text->type) == word_WhiteSpace) {
 448             rdaddc(rs, ' ');
 449         } else if (removeattr(text->type) == word_Quote) {
 450             rdaddc(rs, quoteaux(text->aux) == quote_Open ? '`' : '\'');
 451                                        /* FIXME: configurability */
 452         }
 453         if (towordstyle(text->type) == word_Emph &&
 454             (attraux(text->aux) == attr_Last ||
 455              attraux(text->aux) == attr_Only))
 456             rdaddc(rs, '_');           /* FIXME: configurability */
 457         else if (towordstyle(text->type) == word_Code &&
 458                  (attraux(text->aux) == attr_Last ||
 459                   attraux(text->aux) == attr_Only))
 460             rdaddc(rs, '\'');          /* FIXME: configurability */
 461         break;
 462     }
 463 }
 464
 465 static int text_width(word *);
 466
 467 static int text_width_list(word *text) {
 468     int w = 0;
 469     while (text) {
 470         w += text_width(text);
 471         text = text->next;
 472     }
 473     return w;
 474 }
 475
 476 static int text_width(word *text) {
 477     switch (text->type) {
 478       case word_HyperLink:
 479       case word_HyperEnd:
 480       case word_UpperXref:
 481       case word_LowerXref:
 482       case word_XrefEnd:
 483       case word_IndexRef:
 484         return 0;
 485
 486       case word_Normal:
 487       case word_Emph:
 488       case word_Code:
 489       case word_WeakCode:
 490         return (((text->type == word_Emph ||
 491                   text->type == word_Code)
 492                  ? (attraux(text->aux) == attr_Only ? 2 :
 493                     attraux(text->aux) == attr_Always ? 0 : 1)
 494                  : 0) +
 495                 (text_convert(text->text, NULL) ?
 496                  ustrlen(text->text) :
 497                  text_width_list(text->alt)));
 498
 499       case word_WhiteSpace:
 500       case word_EmphSpace:
 501       case word_CodeSpace:
 502       case word_WkCodeSpace:
 503       case word_Quote:
 504       case word_EmphQuote:
 505       case word_CodeQuote:
 506       case word_WkCodeQuote:
 507         assert(text->type != word_CodeQuote &&
 508                text->type != word_WkCodeQuote);
 509         return (((towordstyle(text->type) == word_Emph ||
 510                   towordstyle(text->type) == word_Code)
 511                  ? (attraux(text->aux) == attr_Only ? 2 :
 512                     attraux(text->aux) == attr_Always ? 0 : 1)
 513                  : 0) + 1);
 514     }
 515     return 0;                          /* should never happen */
 516 }
 517
 518 static void text_heading(FILE *fp, word *tprefix, word *nprefix, word *text,
 519                          alignstruct align, int indent, int width) {
 520     rdstringc t = { 0, 0, NULL };
 521     int margin, length;
 522     int firstlinewidth, wrapwidth;
 523     wrappedline *wrapping, *p;
 524
 525     if (align.just_numbers && nprefix) {
 526         char *c;
 527         text_rdaddwc(&t, nprefix, NULL);
 528         if (text_convert(align.number_suffix, &c)) {
 529             rdaddsc(&t, c);
 530             sfree(c);
 531         }
 532     } else if (!align.just_numbers && tprefix) {
 533         char *c;
 534         text_rdaddwc(&t, tprefix, NULL);
 535         if (text_convert(align.number_suffix, &c)) {
 536             rdaddsc(&t, c);
 537             sfree(c);
 538         }
 539     }
 540     margin = length = (t.text ? strlen(t.text) : 0);
 541
 542     if (align.align == LEFTPLUS) {
 543         margin = indent - margin;
 544         if (margin < 0) margin = 0;
 545         firstlinewidth = indent + width - margin - length;
 546         wrapwidth = width;
 547     } else if (align.align == LEFT || align.align == CENTRE) {
 548         margin = 0;
 549         firstlinewidth = indent + width - length;
 550         wrapwidth = indent + width;
 551     }
 552
 553     wrapping = wrap_para(text, firstlinewidth, wrapwidth, text_width);
 554     for (p = wrapping; p; p = p->next) {
 555         text_rdaddwc(&t, p->begin, p->end);
 556         length = (t.text ? strlen(t.text) : 0);
 557         if (align.align == CENTRE) {
 558             margin = (indent + width - length)/2;
 559             if (margin < 0) margin = 0;
 560         }
 561         fprintf(fp, "%*s%s\n", margin, "", t.text);
 562         if (align.underline != L'\0') {
 563             char *u, uc;
 564             wchar_t uw[2];
 565             uw[0] = align.underline; uw[1] = L'\0';
 566             text_convert(uw, &u);
 567             uc = u[0];
 568             sfree(u);
 569             fprintf(fp, "%*s", margin, "");
 570             while (length--)
 571                 putc(uc, fp);
 572             putc('\n', fp);
 573         }
 574         if (align.align == LEFTPLUS)
 575             margin = indent;
 576         else
 577             margin = 0;
 578         sfree(t.text);
 579         t = empty_rdstringc;
 580     }
 581     wrap_free(wrapping);
 582     putc('\n', fp);
 583
 584     sfree(t.text);
 585 }
 586
 587 static void text_rule(FILE *fp, int indent, int width) {
 588     while (indent--) putc(' ', fp);
 589     while (width--) putc('-', fp);     /* FIXME: configurability! */
 590     putc('\n', fp);
 591     putc('\n', fp);
 592 }
 593
 594 static void text_para(FILE *fp, word *prefix, char *prefixextra, word *text,
 595                       int indent, int extraindent, int width) {
 596     wrappedline *wrapping, *p;
 597     rdstringc pfx = { 0, 0, NULL };
 598     int e;
 599     int firstlinewidth = width;
 600
 601     if (prefix) {
 602         text_rdaddwc(&pfx, prefix, NULL);
 603         if (prefixextra)
 604             rdaddsc(&pfx, prefixextra);
 605         fprintf(fp, "%*s%s", indent, "", pfx.text);
 606         /* If the prefix is too long, shorten the first line to fit. */
 607         e = extraindent - strlen(pfx.text);
 608         if (e < 0) {
 609             firstlinewidth += e;       /* this decreases it, since e < 0 */
 610             if (firstlinewidth < 0) {
 611                 e = indent + extraindent;
 612                 firstlinewidth = width;
 613                 fprintf(fp, "\n");
 614             } else
 615                 e = 0;
 616         }
 617         sfree(pfx.text);
 618     } else
 619         e = indent + extraindent;
 620
 621     wrapping = wrap_para(text, firstlinewidth, width, text_width);
 622     for (p = wrapping; p; p = p->next) {
 623         rdstringc t = { 0, 0, NULL };
 624         text_rdaddwc(&t, p->begin, p->end);
 625         fprintf(fp, "%*s%s\n", e, "", t.text);
 626         e = indent + extraindent;
 627         sfree(t.text);
 628     }
 629     wrap_free(wrapping);
 630     putc('\n', fp);
 631 }
 632
 633 static void text_codepara(FILE *fp, word *text, int indent, int width) {
 634     for (; text; text = text->next) if (text->type == word_WeakCode) {
 635         char *c;
 636         text_convert(text->text, &c);
 637         if (strlen(c) > (size_t)width) {
 638             /* FIXME: warn */
 639         }
 640         fprintf(fp, "%*s%s\n", indent, "", c);
 641         sfree(c);
 642     }
 643
 644     putc('\n', fp);
 645 }
 646
 647 static void text_versionid(FILE *fp, word *text) {
 648     rdstringc t = { 0, 0, NULL };
 649
 650     rdaddc(&t, '[');                   /* FIXME: configurability */
 651     text_rdaddwc(&t, text, NULL);
 652     rdaddc(&t, ']');                   /* FIXME: configurability */
 653
 654     fprintf(fp, "%s\n", t.text);
 655     sfree(t.text);
 656 }