mdw@git.distorted.org.uk Git - sgt/halibut/blob - bk_text.c

   1 /*
   2  * text backend for Halibut
   3  */
   4
   5 #include <stdio.h>
   6 #include <stdlib.h>
   7 #include <assert.h>
   8 #include "halibut.h"
   9
  10 typedef enum { LEFT, LEFTPLUS, CENTRE } alignment;
  11 typedef struct {
  12     alignment align;
  13     int just_numbers;
  14     wchar_t underline;
  15     wchar_t *number_suffix;
  16 } alignstruct;
  17
  18 typedef struct {
  19     int indent, indent_code;
  20     int listindentbefore, listindentafter;
  21     int width;
  22     alignstruct atitle, achapter, *asect;
  23     int nasect;
  24     int include_version_id;
  25     int indent_preambles;
  26     word bullet;
  27     char *filename;
  28 } textconfig;
  29
  30 static int text_convert(wchar_t *, char **);
  31
  32 static void text_heading(FILE *, word *, word *, word *, alignstruct, int,int);
  33 static void text_rule(FILE *, int, int);
  34 static void text_para(FILE *, word *, char *, word *, int, int, int);
  35 static void text_codepara(FILE *, word *, int, int);
  36 static void text_versionid(FILE *, word *);
  37
  38 static alignment utoalign(wchar_t *p) {
  39     if (!ustricmp(p, L"centre") || !ustricmp(p, L"center"))
  40         return CENTRE;
  41     if (!ustricmp(p, L"leftplus"))
  42         return LEFTPLUS;
  43     return LEFT;
  44 }
  45
  46 static textconfig text_configure(paragraph *source) {
  47     textconfig ret;
  48
  49     /*
  50      * Non-negotiables.
  51      */
  52     ret.bullet.next = NULL;
  53     ret.bullet.alt = NULL;
  54     ret.bullet.type = word_Normal;
  55     ret.atitle.just_numbers = FALSE;   /* ignored */
  56
  57     /*
  58      * Defaults.
  59      */
  60     ret.indent = 7;
  61     ret.indent_code = 2;
  62     ret.listindentbefore = 1;
  63     ret.listindentafter = 3;
  64     ret.width = 68;
  65     ret.atitle.align = CENTRE;
  66     ret.atitle.underline = L'=';
  67     ret.achapter.align = LEFT;
  68     ret.achapter.just_numbers = FALSE;
  69     ret.achapter.number_suffix = L": ";
  70     ret.achapter.underline = L'-';
  71     ret.nasect = 1;
  72     ret.asect = mknewa(alignstruct, ret.nasect);
  73     ret.asect[0].align = LEFTPLUS;
  74     ret.asect[0].just_numbers = TRUE;
  75     ret.asect[0].number_suffix = L" ";
  76     ret.asect[0].underline = L'\0';
  77     ret.include_version_id = TRUE;
  78     ret.indent_preambles = FALSE;
  79     ret.bullet.text = L"-";
  80     ret.filename = dupstr("output.txt");
  81
  82     for (; source; source = source->next) {
  83         if (source->type == para_Config) {
  84             if (!ustricmp(source->keyword, L"text-indent")) {
  85                 ret.indent = utoi(uadv(source->keyword));
  86             } else if (!ustricmp(source->keyword, L"text-filename")) {
  87                 sfree(ret.filename);
  88                 ret.filename = dupstr(adv(source->origkeyword));
  89             } else if (!ustricmp(source->keyword, L"text-indent-code")) {
  90                 ret.indent_code = utoi(uadv(source->keyword));
  91             } else if (!ustricmp(source->keyword, L"text-width")) {
  92                 ret.width = utoi(uadv(source->keyword));
  93             } else if (!ustricmp(source->keyword, L"text-list-indent")) {
  94                 ret.listindentbefore = utoi(uadv(source->keyword));
  95             } else if (!ustricmp(source->keyword, L"text-listitem-indent")) {
  96                 ret.listindentafter = utoi(uadv(source->keyword));
  97             } else if (!ustricmp(source->keyword, L"text-chapter-align")) {
  98                 ret.achapter.align = utoalign(uadv(source->keyword));
  99             } else if (!ustricmp(source->keyword, L"text-chapter-underline")) {
 100                 ret.achapter.underline = *uadv(source->keyword);
 101             } else if (!ustricmp(source->keyword, L"text-chapter-numeric")) {
 102                 ret.achapter.just_numbers = utob(uadv(source->keyword));
 103             } else if (!ustricmp(source->keyword, L"text-chapter-suffix")) {
 104                 ret.achapter.number_suffix = uadv(source->keyword);
 105             } else if (!ustricmp(source->keyword, L"text-section-align")) {
 106                 wchar_t *p = uadv(source->keyword);
 107                 int n = 0;
 108                 if (uisdigit(*p)) {
 109                     n = utoi(p);
 110                     p = uadv(p);
 111                 }
 112                 if (n >= ret.nasect) {
 113                     int i;
 114                     ret.asect = resize(ret.asect, n+1);
 115                     for (i = ret.nasect; i <= n; i++)
 116                         ret.asect[i] = ret.asect[ret.nasect-1];
 117                     ret.nasect = n+1;
 118                 }
 119                 ret.asect[n].align = utoalign(p);
 120             } else if (!ustricmp(source->keyword, L"text-section-underline")) {
 121                 wchar_t *p = uadv(source->keyword);
 122                 int n = 0;
 123                 if (uisdigit(*p)) {
 124                     n = utoi(p);
 125                     p = uadv(p);
 126                 }
 127                 if (n >= ret.nasect) {
 128                     int i;
 129                     ret.asect = resize(ret.asect, n+1);
 130                     for (i = ret.nasect; i <= n; i++)
 131                         ret.asect[i] = ret.asect[ret.nasect-1];
 132                     ret.nasect = n+1;
 133                 }
 134                 ret.asect[n].underline = *p;
 135             } else if (!ustricmp(source->keyword, L"text-section-numeric")) {
 136                 wchar_t *p = uadv(source->keyword);
 137                 int n = 0;
 138                 if (uisdigit(*p)) {
 139                     n = utoi(p);
 140                     p = uadv(p);
 141                 }
 142                 if (n >= ret.nasect) {
 143                     int i;
 144                     ret.asect = resize(ret.asect, n+1);
 145                     for (i = ret.nasect; i <= n; i++)
 146                         ret.asect[i] = ret.asect[ret.nasect-1];
 147                     ret.nasect = n+1;
 148                 }
 149                 ret.asect[n].just_numbers = utob(p);
 150             } else if (!ustricmp(source->keyword, L"text-section-suffix")) {
 151                 wchar_t *p = uadv(source->keyword);
 152                 int n = 0;
 153                 if (uisdigit(*p)) {
 154                     n = utoi(p);
 155                     p = uadv(p);
 156                 }
 157                 if (n >= ret.nasect) {
 158                     int i;
 159                     ret.asect = resize(ret.asect, n+1);
 160                     for (i = ret.nasect; i <= n; i++) {
 161                         ret.asect[i] = ret.asect[ret.nasect-1];
 162                     }
 163                     ret.nasect = n+1;
 164                 }
 165                 ret.asect[n].number_suffix = p;
 166             } else if (!ustricmp(source->keyword, L"text-title-align")) {
 167                 ret.atitle.align = utoalign(uadv(source->keyword));
 168             } else if (!ustricmp(source->keyword, L"text-title-underline")) {
 169                 ret.atitle.underline = *uadv(source->keyword);
 170             } else if (!ustricmp(source->keyword, L"text-versionid")) {
 171                 ret.include_version_id = utob(uadv(source->keyword));
 172             } else if (!ustricmp(source->keyword, L"text-indent-preamble")) {
 173                 ret.indent_preambles = utob(uadv(source->keyword));
 174             } else if (!ustricmp(source->keyword, L"text-bullet")) {
 175                 ret.bullet.text = uadv(source->keyword);
 176             }
 177         }
 178     }
 179
 180     return ret;
 181 }
 182
 183 paragraph *text_config_filename(char *filename)
 184 {
 185     return cmdline_cfg_simple("text-filename", filename, NULL);
 186 }
 187
 188 void text_backend(paragraph *sourceform, keywordlist *keywords,
 189                   indexdata *idx, void *unused) {
 190     paragraph *p;
 191     textconfig conf;
 192     word *prefix, *body, *wp;
 193     word spaceword;
 194     FILE *fp;
 195     char *prefixextra;
 196     int nesting, nestindent;
 197     int indentb, indenta;
 198
 199     IGNORE(unused);
 200     IGNORE(keywords);                  /* we don't happen to need this */
 201     IGNORE(idx);                       /* or this */
 202
 203     conf = text_configure(sourceform);
 204
 205     /*
 206      * Open the output file.
 207      */
 208     fp = fopen(conf.filename, "w");
 209     if (!fp) {
 210         error(err_cantopenw, conf.filename);
 211         return;
 212     }
 213
 214     /* Do the title */
 215     for (p = sourceform; p; p = p->next)
 216         if (p->type == para_Title)
 217             text_heading(fp, NULL, NULL, p->words,
 218                          conf.atitle, conf.indent, conf.width);
 219
 220     nestindent = conf.listindentbefore + conf.listindentafter;
 221     nesting = (conf.indent_preambles ? 0 : -conf.indent);
 222
 223     /* Do the main document */
 224     for (p = sourceform; p; p = p->next) switch (p->type) {
 225
 226       case para_QuotePush:
 227         nesting += 2;
 228         break;
 229       case para_QuotePop:
 230         nesting -= 2;
 231         assert(nesting >= 0);
 232         break;
 233
 234       case para_LcontPush:
 235         nesting += nestindent;
 236         break;
 237       case para_LcontPop:
 238         nesting -= nestindent;
 239         assert(nesting >= 0);
 240         break;
 241
 242         /*
 243          * Things we ignore because we've already processed them or
 244          * aren't going to touch them in this pass.
 245          */
 246       case para_IM:
 247       case para_BR:
 248       case para_Biblio:                /* only touch BiblioCited */
 249       case para_VersionID:
 250       case para_NoCite:
 251       case para_Title:
 252         break;
 253
 254         /*
 255          * Chapter titles.
 256          */
 257       case para_Chapter:
 258       case para_Appendix:
 259       case para_UnnumberedChapter:
 260         text_heading(fp, p->kwtext, p->kwtext2, p->words,
 261                      conf.achapter, conf.indent, conf.width);
 262         nesting = 0;
 263         break;
 264
 265       case para_Heading:
 266       case para_Subsect:
 267         text_heading(fp, p->kwtext, p->kwtext2, p->words,
 268                      conf.asect[p->aux>=conf.nasect ? conf.nasect-1 : p->aux],
 269                      conf.indent, conf.width);
 270         break;
 271
 272       case para_Rule:
 273         text_rule(fp, conf.indent + nesting, conf.width - nesting);
 274         break;
 275
 276       case para_Normal:
 277       case para_Copyright:
 278       case para_DescribedThing:
 279       case para_Description:
 280       case para_BiblioCited:
 281       case para_Bullet:
 282       case para_NumberedList:
 283         if (p->type == para_Bullet) {
 284             prefix = &conf.bullet;
 285             prefixextra = NULL;
 286             indentb = conf.listindentbefore;
 287             indenta = conf.listindentafter;
 288         } else if (p->type == para_NumberedList) {
 289             prefix = p->kwtext;
 290             prefixextra = ".";         /* FIXME: configurability */
 291             indentb = conf.listindentbefore;
 292             indenta = conf.listindentafter;
 293         } else if (p->type == para_Description) {
 294             prefix = NULL;
 295             prefixextra = NULL;
 296             indentb = conf.listindentbefore;
 297             indenta = conf.listindentafter;
 298         } else {
 299             prefix = NULL;
 300             prefixextra = NULL;
 301             indentb = indenta = 0;
 302         }
 303         if (p->type == para_BiblioCited) {
 304             body = dup_word_list(p->kwtext);
 305             for (wp = body; wp->next; wp = wp->next);
 306             wp->next = &spaceword;
 307             spaceword.next = p->words;
 308             spaceword.alt = NULL;
 309             spaceword.type = word_WhiteSpace;
 310             spaceword.text = NULL;
 311         } else {
 312             wp = NULL;
 313             body = p->words;
 314         }
 315         text_para(fp, prefix, prefixextra, body,
 316                   conf.indent + nesting + indentb, indenta,
 317                   conf.width - nesting - indentb - indenta);
 318         if (wp) {
 319             wp->next = NULL;
 320             free_word_list(body);
 321         }
 322         break;
 323
 324       case para_Code:
 325         text_codepara(fp, p->words,
 326                       conf.indent + nesting + conf.indent_code,
 327                       conf.width - nesting - 2 * conf.indent_code);
 328         break;
 329     }
 330
 331     /* Do the version ID */
 332     if (conf.include_version_id) {
 333         for (p = sourceform; p; p = p->next)
 334             if (p->type == para_VersionID)
 335                 text_versionid(fp, p->words);
 336     }
 337
 338     /*
 339      * Tidy up
 340      */
 341     fclose(fp);
 342     sfree(conf.asect);
 343     sfree(conf.filename);
 344 }
 345
 346 /*
 347  * Convert a wide string into a string of chars. If `result' is
 348  * non-NULL, mallocs the resulting string and stores a pointer to
 349  * it in `*result'. If `result' is NULL, merely checks whether all
 350  * characters in the string are feasible for the output character
 351  * set.
 352  *
 353  * Return is nonzero if all characters are OK. If not all
 354  * characters are OK but `result' is non-NULL, a result _will_
 355  * still be generated!
 356  */
 357 static int text_convert(wchar_t *s, char **result) {
 358     /*
 359      * FIXME. Currently this is ISO8859-1 only.
 360      */
 361     int doing = (result != 0);
 362     int ok = TRUE;
 363     char *p = NULL;
 364     int plen = 0, psize = 0;
 365
 366     for (; *s; s++) {
 367         wchar_t c = *s;
 368         char outc;
 369
 370         if ((c >= 32 && c <= 126) ||
 371             (c >= 160 && c <= 255)) {
 372             /* Char is OK. */
 373             outc = (char)c;
 374         } else {
 375             /* Char is not OK. */
 376             ok = FALSE;
 377             outc = 0xBF;               /* approximate the good old DEC `uh?' */
 378         }
 379         if (doing) {
 380             if (plen >= psize) {
 381                 psize = plen + 256;
 382                 p = resize(p, psize);
 383             }
 384             p[plen++] = outc;
 385         }
 386     }
 387     if (doing) {
 388         p = resize(p, plen+1);
 389         p[plen] = '\0';
 390         *result = p;
 391     }
 392     return ok;
 393 }
 394
 395 static void text_rdaddwc(rdstringc *rs, word *text, word *end) {
 396     char *c;
 397
 398     for (; text && text != end; text = text->next) switch (text->type) {
 399       case word_HyperLink:
 400       case word_HyperEnd:
 401       case word_UpperXref:
 402       case word_LowerXref:
 403       case word_XrefEnd:
 404       case word_IndexRef:
 405         break;
 406
 407       case word_Normal:
 408       case word_Emph:
 409       case word_Code:
 410       case word_WeakCode:
 411       case word_WhiteSpace:
 412       case word_EmphSpace:
 413       case word_CodeSpace:
 414       case word_WkCodeSpace:
 415       case word_Quote:
 416       case word_EmphQuote:
 417       case word_CodeQuote:
 418       case word_WkCodeQuote:
 419         assert(text->type != word_CodeQuote &&
 420                text->type != word_WkCodeQuote);
 421         if (towordstyle(text->type) == word_Emph &&
 422             (attraux(text->aux) == attr_First ||
 423              attraux(text->aux) == attr_Only))
 424             rdaddc(rs, '_');           /* FIXME: configurability */
 425         else if (towordstyle(text->type) == word_Code &&
 426                  (attraux(text->aux) == attr_First ||
 427                   attraux(text->aux) == attr_Only))
 428             rdaddc(rs, '`');           /* FIXME: configurability */
 429         if (removeattr(text->type) == word_Normal) {
 430             if (text_convert(text->text, &c) || !text->alt)
 431                 rdaddsc(rs, c);
 432             else
 433                 text_rdaddwc(rs, text->alt, NULL);
 434             sfree(c);
 435         } else if (removeattr(text->type) == word_WhiteSpace) {
 436             rdaddc(rs, ' ');
 437         } else if (removeattr(text->type) == word_Quote) {
 438             rdaddc(rs, quoteaux(text->aux) == quote_Open ? '`' : '\'');
 439                                        /* FIXME: configurability */
 440         }
 441         if (towordstyle(text->type) == word_Emph &&
 442             (attraux(text->aux) == attr_Last ||
 443              attraux(text->aux) == attr_Only))
 444             rdaddc(rs, '_');           /* FIXME: configurability */
 445         else if (towordstyle(text->type) == word_Code &&
 446                  (attraux(text->aux) == attr_Last ||
 447                   attraux(text->aux) == attr_Only))
 448             rdaddc(rs, '\'');          /* FIXME: configurability */
 449         break;
 450     }
 451 }
 452
 453 static int text_width(void *, word *);
 454
 455 static int text_width_list(void *ctx, word *text) {
 456     int w = 0;
 457     while (text) {
 458         w += text_width(ctx, text);
 459         text = text->next;
 460     }
 461     return w;
 462 }
 463
 464 static int text_width(void *ctx, word *text) {
 465     IGNORE(ctx);
 466
 467     switch (text->type) {
 468       case word_HyperLink:
 469       case word_HyperEnd:
 470       case word_UpperXref:
 471       case word_LowerXref:
 472       case word_XrefEnd:
 473       case word_IndexRef:
 474         return 0;
 475
 476       case word_Normal:
 477       case word_Emph:
 478       case word_Code:
 479       case word_WeakCode:
 480         return (((text->type == word_Emph ||
 481                   text->type == word_Code)
 482                  ? (attraux(text->aux) == attr_Only ? 2 :
 483                     attraux(text->aux) == attr_Always ? 0 : 1)
 484                  : 0) +
 485                 (text_convert(text->text, NULL) || !text->alt ?
 486                  ustrlen(text->text) :
 487                  text_width_list(ctx, text->alt)));
 488
 489       case word_WhiteSpace:
 490       case word_EmphSpace:
 491       case word_CodeSpace:
 492       case word_WkCodeSpace:
 493       case word_Quote:
 494       case word_EmphQuote:
 495       case word_CodeQuote:
 496       case word_WkCodeQuote:
 497         assert(text->type != word_CodeQuote &&
 498                text->type != word_WkCodeQuote);
 499         return (((towordstyle(text->type) == word_Emph ||
 500                   towordstyle(text->type) == word_Code)
 501                  ? (attraux(text->aux) == attr_Only ? 2 :
 502                     attraux(text->aux) == attr_Always ? 0 : 1)
 503                  : 0) + 1);
 504     }
 505     return 0;                          /* should never happen */
 506 }
 507
 508 static void text_heading(FILE *fp, word *tprefix, word *nprefix, word *text,
 509                          alignstruct align, int indent, int width) {
 510     rdstringc t = { 0, 0, NULL };
 511     int margin, length;
 512     int firstlinewidth, wrapwidth;
 513     wrappedline *wrapping, *p;
 514
 515     if (align.just_numbers && nprefix) {
 516         char *c;
 517         text_rdaddwc(&t, nprefix, NULL);
 518         if (text_convert(align.number_suffix, &c)) {
 519             rdaddsc(&t, c);
 520             sfree(c);
 521         }
 522     } else if (!align.just_numbers && tprefix) {
 523         char *c;
 524         text_rdaddwc(&t, tprefix, NULL);
 525         if (text_convert(align.number_suffix, &c)) {
 526             rdaddsc(&t, c);
 527             sfree(c);
 528         }
 529     }
 530     margin = length = (t.text ? strlen(t.text) : 0);
 531
 532     if (align.align == LEFTPLUS) {
 533         margin = indent - margin;
 534         if (margin < 0) margin = 0;
 535         firstlinewidth = indent + width - margin - length;
 536         wrapwidth = width;
 537     } else if (align.align == LEFT || align.align == CENTRE) {
 538         margin = 0;
 539         firstlinewidth = indent + width - length;
 540         wrapwidth = indent + width;
 541     }
 542
 543     wrapping = wrap_para(text, firstlinewidth, wrapwidth, text_width, NULL, 0);
 544     for (p = wrapping; p; p = p->next) {
 545         text_rdaddwc(&t, p->begin, p->end);
 546         length = (t.text ? strlen(t.text) : 0);
 547         if (align.align == CENTRE) {
 548             margin = (indent + width - length)/2;
 549             if (margin < 0) margin = 0;
 550         }
 551         fprintf(fp, "%*s%s\n", margin, "", t.text);
 552         if (align.underline != L'\0') {
 553             char *u, uc;
 554             wchar_t uw[2];
 555             uw[0] = align.underline; uw[1] = L'\0';
 556             text_convert(uw, &u);
 557             uc = u[0];
 558             sfree(u);
 559             fprintf(fp, "%*s", margin, "");
 560             while (length--)
 561                 putc(uc, fp);
 562             putc('\n', fp);
 563         }
 564         if (align.align == LEFTPLUS)
 565             margin = indent;
 566         else
 567             margin = 0;
 568         sfree(t.text);
 569         t = empty_rdstringc;
 570     }
 571     wrap_free(wrapping);
 572     putc('\n', fp);
 573
 574     sfree(t.text);
 575 }
 576
 577 static void text_rule(FILE *fp, int indent, int width) {
 578     while (indent--) putc(' ', fp);
 579     while (width--) putc('-', fp);     /* FIXME: configurability! */
 580     putc('\n', fp);
 581     putc('\n', fp);
 582 }
 583
 584 static void text_para(FILE *fp, word *prefix, char *prefixextra, word *text,
 585                       int indent, int extraindent, int width) {
 586     wrappedline *wrapping, *p;
 587     rdstringc pfx = { 0, 0, NULL };
 588     int e;
 589     int firstlinewidth = width;
 590
 591     if (prefix) {
 592         text_rdaddwc(&pfx, prefix, NULL);
 593         if (prefixextra)
 594             rdaddsc(&pfx, prefixextra);
 595         fprintf(fp, "%*s%s", indent, "", pfx.text);
 596         /* If the prefix is too long, shorten the first line to fit. */
 597         e = extraindent - strlen(pfx.text);
 598         if (e < 0) {
 599             firstlinewidth += e;       /* this decreases it, since e < 0 */
 600             if (firstlinewidth < 0) {
 601                 e = indent + extraindent;
 602                 firstlinewidth = width;
 603                 fprintf(fp, "\n");
 604             } else
 605                 e = 0;
 606         }
 607         sfree(pfx.text);
 608     } else
 609         e = indent + extraindent;
 610
 611     wrapping = wrap_para(text, firstlinewidth, width, text_width, NULL, 0);
 612     for (p = wrapping; p; p = p->next) {
 613         rdstringc t = { 0, 0, NULL };
 614         text_rdaddwc(&t, p->begin, p->end);
 615         fprintf(fp, "%*s%s\n", e, "", t.text);
 616         e = indent + extraindent;
 617         sfree(t.text);
 618     }
 619     wrap_free(wrapping);
 620     putc('\n', fp);
 621 }
 622
 623 static void text_codepara(FILE *fp, word *text, int indent, int width) {
 624     for (; text; text = text->next) if (text->type == word_WeakCode) {
 625         char *c;
 626         text_convert(text->text, &c);
 627         if (strlen(c) > (size_t)width) {
 628             /* FIXME: warn */
 629         }
 630         fprintf(fp, "%*s%s\n", indent, "", c);
 631         sfree(c);
 632     }
 633
 634     putc('\n', fp);
 635 }
 636
 637 static void text_versionid(FILE *fp, word *text) {
 638     rdstringc t = { 0, 0, NULL };
 639
 640     rdaddc(&t, '[');                   /* FIXME: configurability */
 641     text_rdaddwc(&t, text, NULL);
 642     rdaddc(&t, ']');                   /* FIXME: configurability */
 643
 644     fprintf(fp, "%s\n", t.text);
 645     sfree(t.text);
 646 }