mdw@git.distorted.org.uk Git - sgt/halibut/blob - bk_text.c

   1 /*
   2  * text backend for Halibut
   3  */
   4
   5 #include <stdio.h>
   6 #include <stdlib.h>
   7 #include <assert.h>
   8 #include "halibut.h"
   9
  10 typedef enum { LEFT, LEFTPLUS, CENTRE } alignment;
  11 typedef struct {
  12     alignment align;
  13     int just_numbers;
  14     wchar_t underline;
  15     wchar_t *number_suffix;
  16 } alignstruct;
  17
  18 typedef struct {
  19     int indent, indent_code;
  20     int listindentbefore, listindentafter;
  21     int width;
  22     alignstruct atitle, achapter, *asect;
  23     int nasect;
  24     int include_version_id;
  25     int indent_preambles;
  26     word bullet;
  27     char *filename;
  28 } textconfig;
  29
  30 static int text_convert(wchar_t *, char **);
  31
  32 static void text_heading(FILE *, word *, word *, word *, alignstruct, int,int);
  33 static void text_rule(FILE *, int, int);
  34 static void text_para(FILE *, word *, char *, word *, int, int, int);
  35 static void text_codepara(FILE *, word *, int, int);
  36 static void text_versionid(FILE *, word *);
  37
  38 static alignment utoalign(wchar_t *p) {
  39     if (!ustricmp(p, L"centre") || !ustricmp(p, L"center"))
  40         return CENTRE;
  41     if (!ustricmp(p, L"leftplus"))
  42         return LEFTPLUS;
  43     return LEFT;
  44 }
  45
  46 static textconfig text_configure(paragraph *source) {
  47     textconfig ret;
  48
  49     /*
  50      * Non-negotiables.
  51      */
  52     ret.bullet.next = NULL;
  53     ret.bullet.alt = NULL;
  54     ret.bullet.type = word_Normal;
  55     ret.atitle.just_numbers = FALSE;   /* ignored */
  56
  57     /*
  58      * Defaults.
  59      */
  60     ret.indent = 7;
  61     ret.indent_code = 2;
  62     ret.listindentbefore = 1;
  63     ret.listindentafter = 3;
  64     ret.width = 68;
  65     ret.atitle.align = CENTRE;
  66     ret.atitle.underline = L'=';
  67     ret.achapter.align = LEFT;
  68     ret.achapter.just_numbers = FALSE;
  69     ret.achapter.number_suffix = L": ";
  70     ret.achapter.underline = L'-';
  71     ret.nasect = 1;
  72     ret.asect = mknewa(alignstruct, ret.nasect);
  73     ret.asect[0].align = LEFTPLUS;
  74     ret.asect[0].just_numbers = TRUE;
  75     ret.asect[0].number_suffix = L" ";
  76     ret.asect[0].underline = L'\0';
  77     ret.include_version_id = TRUE;
  78     ret.indent_preambles = FALSE;
  79     ret.bullet.text = L"-";
  80     ret.filename = dupstr("output.txt");
  81
  82     for (; source; source = source->next) {
  83         if (source->type == para_Config) {
  84             if (!ustricmp(source->keyword, L"text-indent")) {
  85                 ret.indent = utoi(uadv(source->keyword));
  86             } else if (!ustricmp(source->keyword, L"text-filename")) {
  87                 sfree(ret.filename);
  88                 ret.filename = utoa_dup(uadv(source->keyword));
  89             } else if (!ustricmp(source->keyword, L"text-indent-code")) {
  90                 ret.indent_code = utoi(uadv(source->keyword));
  91             } else if (!ustricmp(source->keyword, L"text-width")) {
  92                 ret.width = utoi(uadv(source->keyword));
  93             } else if (!ustricmp(source->keyword, L"text-list-indent")) {
  94                 ret.listindentbefore = utoi(uadv(source->keyword));
  95             } else if (!ustricmp(source->keyword, L"text-listitem-indent")) {
  96                 ret.listindentafter = utoi(uadv(source->keyword));
  97             } else if (!ustricmp(source->keyword, L"text-chapter-align")) {
  98                 ret.achapter.align = utoalign(uadv(source->keyword));
  99             } else if (!ustricmp(source->keyword, L"text-chapter-underline")) {
 100                 ret.achapter.underline = *uadv(source->keyword);
 101             } else if (!ustricmp(source->keyword, L"text-chapter-numeric")) {
 102                 ret.achapter.just_numbers = utob(uadv(source->keyword));
 103             } else if (!ustricmp(source->keyword, L"text-chapter-suffix")) {
 104                 ret.achapter.number_suffix = uadv(source->keyword);
 105             } else if (!ustricmp(source->keyword, L"text-section-align")) {
 106                 wchar_t *p = uadv(source->keyword);
 107                 int n = 0;
 108                 if (uisdigit(*p)) {
 109                     n = utoi(p);
 110                     p = uadv(p);
 111                 }
 112                 if (n >= ret.nasect) {
 113                     int i;
 114                     ret.asect = resize(ret.asect, n+1);
 115                     for (i = ret.nasect; i <= n; i++)
 116                         ret.asect[i] = ret.asect[ret.nasect-1];
 117                     ret.nasect = n+1;
 118                 }
 119                 ret.asect[n].align = utoalign(p);
 120             } else if (!ustricmp(source->keyword, L"text-section-underline")) {
 121                 wchar_t *p = uadv(source->keyword);
 122                 int n = 0;
 123                 if (uisdigit(*p)) {
 124                     n = utoi(p);
 125                     p = uadv(p);
 126                 }
 127                 if (n >= ret.nasect) {
 128                     int i;
 129                     ret.asect = resize(ret.asect, n+1);
 130                     for (i = ret.nasect; i <= n; i++)
 131                         ret.asect[i] = ret.asect[ret.nasect-1];
 132                     ret.nasect = n+1;
 133                 }
 134                 ret.asect[n].underline = *p;
 135             } else if (!ustricmp(source->keyword, L"text-section-numeric")) {
 136                 wchar_t *p = uadv(source->keyword);
 137                 int n = 0;
 138                 if (uisdigit(*p)) {
 139                     n = utoi(p);
 140                     p = uadv(p);
 141                 }
 142                 if (n >= ret.nasect) {
 143                     int i;
 144                     ret.asect = resize(ret.asect, n+1);
 145                     for (i = ret.nasect; i <= n; i++)
 146                         ret.asect[i] = ret.asect[ret.nasect-1];
 147                     ret.nasect = n+1;
 148                 }
 149                 ret.asect[n].just_numbers = utob(p);
 150             } else if (!ustricmp(source->keyword, L"text-section-suffix")) {
 151                 wchar_t *p = uadv(source->keyword);
 152                 int n = 0;
 153                 if (uisdigit(*p)) {
 154                     n = utoi(p);
 155                     p = uadv(p);
 156                 }
 157                 if (n >= ret.nasect) {
 158                     int i;
 159                     ret.asect = resize(ret.asect, n+1);
 160                     for (i = ret.nasect; i <= n; i++) {
 161                         ret.asect[i] = ret.asect[ret.nasect-1];
 162                     }
 163                     ret.nasect = n+1;
 164                 }
 165                 ret.asect[n].number_suffix = p;
 166             } else if (!ustricmp(source->keyword, L"text-title-align")) {
 167                 ret.atitle.align = utoalign(uadv(source->keyword));
 168             } else if (!ustricmp(source->keyword, L"text-title-underline")) {
 169                 ret.atitle.underline = *uadv(source->keyword);
 170             } else if (!ustricmp(source->keyword, L"text-versionid")) {
 171                 ret.include_version_id = utob(uadv(source->keyword));
 172             } else if (!ustricmp(source->keyword, L"text-indent-preamble")) {
 173                 ret.indent_preambles = utob(uadv(source->keyword));
 174             } else if (!ustricmp(source->keyword, L"text-bullet")) {
 175                 ret.bullet.text = uadv(source->keyword);
 176             }
 177         }
 178     }
 179
 180     return ret;
 181 }
 182
 183 paragraph *text_config_filename(char *filename)
 184 {
 185     paragraph *p;
 186     wchar_t *ufilename, *up;
 187     int len;
 188
 189     p = mknew(paragraph);
 190     memset(p, 0, sizeof(*p));
 191     p->type = para_Config;
 192     p->next = NULL;
 193     p->fpos.filename = "<command line>";
 194     p->fpos.line = p->fpos.col = -1;
 195
 196     ufilename = ufroma_dup(filename);
 197     len = ustrlen(ufilename) + 2 + lenof(L"text-filename");
 198     p->keyword = mknewa(wchar_t, len);
 199     up = p->keyword;
 200     ustrcpy(up, L"text-filename");
 201     up = uadv(up);
 202     ustrcpy(up, ufilename);
 203     up = uadv(up);
 204     *up = L'\0';
 205     assert(up - p->keyword < len);
 206     sfree(ufilename);
 207
 208     return p;
 209 }
 210
 211 void text_backend(paragraph *sourceform, keywordlist *keywords,
 212                   indexdata *idx) {
 213     paragraph *p;
 214     textconfig conf;
 215     word *prefix, *body, *wp;
 216     word spaceword;
 217     FILE *fp;
 218     char *prefixextra;
 219     int nesting, nestindent;
 220     int indentb, indenta;
 221
 222     IGNORE(keywords);                  /* we don't happen to need this */
 223     IGNORE(idx);                       /* or this */
 224
 225     conf = text_configure(sourceform);
 226
 227     /*
 228      * Open the output file.
 229      */
 230     fp = fopen(conf.filename, "w");
 231     if (!fp) {
 232         error(err_cantopenw, conf.filename);
 233         return;
 234     }
 235
 236     /* Do the title */
 237     for (p = sourceform; p; p = p->next)
 238         if (p->type == para_Title)
 239             text_heading(fp, NULL, NULL, p->words,
 240                          conf.atitle, conf.indent, conf.width);
 241
 242     nestindent = conf.listindentbefore + conf.listindentafter;
 243     nesting = (conf.indent_preambles ? 0 : -conf.indent);
 244
 245     /* Do the main document */
 246     for (p = sourceform; p; p = p->next) switch (p->type) {
 247
 248       case para_QuotePush:
 249         nesting += 2;
 250         break;
 251       case para_QuotePop:
 252         nesting -= 2;
 253         assert(nesting >= 0);
 254         break;
 255
 256       case para_LcontPush:
 257         nesting += nestindent;
 258         break;
 259       case para_LcontPop:
 260         nesting -= nestindent;
 261         assert(nesting >= 0);
 262         break;
 263
 264         /*
 265          * Things we ignore because we've already processed them or
 266          * aren't going to touch them in this pass.
 267          */
 268       case para_IM:
 269       case para_BR:
 270       case para_Biblio:                /* only touch BiblioCited */
 271       case para_VersionID:
 272       case para_NoCite:
 273       case para_Title:
 274         break;
 275
 276         /*
 277          * Chapter titles.
 278          */
 279       case para_Chapter:
 280       case para_Appendix:
 281       case para_UnnumberedChapter:
 282         text_heading(fp, p->kwtext, p->kwtext2, p->words,
 283                      conf.achapter, conf.indent, conf.width);
 284         nesting = 0;
 285         break;
 286
 287       case para_Heading:
 288       case para_Subsect:
 289         text_heading(fp, p->kwtext, p->kwtext2, p->words,
 290                      conf.asect[p->aux>=conf.nasect ? conf.nasect-1 : p->aux],
 291                      conf.indent, conf.width);
 292         break;
 293
 294       case para_Rule:
 295         text_rule(fp, conf.indent + nesting, conf.width - nesting);
 296         break;
 297
 298       case para_Normal:
 299       case para_Copyright:
 300       case para_DescribedThing:
 301       case para_Description:
 302       case para_BiblioCited:
 303       case para_Bullet:
 304       case para_NumberedList:
 305         if (p->type == para_Bullet) {
 306             prefix = &conf.bullet;
 307             prefixextra = NULL;
 308             indentb = conf.listindentbefore;
 309             indenta = conf.listindentafter;
 310         } else if (p->type == para_NumberedList) {
 311             prefix = p->kwtext;
 312             prefixextra = ".";         /* FIXME: configurability */
 313             indentb = conf.listindentbefore;
 314             indenta = conf.listindentafter;
 315         } else if (p->type == para_Description) {
 316             prefix = NULL;
 317             prefixextra = NULL;
 318             indentb = conf.listindentbefore;
 319             indenta = conf.listindentafter;
 320         } else {
 321             prefix = NULL;
 322             prefixextra = NULL;
 323             indentb = indenta = 0;
 324         }
 325         if (p->type == para_BiblioCited) {
 326             body = dup_word_list(p->kwtext);
 327             for (wp = body; wp->next; wp = wp->next);
 328             wp->next = &spaceword;
 329             spaceword.next = p->words;
 330             spaceword.alt = NULL;
 331             spaceword.type = word_WhiteSpace;
 332             spaceword.text = NULL;
 333         } else {
 334             wp = NULL;
 335             body = p->words;
 336         }
 337         text_para(fp, prefix, prefixextra, body,
 338                   conf.indent + nesting + indentb, indenta,
 339                   conf.width - nesting - indentb - indenta);
 340         if (wp) {
 341             wp->next = NULL;
 342             free_word_list(body);
 343         }
 344         break;
 345
 346       case para_Code:
 347         text_codepara(fp, p->words,
 348                       conf.indent + nesting + conf.indent_code,
 349                       conf.width - nesting - 2 * conf.indent_code);
 350         break;
 351     }
 352
 353     /* Do the version ID */
 354     if (conf.include_version_id) {
 355         for (p = sourceform; p; p = p->next)
 356             if (p->type == para_VersionID)
 357                 text_versionid(fp, p->words);
 358     }
 359
 360     /*
 361      * Tidy up
 362      */
 363     fclose(fp);
 364     sfree(conf.asect);
 365     sfree(conf.filename);
 366 }
 367
 368 /*
 369  * Convert a wide string into a string of chars. If `result' is
 370  * non-NULL, mallocs the resulting string and stores a pointer to
 371  * it in `*result'. If `result' is NULL, merely checks whether all
 372  * characters in the string are feasible for the output character
 373  * set.
 374  *
 375  * Return is nonzero if all characters are OK. If not all
 376  * characters are OK but `result' is non-NULL, a result _will_
 377  * still be generated!
 378  */
 379 static int text_convert(wchar_t *s, char **result) {
 380     /*
 381      * FIXME. Currently this is ISO8859-1 only.
 382      */
 383     int doing = (result != 0);
 384     int ok = TRUE;
 385     char *p = NULL;
 386     int plen = 0, psize = 0;
 387
 388     for (; *s; s++) {
 389         wchar_t c = *s;
 390         char outc;
 391
 392         if ((c >= 32 && c <= 126) ||
 393             (c >= 160 && c <= 255)) {
 394             /* Char is OK. */
 395             outc = (char)c;
 396         } else {
 397             /* Char is not OK. */
 398             ok = FALSE;
 399             outc = 0xBF;               /* approximate the good old DEC `uh?' */
 400         }
 401         if (doing) {
 402             if (plen >= psize) {
 403                 psize = plen + 256;
 404                 p = resize(p, psize);
 405             }
 406             p[plen++] = outc;
 407         }
 408     }
 409     if (doing) {
 410         p = resize(p, plen+1);
 411         p[plen] = '\0';
 412         *result = p;
 413     }
 414     return ok;
 415 }
 416
 417 static void text_rdaddwc(rdstringc *rs, word *text, word *end) {
 418     char *c;
 419
 420     for (; text && text != end; text = text->next) switch (text->type) {
 421       case word_HyperLink:
 422       case word_HyperEnd:
 423       case word_UpperXref:
 424       case word_LowerXref:
 425       case word_XrefEnd:
 426       case word_IndexRef:
 427         break;
 428
 429       case word_Normal:
 430       case word_Emph:
 431       case word_Code:
 432       case word_WeakCode:
 433       case word_WhiteSpace:
 434       case word_EmphSpace:
 435       case word_CodeSpace:
 436       case word_WkCodeSpace:
 437       case word_Quote:
 438       case word_EmphQuote:
 439       case word_CodeQuote:
 440       case word_WkCodeQuote:
 441         assert(text->type != word_CodeQuote &&
 442                text->type != word_WkCodeQuote);
 443         if (towordstyle(text->type) == word_Emph &&
 444             (attraux(text->aux) == attr_First ||
 445              attraux(text->aux) == attr_Only))
 446             rdaddc(rs, '_');           /* FIXME: configurability */
 447         else if (towordstyle(text->type) == word_Code &&
 448                  (attraux(text->aux) == attr_First ||
 449                   attraux(text->aux) == attr_Only))
 450             rdaddc(rs, '`');           /* FIXME: configurability */
 451         if (removeattr(text->type) == word_Normal) {
 452             if (text_convert(text->text, &c))
 453                 rdaddsc(rs, c);
 454             else
 455                 text_rdaddwc(rs, text->alt, NULL);
 456             sfree(c);
 457         } else if (removeattr(text->type) == word_WhiteSpace) {
 458             rdaddc(rs, ' ');
 459         } else if (removeattr(text->type) == word_Quote) {
 460             rdaddc(rs, quoteaux(text->aux) == quote_Open ? '`' : '\'');
 461                                        /* FIXME: configurability */
 462         }
 463         if (towordstyle(text->type) == word_Emph &&
 464             (attraux(text->aux) == attr_Last ||
 465              attraux(text->aux) == attr_Only))
 466             rdaddc(rs, '_');           /* FIXME: configurability */
 467         else if (towordstyle(text->type) == word_Code &&
 468                  (attraux(text->aux) == attr_Last ||
 469                   attraux(text->aux) == attr_Only))
 470             rdaddc(rs, '\'');          /* FIXME: configurability */
 471         break;
 472     }
 473 }
 474
 475 static int text_width(word *);
 476
 477 static int text_width_list(word *text) {
 478     int w = 0;
 479     while (text) {
 480         w += text_width(text);
 481         text = text->next;
 482     }
 483     return w;
 484 }
 485
 486 static int text_width(word *text) {
 487     switch (text->type) {
 488       case word_HyperLink:
 489       case word_HyperEnd:
 490       case word_UpperXref:
 491       case word_LowerXref:
 492       case word_XrefEnd:
 493       case word_IndexRef:
 494         return 0;
 495
 496       case word_Normal:
 497       case word_Emph:
 498       case word_Code:
 499       case word_WeakCode:
 500         return (((text->type == word_Emph ||
 501                   text->type == word_Code)
 502                  ? (attraux(text->aux) == attr_Only ? 2 :
 503                     attraux(text->aux) == attr_Always ? 0 : 1)
 504                  : 0) +
 505                 (text_convert(text->text, NULL) ?
 506                  ustrlen(text->text) :
 507                  text_width_list(text->alt)));
 508
 509       case word_WhiteSpace:
 510       case word_EmphSpace:
 511       case word_CodeSpace:
 512       case word_WkCodeSpace:
 513       case word_Quote:
 514       case word_EmphQuote:
 515       case word_CodeQuote:
 516       case word_WkCodeQuote:
 517         assert(text->type != word_CodeQuote &&
 518                text->type != word_WkCodeQuote);
 519         return (((towordstyle(text->type) == word_Emph ||
 520                   towordstyle(text->type) == word_Code)
 521                  ? (attraux(text->aux) == attr_Only ? 2 :
 522                     attraux(text->aux) == attr_Always ? 0 : 1)
 523                  : 0) + 1);
 524     }
 525     return 0;                          /* should never happen */
 526 }
 527
 528 static void text_heading(FILE *fp, word *tprefix, word *nprefix, word *text,
 529                          alignstruct align, int indent, int width) {
 530     rdstringc t = { 0, 0, NULL };
 531     int margin, length;
 532     int firstlinewidth, wrapwidth;
 533     wrappedline *wrapping, *p;
 534
 535     if (align.just_numbers && nprefix) {
 536         char *c;
 537         text_rdaddwc(&t, nprefix, NULL);
 538         if (text_convert(align.number_suffix, &c)) {
 539             rdaddsc(&t, c);
 540             sfree(c);
 541         }
 542     } else if (!align.just_numbers && tprefix) {
 543         char *c;
 544         text_rdaddwc(&t, tprefix, NULL);
 545         if (text_convert(align.number_suffix, &c)) {
 546             rdaddsc(&t, c);
 547             sfree(c);
 548         }
 549     }
 550     margin = length = (t.text ? strlen(t.text) : 0);
 551
 552     if (align.align == LEFTPLUS) {
 553         margin = indent - margin;
 554         if (margin < 0) margin = 0;
 555         firstlinewidth = indent + width - margin - length;
 556         wrapwidth = width;
 557     } else if (align.align == LEFT || align.align == CENTRE) {
 558         margin = 0;
 559         firstlinewidth = indent + width - length;
 560         wrapwidth = indent + width;
 561     }
 562
 563     wrapping = wrap_para(text, firstlinewidth, wrapwidth, text_width);
 564     for (p = wrapping; p; p = p->next) {
 565         text_rdaddwc(&t, p->begin, p->end);
 566         length = (t.text ? strlen(t.text) : 0);
 567         if (align.align == CENTRE) {
 568             margin = (indent + width - length)/2;
 569             if (margin < 0) margin = 0;
 570         }
 571         fprintf(fp, "%*s%s\n", margin, "", t.text);
 572         if (align.underline != L'\0') {
 573             char *u, uc;
 574             wchar_t uw[2];
 575             uw[0] = align.underline; uw[1] = L'\0';
 576             text_convert(uw, &u);
 577             uc = u[0];
 578             sfree(u);
 579             fprintf(fp, "%*s", margin, "");
 580             while (length--)
 581                 putc(uc, fp);
 582             putc('\n', fp);
 583         }
 584         if (align.align == LEFTPLUS)
 585             margin = indent;
 586         else
 587             margin = 0;
 588         sfree(t.text);
 589         t = empty_rdstringc;
 590     }
 591     wrap_free(wrapping);
 592     putc('\n', fp);
 593
 594     sfree(t.text);
 595 }
 596
 597 static void text_rule(FILE *fp, int indent, int width) {
 598     while (indent--) putc(' ', fp);
 599     while (width--) putc('-', fp);     /* FIXME: configurability! */
 600     putc('\n', fp);
 601     putc('\n', fp);
 602 }
 603
 604 static void text_para(FILE *fp, word *prefix, char *prefixextra, word *text,
 605                       int indent, int extraindent, int width) {
 606     wrappedline *wrapping, *p;
 607     rdstringc pfx = { 0, 0, NULL };
 608     int e;
 609     int firstlinewidth = width;
 610
 611     if (prefix) {
 612         text_rdaddwc(&pfx, prefix, NULL);
 613         if (prefixextra)
 614             rdaddsc(&pfx, prefixextra);
 615         fprintf(fp, "%*s%s", indent, "", pfx.text);
 616         /* If the prefix is too long, shorten the first line to fit. */
 617         e = extraindent - strlen(pfx.text);
 618         if (e < 0) {
 619             firstlinewidth += e;       /* this decreases it, since e < 0 */
 620             if (firstlinewidth < 0) {
 621                 e = indent + extraindent;
 622                 firstlinewidth = width;
 623                 fprintf(fp, "\n");
 624             } else
 625                 e = 0;
 626         }
 627         sfree(pfx.text);
 628     } else
 629         e = indent + extraindent;
 630
 631     wrapping = wrap_para(text, firstlinewidth, width, text_width);
 632     for (p = wrapping; p; p = p->next) {
 633         rdstringc t = { 0, 0, NULL };
 634         text_rdaddwc(&t, p->begin, p->end);
 635         fprintf(fp, "%*s%s\n", e, "", t.text);
 636         e = indent + extraindent;
 637         sfree(t.text);
 638     }
 639     wrap_free(wrapping);
 640     putc('\n', fp);
 641 }
 642
 643 static void text_codepara(FILE *fp, word *text, int indent, int width) {
 644     for (; text; text = text->next) if (text->type == word_WeakCode) {
 645         char *c;
 646         text_convert(text->text, &c);
 647         if (strlen(c) > (size_t)width) {
 648             /* FIXME: warn */
 649         }
 650         fprintf(fp, "%*s%s\n", indent, "", c);
 651         sfree(c);
 652     }
 653
 654     putc('\n', fp);
 655 }
 656
 657 static void text_versionid(FILE *fp, word *text) {
 658     rdstringc t = { 0, 0, NULL };
 659
 660     rdaddc(&t, '[');                   /* FIXME: configurability */
 661     text_rdaddwc(&t, text, NULL);
 662     rdaddc(&t, ']');                   /* FIXME: configurability */
 663
 664     fprintf(fp, "%s\n", t.text);
 665     sfree(t.text);
 666 }