mdw@git.distorted.org.uk Git - sgt/halibut/blob - bk_text.c

   1 /*
   2  * text backend for Halibut
   3  */
   4
   5 #include <stdio.h>
   6 #include <stdlib.h>
   7 #include <assert.h>
   8 #include "halibut.h"
   9
  10 typedef enum { LEFT, LEFTPLUS, CENTRE } alignment;
  11 typedef struct {
  12     alignment align;
  13     int just_numbers;
  14     wchar_t underline;
  15     wchar_t *number_suffix;
  16 } alignstruct;
  17
  18 typedef struct {
  19     int indent, indent_code;
  20     int listindentbefore, listindentafter;
  21     int width;
  22     alignstruct atitle, achapter, *asect;
  23     int nasect;
  24     int include_version_id;
  25     int indent_preambles;
  26     word bullet;
  27     char *filename;
  28 } textconfig;
  29
  30 static int text_convert(wchar_t *, char **);
  31
  32 static void text_heading(FILE *, word *, word *, word *, alignstruct, int,int);
  33 static void text_rule(FILE *, int, int);
  34 static void text_para(FILE *, word *, char *, word *, int, int, int);
  35 static void text_codepara(FILE *, word *, int, int);
  36 static void text_versionid(FILE *, word *);
  37
  38 static alignment utoalign(wchar_t *p) {
  39     if (!ustricmp(p, L"centre") || !ustricmp(p, L"center"))
  40         return CENTRE;
  41     if (!ustricmp(p, L"leftplus"))
  42         return LEFTPLUS;
  43     return LEFT;
  44 }
  45
  46 static textconfig text_configure(paragraph *source) {
  47     textconfig ret;
  48
  49     /*
  50      * Non-negotiables.
  51      */
  52     ret.bullet.next = NULL;
  53     ret.bullet.alt = NULL;
  54     ret.bullet.type = word_Normal;
  55     ret.atitle.just_numbers = FALSE;   /* ignored */
  56
  57     /*
  58      * Defaults.
  59      */
  60     ret.indent = 7;
  61     ret.indent_code = 2;
  62     ret.listindentbefore = 1;
  63     ret.listindentafter = 3;
  64     ret.width = 68;
  65     ret.atitle.align = CENTRE;
  66     ret.atitle.underline = L'=';
  67     ret.achapter.align = LEFT;
  68     ret.achapter.just_numbers = FALSE;
  69     ret.achapter.number_suffix = L": ";
  70     ret.achapter.underline = L'-';
  71     ret.nasect = 1;
  72     ret.asect = mknewa(alignstruct, ret.nasect);
  73     ret.asect[0].align = LEFTPLUS;
  74     ret.asect[0].just_numbers = TRUE;
  75     ret.asect[0].number_suffix = L" ";
  76     ret.asect[0].underline = L'\0';
  77     ret.include_version_id = TRUE;
  78     ret.indent_preambles = FALSE;
  79     ret.bullet.text = L"-";
  80     ret.filename = dupstr("output.txt");
  81
  82     for (; source; source = source->next) {
  83         if (source->type == para_Config) {
  84             if (!ustricmp(source->keyword, L"text-indent")) {
  85                 ret.indent = utoi(uadv(source->keyword));
  86             } else if (!ustricmp(source->keyword, L"text-filename")) {
  87                 sfree(ret.filename);
  88                 ret.filename = utoa_dup(uadv(source->keyword));
  89             } else if (!ustricmp(source->keyword, L"text-indent-code")) {
  90                 ret.indent_code = utoi(uadv(source->keyword));
  91             } else if (!ustricmp(source->keyword, L"text-width")) {
  92                 ret.width = utoi(uadv(source->keyword));
  93             } else if (!ustricmp(source->keyword, L"text-list-indent")) {
  94                 ret.listindentbefore = utoi(uadv(source->keyword));
  95             } else if (!ustricmp(source->keyword, L"text-listitem-indent")) {
  96                 ret.listindentafter = utoi(uadv(source->keyword));
  97             } else if (!ustricmp(source->keyword, L"text-chapter-align")) {
  98                 ret.achapter.align = utoalign(uadv(source->keyword));
  99             } else if (!ustricmp(source->keyword, L"text-chapter-underline")) {
 100                 ret.achapter.underline = *uadv(source->keyword);
 101             } else if (!ustricmp(source->keyword, L"text-chapter-numeric")) {
 102                 ret.achapter.just_numbers = utob(uadv(source->keyword));
 103             } else if (!ustricmp(source->keyword, L"text-chapter-suffix")) {
 104                 ret.achapter.number_suffix = uadv(source->keyword);
 105             } else if (!ustricmp(source->keyword, L"text-section-align")) {
 106                 wchar_t *p = uadv(source->keyword);
 107                 int n = 0;
 108                 if (uisdigit(*p)) {
 109                     n = utoi(p);
 110                     p = uadv(p);
 111                 }
 112                 if (n >= ret.nasect) {
 113                     int i;
 114                     ret.asect = resize(ret.asect, n+1);
 115                     for (i = ret.nasect; i <= n; i++)
 116                         ret.asect[i] = ret.asect[ret.nasect-1];
 117                     ret.nasect = n+1;
 118                 }
 119                 ret.asect[n].align = utoalign(p);
 120             } else if (!ustricmp(source->keyword, L"text-section-underline")) {
 121                 wchar_t *p = uadv(source->keyword);
 122                 int n = 0;
 123                 if (uisdigit(*p)) {
 124                     n = utoi(p);
 125                     p = uadv(p);
 126                 }
 127                 if (n >= ret.nasect) {
 128                     int i;
 129                     ret.asect = resize(ret.asect, n+1);
 130                     for (i = ret.nasect; i <= n; i++)
 131                         ret.asect[i] = ret.asect[ret.nasect-1];
 132                     ret.nasect = n+1;
 133                 }
 134                 ret.asect[n].underline = *p;
 135             } else if (!ustricmp(source->keyword, L"text-section-numeric")) {
 136                 wchar_t *p = uadv(source->keyword);
 137                 int n = 0;
 138                 if (uisdigit(*p)) {
 139                     n = utoi(p);
 140                     p = uadv(p);
 141                 }
 142                 if (n >= ret.nasect) {
 143                     int i;
 144                     ret.asect = resize(ret.asect, n+1);
 145                     for (i = ret.nasect; i <= n; i++)
 146                         ret.asect[i] = ret.asect[ret.nasect-1];
 147                     ret.nasect = n+1;
 148                 }
 149                 ret.asect[n].just_numbers = utob(p);
 150             } else if (!ustricmp(source->keyword, L"text-section-suffix")) {
 151                 wchar_t *p = uadv(source->keyword);
 152                 int n = 0;
 153                 if (uisdigit(*p)) {
 154                     n = utoi(p);
 155                     p = uadv(p);
 156                 }
 157                 if (n >= ret.nasect) {
 158                     int i;
 159                     ret.asect = resize(ret.asect, n+1);
 160                     for (i = ret.nasect; i <= n; i++) {
 161                         ret.asect[i] = ret.asect[ret.nasect-1];
 162                     }
 163                     ret.nasect = n+1;
 164                 }
 165                 ret.asect[n].number_suffix = p;
 166             } else if (!ustricmp(source->keyword, L"text-title-align")) {
 167                 ret.atitle.align = utoalign(uadv(source->keyword));
 168             } else if (!ustricmp(source->keyword, L"text-title-underline")) {
 169                 ret.atitle.underline = *uadv(source->keyword);
 170             } else if (!ustricmp(source->keyword, L"text-versionid")) {
 171                 ret.include_version_id = utob(uadv(source->keyword));
 172             } else if (!ustricmp(source->keyword, L"text-indent-preamble")) {
 173                 ret.indent_preambles = utob(uadv(source->keyword));
 174             } else if (!ustricmp(source->keyword, L"text-bullet")) {
 175                 ret.bullet.text = uadv(source->keyword);
 176             }
 177         }
 178     }
 179
 180     return ret;
 181 }
 182
 183 void text_backend(paragraph *sourceform, keywordlist *keywords,
 184                   indexdata *idx) {
 185     paragraph *p;
 186     textconfig conf;
 187     word *prefix, *body, *wp;
 188     word spaceword;
 189     FILE *fp;
 190     char *prefixextra;
 191     int nesting, nestindent;
 192     int indentb, indenta;
 193
 194     IGNORE(keywords);                  /* we don't happen to need this */
 195     IGNORE(idx);                       /* or this */
 196
 197     conf = text_configure(sourceform);
 198
 199     /*
 200      * Open the output file.
 201      */
 202     fp = fopen(conf.filename, "w");
 203     if (!fp) {
 204         error(err_cantopenw, conf.filename);
 205         return;
 206     }
 207
 208     /* Do the title */
 209     for (p = sourceform; p; p = p->next)
 210         if (p->type == para_Title)
 211             text_heading(fp, NULL, NULL, p->words,
 212                          conf.atitle, conf.indent, conf.width);
 213
 214     nestindent = conf.listindentbefore + conf.listindentafter;
 215     nesting = (conf.indent_preambles ? 0 : -conf.indent);
 216
 217     /* Do the main document */
 218     for (p = sourceform; p; p = p->next) switch (p->type) {
 219
 220       case para_QuotePush:
 221         nesting += 2;
 222         break;
 223       case para_QuotePop:
 224         nesting -= 2;
 225         assert(nesting >= 0);
 226         break;
 227
 228       case para_LcontPush:
 229         nesting += nestindent;
 230         break;
 231       case para_LcontPop:
 232         nesting -= nestindent;
 233         assert(nesting >= 0);
 234         break;
 235
 236         /*
 237          * Things we ignore because we've already processed them or
 238          * aren't going to touch them in this pass.
 239          */
 240       case para_IM:
 241       case para_BR:
 242       case para_Biblio:                /* only touch BiblioCited */
 243       case para_VersionID:
 244       case para_NoCite:
 245       case para_Title:
 246         break;
 247
 248         /*
 249          * Chapter titles.
 250          */
 251       case para_Chapter:
 252       case para_Appendix:
 253       case para_UnnumberedChapter:
 254         text_heading(fp, p->kwtext, p->kwtext2, p->words,
 255                      conf.achapter, conf.indent, conf.width);
 256         nesting = 0;
 257         break;
 258
 259       case para_Heading:
 260       case para_Subsect:
 261         text_heading(fp, p->kwtext, p->kwtext2, p->words,
 262                      conf.asect[p->aux>=conf.nasect ? conf.nasect-1 : p->aux],
 263                      conf.indent, conf.width);
 264         break;
 265
 266       case para_Rule:
 267         text_rule(fp, conf.indent + nesting, conf.width - nesting);
 268         break;
 269
 270       case para_Normal:
 271       case para_Copyright:
 272       case para_DescribedThing:
 273       case para_Description:
 274       case para_BiblioCited:
 275       case para_Bullet:
 276       case para_NumberedList:
 277         if (p->type == para_Bullet) {
 278             prefix = &conf.bullet;
 279             prefixextra = NULL;
 280             indentb = conf.listindentbefore;
 281             indenta = conf.listindentafter;
 282         } else if (p->type == para_NumberedList) {
 283             prefix = p->kwtext;
 284             prefixextra = ".";         /* FIXME: configurability */
 285             indentb = conf.listindentbefore;
 286             indenta = conf.listindentafter;
 287         } else if (p->type == para_Description) {
 288             prefix = NULL;
 289             prefixextra = NULL;
 290             indentb = conf.listindentbefore;
 291             indenta = conf.listindentafter;
 292         } else {
 293             prefix = NULL;
 294             prefixextra = NULL;
 295             indentb = indenta = 0;
 296         }
 297         if (p->type == para_BiblioCited) {
 298             body = dup_word_list(p->kwtext);
 299             for (wp = body; wp->next; wp = wp->next);
 300             wp->next = &spaceword;
 301             spaceword.next = p->words;
 302             spaceword.alt = NULL;
 303             spaceword.type = word_WhiteSpace;
 304             spaceword.text = NULL;
 305         } else {
 306             wp = NULL;
 307             body = p->words;
 308         }
 309         text_para(fp, prefix, prefixextra, body,
 310                   conf.indent + nesting + indentb, indenta,
 311                   conf.width - nesting - indentb - indenta);
 312         if (wp) {
 313             wp->next = NULL;
 314             free_word_list(body);
 315         }
 316         break;
 317
 318       case para_Code:
 319         text_codepara(fp, p->words,
 320                       conf.indent + nesting + conf.indent_code,
 321                       conf.width - nesting - 2 * conf.indent_code);
 322         break;
 323     }
 324
 325     /* Do the version ID */
 326     if (conf.include_version_id) {
 327         for (p = sourceform; p; p = p->next)
 328             if (p->type == para_VersionID)
 329                 text_versionid(fp, p->words);
 330     }
 331
 332     /*
 333      * Tidy up
 334      */
 335     fclose(fp);
 336     sfree(conf.asect);
 337     sfree(conf.filename);
 338 }
 339
 340 /*
 341  * Convert a wide string into a string of chars. If `result' is
 342  * non-NULL, mallocs the resulting string and stores a pointer to
 343  * it in `*result'. If `result' is NULL, merely checks whether all
 344  * characters in the string are feasible for the output character
 345  * set.
 346  *
 347  * Return is nonzero if all characters are OK. If not all
 348  * characters are OK but `result' is non-NULL, a result _will_
 349  * still be generated!
 350  */
 351 static int text_convert(wchar_t *s, char **result) {
 352     /*
 353      * FIXME. Currently this is ISO8859-1 only.
 354      */
 355     int doing = (result != 0);
 356     int ok = TRUE;
 357     char *p = NULL;
 358     int plen = 0, psize = 0;
 359
 360     for (; *s; s++) {
 361         wchar_t c = *s;
 362         char outc;
 363
 364         if ((c >= 32 && c <= 126) ||
 365             (c >= 160 && c <= 255)) {
 366             /* Char is OK. */
 367             outc = (char)c;
 368         } else {
 369             /* Char is not OK. */
 370             ok = FALSE;
 371             outc = 0xBF;               /* approximate the good old DEC `uh?' */
 372         }
 373         if (doing) {
 374             if (plen >= psize) {
 375                 psize = plen + 256;
 376                 p = resize(p, psize);
 377             }
 378             p[plen++] = outc;
 379         }
 380     }
 381     if (doing) {
 382         p = resize(p, plen+1);
 383         p[plen] = '\0';
 384         *result = p;
 385     }
 386     return ok;
 387 }
 388
 389 static void text_rdaddwc(rdstringc *rs, word *text, word *end) {
 390     char *c;
 391
 392     for (; text && text != end; text = text->next) switch (text->type) {
 393       case word_HyperLink:
 394       case word_HyperEnd:
 395       case word_UpperXref:
 396       case word_LowerXref:
 397       case word_XrefEnd:
 398       case word_IndexRef:
 399         break;
 400
 401       case word_Normal:
 402       case word_Emph:
 403       case word_Code:
 404       case word_WeakCode:
 405       case word_WhiteSpace:
 406       case word_EmphSpace:
 407       case word_CodeSpace:
 408       case word_WkCodeSpace:
 409       case word_Quote:
 410       case word_EmphQuote:
 411       case word_CodeQuote:
 412       case word_WkCodeQuote:
 413         assert(text->type != word_CodeQuote &&
 414                text->type != word_WkCodeQuote);
 415         if (towordstyle(text->type) == word_Emph &&
 416             (attraux(text->aux) == attr_First ||
 417              attraux(text->aux) == attr_Only))
 418             rdaddc(rs, '_');           /* FIXME: configurability */
 419         else if (towordstyle(text->type) == word_Code &&
 420                  (attraux(text->aux) == attr_First ||
 421                   attraux(text->aux) == attr_Only))
 422             rdaddc(rs, '`');           /* FIXME: configurability */
 423         if (removeattr(text->type) == word_Normal) {
 424             if (text_convert(text->text, &c))
 425                 rdaddsc(rs, c);
 426             else
 427                 text_rdaddwc(rs, text->alt, NULL);
 428             sfree(c);
 429         } else if (removeattr(text->type) == word_WhiteSpace) {
 430             rdaddc(rs, ' ');
 431         } else if (removeattr(text->type) == word_Quote) {
 432             rdaddc(rs, quoteaux(text->aux) == quote_Open ? '`' : '\'');
 433                                        /* FIXME: configurability */
 434         }
 435         if (towordstyle(text->type) == word_Emph &&
 436             (attraux(text->aux) == attr_Last ||
 437              attraux(text->aux) == attr_Only))
 438             rdaddc(rs, '_');           /* FIXME: configurability */
 439         else if (towordstyle(text->type) == word_Code &&
 440                  (attraux(text->aux) == attr_Last ||
 441                   attraux(text->aux) == attr_Only))
 442             rdaddc(rs, '\'');          /* FIXME: configurability */
 443         break;
 444     }
 445 }
 446
 447 static int text_width(word *);
 448
 449 static int text_width_list(word *text) {
 450     int w = 0;
 451     while (text) {
 452         w += text_width(text);
 453         text = text->next;
 454     }
 455     return w;
 456 }
 457
 458 static int text_width(word *text) {
 459     switch (text->type) {
 460       case word_HyperLink:
 461       case word_HyperEnd:
 462       case word_UpperXref:
 463       case word_LowerXref:
 464       case word_XrefEnd:
 465       case word_IndexRef:
 466         return 0;
 467
 468       case word_Normal:
 469       case word_Emph:
 470       case word_Code:
 471       case word_WeakCode:
 472         return (((text->type == word_Emph ||
 473                   text->type == word_Code)
 474                  ? (attraux(text->aux) == attr_Only ? 2 :
 475                     attraux(text->aux) == attr_Always ? 0 : 1)
 476                  : 0) +
 477                 (text_convert(text->text, NULL) ?
 478                  ustrlen(text->text) :
 479                  text_width_list(text->alt)));
 480
 481       case word_WhiteSpace:
 482       case word_EmphSpace:
 483       case word_CodeSpace:
 484       case word_WkCodeSpace:
 485       case word_Quote:
 486       case word_EmphQuote:
 487       case word_CodeQuote:
 488       case word_WkCodeQuote:
 489         assert(text->type != word_CodeQuote &&
 490                text->type != word_WkCodeQuote);
 491         return (((towordstyle(text->type) == word_Emph ||
 492                   towordstyle(text->type) == word_Code)
 493                  ? (attraux(text->aux) == attr_Only ? 2 :
 494                     attraux(text->aux) == attr_Always ? 0 : 1)
 495                  : 0) + 1);
 496     }
 497     return 0;                          /* should never happen */
 498 }
 499
 500 static void text_heading(FILE *fp, word *tprefix, word *nprefix, word *text,
 501                          alignstruct align, int indent, int width) {
 502     rdstringc t = { 0, 0, NULL };
 503     int margin, length;
 504     int firstlinewidth, wrapwidth;
 505     wrappedline *wrapping, *p;
 506
 507     if (align.just_numbers && nprefix) {
 508         char *c;
 509         text_rdaddwc(&t, nprefix, NULL);
 510         if (text_convert(align.number_suffix, &c)) {
 511             rdaddsc(&t, c);
 512             sfree(c);
 513         }
 514     } else if (!align.just_numbers && tprefix) {
 515         char *c;
 516         text_rdaddwc(&t, tprefix, NULL);
 517         if (text_convert(align.number_suffix, &c)) {
 518             rdaddsc(&t, c);
 519             sfree(c);
 520         }
 521     }
 522     margin = length = (t.text ? strlen(t.text) : 0);
 523
 524     if (align.align == LEFTPLUS) {
 525         margin = indent - margin;
 526         if (margin < 0) margin = 0;
 527         firstlinewidth = indent + width - margin - length;
 528         wrapwidth = width;
 529     } else if (align.align == LEFT || align.align == CENTRE) {
 530         margin = 0;
 531         firstlinewidth = indent + width - length;
 532         wrapwidth = indent + width;
 533     }
 534
 535     wrapping = wrap_para(text, firstlinewidth, wrapwidth, text_width);
 536     for (p = wrapping; p; p = p->next) {
 537         text_rdaddwc(&t, p->begin, p->end);
 538         length = (t.text ? strlen(t.text) : 0);
 539         if (align.align == CENTRE) {
 540             margin = (indent + width - length)/2;
 541             if (margin < 0) margin = 0;
 542         }
 543         fprintf(fp, "%*s%s\n", margin, "", t.text);
 544         if (align.underline != L'\0') {
 545             char *u, uc;
 546             wchar_t uw[2];
 547             uw[0] = align.underline; uw[1] = L'\0';
 548             text_convert(uw, &u);
 549             uc = u[0];
 550             sfree(u);
 551             fprintf(fp, "%*s", margin, "");
 552             while (length--)
 553                 putc(uc, fp);
 554             putc('\n', fp);
 555         }
 556         if (align.align == LEFTPLUS)
 557             margin = indent;
 558         else
 559             margin = 0;
 560         sfree(t.text);
 561         t = empty_rdstringc;
 562     }
 563     wrap_free(wrapping);
 564     putc('\n', fp);
 565
 566     sfree(t.text);
 567 }
 568
 569 static void text_rule(FILE *fp, int indent, int width) {
 570     while (indent--) putc(' ', fp);
 571     while (width--) putc('-', fp);     /* FIXME: configurability! */
 572     putc('\n', fp);
 573     putc('\n', fp);
 574 }
 575
 576 static void text_para(FILE *fp, word *prefix, char *prefixextra, word *text,
 577                       int indent, int extraindent, int width) {
 578     wrappedline *wrapping, *p;
 579     rdstringc pfx = { 0, 0, NULL };
 580     int e;
 581     int firstlinewidth = width;
 582
 583     if (prefix) {
 584         text_rdaddwc(&pfx, prefix, NULL);
 585         if (prefixextra)
 586             rdaddsc(&pfx, prefixextra);
 587         fprintf(fp, "%*s%s", indent, "", pfx.text);
 588         /* If the prefix is too long, shorten the first line to fit. */
 589         e = extraindent - strlen(pfx.text);
 590         if (e < 0) {
 591             firstlinewidth += e;       /* this decreases it, since e < 0 */
 592             if (firstlinewidth < 0) {
 593                 e = indent + extraindent;
 594                 firstlinewidth = width;
 595                 fprintf(fp, "\n");
 596             } else
 597                 e = 0;
 598         }
 599         sfree(pfx.text);
 600     } else
 601         e = indent + extraindent;
 602
 603     wrapping = wrap_para(text, firstlinewidth, width, text_width);
 604     for (p = wrapping; p; p = p->next) {
 605         rdstringc t = { 0, 0, NULL };
 606         text_rdaddwc(&t, p->begin, p->end);
 607         fprintf(fp, "%*s%s\n", e, "", t.text);
 608         e = indent + extraindent;
 609         sfree(t.text);
 610     }
 611     wrap_free(wrapping);
 612     putc('\n', fp);
 613 }
 614
 615 static void text_codepara(FILE *fp, word *text, int indent, int width) {
 616     for (; text; text = text->next) if (text->type == word_WeakCode) {
 617         char *c;
 618         text_convert(text->text, &c);
 619         if (strlen(c) > (size_t)width) {
 620             /* FIXME: warn */
 621         }
 622         fprintf(fp, "%*s%s\n", indent, "", c);
 623         sfree(c);
 624     }
 625
 626     putc('\n', fp);
 627 }
 628
 629 static void text_versionid(FILE *fp, word *text) {
 630     rdstringc t = { 0, 0, NULL };
 631
 632     rdaddc(&t, '[');                   /* FIXME: configurability */
 633     text_rdaddwc(&t, text, NULL);
 634     rdaddc(&t, ']');                   /* FIXME: configurability */
 635
 636     fprintf(fp, "%s\n", t.text);
 637     sfree(t.text);
 638 }