mdw@git.distorted.org.uk Git - sgt/halibut/blob - bk_text.c

   1 /*
   2  * text backend for Halibut
   3  */
   4
   5 #include <stdio.h>
   6 #include <stdlib.h>
   7 #include <assert.h>
   8 #include "halibut.h"
   9
/*
 * How a heading is positioned on the line.
 *
 *  - LEFT:     heading starts at column zero.
 *  - LEFTPLUS: the heading's prefix is pushed out into the left
 *              margin (by up to the body indent) and continuation
 *              lines are indented to the body indent.
 *  - CENTRE:   each heading line is centred within indent+width.
 */
typedef enum { LEFT, LEFTPLUS, CENTRE } alignment;

/*
 * Formatting rules for one class of heading (title, chapter, or one
 * level of section).
 */
typedef struct {
    alignment align;                   /* positioning, as above */
    int just_numbers;                  /* true: prefix with the second
                                        * (kwtext2) form of the heading
                                        * keyword rather than the first */
    wchar_t underline;                 /* underline character, or L'\0'
                                        * for no underline at all */
    wchar_t *number_suffix;            /* appended after the prefix */
} alignstruct;
  17
/*
 * Complete configuration of the text backend, as assembled by
 * text_configure() from defaults and para_Config paragraphs.
 */
typedef struct {
    int indent, indent_code;           /* body indent; extra indent
                                        * applied to code paragraphs */
    int listindentbefore, listindentafter; /* indent before a list
                                        * item's prefix, and the space
                                        * reserved for the prefix */
    int width;                         /* width of the body text */
    alignstruct atitle, achapter, *asect; /* heading styles; asect is a
                                        * heap array indexed by section
                                        * level */
    int nasect;                        /* number of entries in asect */
    int include_version_id;            /* emit version IDs at the end? */
    int indent_preambles;              /* indent text that precedes the
                                        * first chapter? */
    int charset;                       /* output character set */
    word bullet;                       /* prefix word for bullet items */
    char *filename;                    /* output file name (dynamically
                                        * allocated) */
} textconfig;
  30
/*
 * An open output file together with everything text_output() needs
 * in order to convert wide-character text into the output charset.
 */
typedef struct {
    FILE *fp;                          /* destination stream */
    int charset;                       /* charset to convert into */
    charset_state state;               /* conversion state carried
                                        * between text_output() calls */
} textfile;
  36
/*
 * Formatters for the various kinds of paragraph. Each one writes its
 * output through the textfile it is given.
 */
static void text_heading(textfile *, word *, word *, word *, alignstruct,
                         int,int);
static void text_rule(textfile *, int, int);
static void text_para(textfile *, word *, wchar_t *, word *, int, int, int);
static void text_codepara(textfile *, word *, int, int);
static void text_versionid(textfile *, word *);

/*
 * Low-level output: write a wide string via charset conversion, and
 * write one character a given number of times.
 */
static void text_output(textfile *, const wchar_t *);
static void text_output_many(textfile *, int, wchar_t);
  46
  47 static alignment utoalign(wchar_t *p) {
  48     if (!ustricmp(p, L"centre") || !ustricmp(p, L"center"))
  49         return CENTRE;
  50     if (!ustricmp(p, L"leftplus"))
  51         return LEFTPLUS;
  52     return LEFT;
  53 }
  54
  55 static textconfig text_configure(paragraph *source) {
  56     textconfig ret;
  57
  58     /*
  59      * Non-negotiables.
  60      */
  61     ret.bullet.next = NULL;
  62     ret.bullet.alt = NULL;
  63     ret.bullet.type = word_Normal;
  64     ret.atitle.just_numbers = FALSE;   /* ignored */
  65
  66     /*
  67      * Defaults.
  68      */
  69     ret.indent = 7;
  70     ret.indent_code = 2;
  71     ret.listindentbefore = 1;
  72     ret.listindentafter = 3;
  73     ret.width = 68;
  74     ret.atitle.align = CENTRE;
  75     ret.atitle.underline = L'=';
  76     ret.achapter.align = LEFT;
  77     ret.achapter.just_numbers = FALSE;
  78     ret.achapter.number_suffix = L": ";
  79     ret.achapter.underline = L'-';
  80     ret.nasect = 1;
  81     ret.asect = mknewa(alignstruct, ret.nasect);
  82     ret.asect[0].align = LEFTPLUS;
  83     ret.asect[0].just_numbers = TRUE;
  84     ret.asect[0].number_suffix = L" ";
  85     ret.asect[0].underline = L'\0';
  86     ret.include_version_id = TRUE;
  87     ret.indent_preambles = FALSE;
  88     ret.bullet.text = L"-";
  89     ret.filename = dupstr("output.txt");
  90     ret.charset = CS_ASCII;
  91
  92     for (; source; source = source->next) {
  93         if (source->type == para_Config) {
  94             if (!ustricmp(source->keyword, L"text-indent")) {
  95                 ret.indent = utoi(uadv(source->keyword));
  96             } else if (!ustricmp(source->keyword, L"text-charset")) {
  97                 char *csname = utoa_dup(uadv(source->keyword), CS_ASCII);
  98                 ret.charset = charset_from_localenc(csname);
  99                 sfree(csname);
 100             } else if (!ustricmp(source->keyword, L"text-filename")) {
 101                 sfree(ret.filename);
 102                 ret.filename = dupstr(adv(source->origkeyword));
 103             } else if (!ustricmp(source->keyword, L"text-indent-code")) {
 104                 ret.indent_code = utoi(uadv(source->keyword));
 105             } else if (!ustricmp(source->keyword, L"text-width")) {
 106                 ret.width = utoi(uadv(source->keyword));
 107             } else if (!ustricmp(source->keyword, L"text-list-indent")) {
 108                 ret.listindentbefore = utoi(uadv(source->keyword));
 109             } else if (!ustricmp(source->keyword, L"text-listitem-indent")) {
 110                 ret.listindentafter = utoi(uadv(source->keyword));
 111             } else if (!ustricmp(source->keyword, L"text-chapter-align")) {
 112                 ret.achapter.align = utoalign(uadv(source->keyword));
 113             } else if (!ustricmp(source->keyword, L"text-chapter-underline")) {
 114                 ret.achapter.underline = *uadv(source->keyword);
 115             } else if (!ustricmp(source->keyword, L"text-chapter-numeric")) {
 116                 ret.achapter.just_numbers = utob(uadv(source->keyword));
 117             } else if (!ustricmp(source->keyword, L"text-chapter-suffix")) {
 118                 ret.achapter.number_suffix = uadv(source->keyword);
 119             } else if (!ustricmp(source->keyword, L"text-section-align")) {
 120                 wchar_t *p = uadv(source->keyword);
 121                 int n = 0;
 122                 if (uisdigit(*p)) {
 123                     n = utoi(p);
 124                     p = uadv(p);
 125                 }
 126                 if (n >= ret.nasect) {
 127                     int i;
 128                     ret.asect = resize(ret.asect, n+1);
 129                     for (i = ret.nasect; i <= n; i++)
 130                         ret.asect[i] = ret.asect[ret.nasect-1];
 131                     ret.nasect = n+1;
 132                 }
 133                 ret.asect[n].align = utoalign(p);
 134             } else if (!ustricmp(source->keyword, L"text-section-underline")) {
 135                 wchar_t *p = uadv(source->keyword);
 136                 int n = 0;
 137                 if (uisdigit(*p)) {
 138                     n = utoi(p);
 139                     p = uadv(p);
 140                 }
 141                 if (n >= ret.nasect) {
 142                     int i;
 143                     ret.asect = resize(ret.asect, n+1);
 144                     for (i = ret.nasect; i <= n; i++)
 145                         ret.asect[i] = ret.asect[ret.nasect-1];
 146                     ret.nasect = n+1;
 147                 }
 148                 ret.asect[n].underline = *p;
 149             } else if (!ustricmp(source->keyword, L"text-section-numeric")) {
 150                 wchar_t *p = uadv(source->keyword);
 151                 int n = 0;
 152                 if (uisdigit(*p)) {
 153                     n = utoi(p);
 154                     p = uadv(p);
 155                 }
 156                 if (n >= ret.nasect) {
 157                     int i;
 158                     ret.asect = resize(ret.asect, n+1);
 159                     for (i = ret.nasect; i <= n; i++)
 160                         ret.asect[i] = ret.asect[ret.nasect-1];
 161                     ret.nasect = n+1;
 162                 }
 163                 ret.asect[n].just_numbers = utob(p);
 164             } else if (!ustricmp(source->keyword, L"text-section-suffix")) {
 165                 wchar_t *p = uadv(source->keyword);
 166                 int n = 0;
 167                 if (uisdigit(*p)) {
 168                     n = utoi(p);
 169                     p = uadv(p);
 170                 }
 171                 if (n >= ret.nasect) {
 172                     int i;
 173                     ret.asect = resize(ret.asect, n+1);
 174                     for (i = ret.nasect; i <= n; i++) {
 175                         ret.asect[i] = ret.asect[ret.nasect-1];
 176                     }
 177                     ret.nasect = n+1;
 178                 }
 179                 ret.asect[n].number_suffix = p;
 180             } else if (!ustricmp(source->keyword, L"text-title-align")) {
 181                 ret.atitle.align = utoalign(uadv(source->keyword));
 182             } else if (!ustricmp(source->keyword, L"text-title-underline")) {
 183                 ret.atitle.underline = *uadv(source->keyword);
 184             } else if (!ustricmp(source->keyword, L"text-versionid")) {
 185                 ret.include_version_id = utob(uadv(source->keyword));
 186             } else if (!ustricmp(source->keyword, L"text-indent-preamble")) {
 187                 ret.indent_preambles = utob(uadv(source->keyword));
 188             } else if (!ustricmp(source->keyword, L"text-bullet")) {
 189                 ret.bullet.text = uadv(source->keyword);
 190             }
 191         }
 192     }
 193
 194     return ret;
 195 }
 196
 197 paragraph *text_config_filename(char *filename)
 198 {
 199     return cmdline_cfg_simple("text-filename", filename, NULL);
 200 }
 201
 202 void text_backend(paragraph *sourceform, keywordlist *keywords,
 203                   indexdata *idx, void *unused) {
 204     paragraph *p;
 205     textconfig conf;
 206     word *prefix, *body, *wp;
 207     word spaceword;
 208     textfile tf;
 209     wchar_t *prefixextra;
 210     int nesting, nestindent;
 211     int indentb, indenta;
 212
 213     IGNORE(unused);
 214     IGNORE(keywords);                  /* we don't happen to need this */
 215     IGNORE(idx);                       /* or this */
 216
 217     conf = text_configure(sourceform);
 218
 219     /*
 220      * Open the output file.
 221      */
 222     tf.fp = fopen(conf.filename, "w");
 223     if (!tf.fp) {
 224         error(err_cantopenw, conf.filename);
 225         return;
 226     }
 227     tf.charset = conf.charset;
 228     tf.state = charset_init_state;
 229
 230     /* Do the title */
 231     for (p = sourceform; p; p = p->next)
 232         if (p->type == para_Title)
 233             text_heading(&tf, NULL, NULL, p->words,
 234                          conf.atitle, conf.indent, conf.width);
 235
 236     nestindent = conf.listindentbefore + conf.listindentafter;
 237     nesting = (conf.indent_preambles ? 0 : -conf.indent);
 238
 239     /* Do the main document */
 240     for (p = sourceform; p; p = p->next) switch (p->type) {
 241
 242       case para_QuotePush:
 243         nesting += 2;
 244         break;
 245       case para_QuotePop:
 246         nesting -= 2;
 247         assert(nesting >= 0);
 248         break;
 249
 250       case para_LcontPush:
 251         nesting += nestindent;
 252         break;
 253       case para_LcontPop:
 254         nesting -= nestindent;
 255         assert(nesting >= 0);
 256         break;
 257
 258         /*
 259          * Things we ignore because we've already processed them or
 260          * aren't going to touch them in this pass.
 261          */
 262       case para_IM:
 263       case para_BR:
 264       case para_Biblio:                /* only touch BiblioCited */
 265       case para_VersionID:
 266       case para_NoCite:
 267       case para_Title:
 268         break;
 269
 270         /*
 271          * Chapter titles.
 272          */
 273       case para_Chapter:
 274       case para_Appendix:
 275       case para_UnnumberedChapter:
 276         text_heading(&tf, p->kwtext, p->kwtext2, p->words,
 277                      conf.achapter, conf.indent, conf.width);
 278         nesting = 0;
 279         break;
 280
 281       case para_Heading:
 282       case para_Subsect:
 283         text_heading(&tf, p->kwtext, p->kwtext2, p->words,
 284                      conf.asect[p->aux>=conf.nasect ? conf.nasect-1 : p->aux],
 285                      conf.indent, conf.width);
 286         break;
 287
 288       case para_Rule:
 289         text_rule(&tf, conf.indent + nesting, conf.width - nesting);
 290         break;
 291
 292       case para_Normal:
 293       case para_Copyright:
 294       case para_DescribedThing:
 295       case para_Description:
 296       case para_BiblioCited:
 297       case para_Bullet:
 298       case para_NumberedList:
 299         if (p->type == para_Bullet) {
 300             prefix = &conf.bullet;
 301             prefixextra = NULL;
 302             indentb = conf.listindentbefore;
 303             indenta = conf.listindentafter;
 304         } else if (p->type == para_NumberedList) {
 305             prefix = p->kwtext;
 306             prefixextra = L".";        /* FIXME: configurability */
 307             indentb = conf.listindentbefore;
 308             indenta = conf.listindentafter;
 309         } else if (p->type == para_Description) {
 310             prefix = NULL;
 311             prefixextra = NULL;
 312             indentb = conf.listindentbefore;
 313             indenta = conf.listindentafter;
 314         } else {
 315             prefix = NULL;
 316             prefixextra = NULL;
 317             indentb = indenta = 0;
 318         }
 319         if (p->type == para_BiblioCited) {
 320             body = dup_word_list(p->kwtext);
 321             for (wp = body; wp->next; wp = wp->next);
 322             wp->next = &spaceword;
 323             spaceword.next = p->words;
 324             spaceword.alt = NULL;
 325             spaceword.type = word_WhiteSpace;
 326             spaceword.text = NULL;
 327         } else {
 328             wp = NULL;
 329             body = p->words;
 330         }
 331         text_para(&tf, prefix, prefixextra, body,
 332                   conf.indent + nesting + indentb, indenta,
 333                   conf.width - nesting - indentb - indenta);
 334         if (wp) {
 335             wp->next = NULL;
 336             free_word_list(body);
 337         }
 338         break;
 339
 340       case para_Code:
 341         text_codepara(&tf, p->words,
 342                       conf.indent + nesting + conf.indent_code,
 343                       conf.width - nesting - 2 * conf.indent_code);
 344         break;
 345     }
 346
 347     /* Do the version ID */
 348     if (conf.include_version_id) {
 349         for (p = sourceform; p; p = p->next)
 350             if (p->type == para_VersionID)
 351                 text_versionid(&tf, p->words);
 352     }
 353
 354     /*
 355      * Tidy up
 356      */
 357     text_output(&tf, NULL);            /* end charset conversion */
 358     fclose(tf.fp);
 359     sfree(conf.asect);
 360     sfree(conf.filename);
 361 }
 362
 363 static void text_output(textfile *tf, const wchar_t *s)
 364 {
 365     char buf[256];
 366     int ret, len;
 367     const wchar_t **sp;
 368
 369     if (!s) {
 370         sp = NULL;
 371         len = 1;
 372     } else {
 373         sp = &s;
 374         len = ustrlen(s);
 375     }
 376
 377     while (len > 0) {
 378         ret = charset_from_unicode(sp, &len, buf, lenof(buf),
 379                                    tf->charset, &tf->state, NULL);
 380         if (!sp)
 381             len = 0;
 382         fwrite(buf, 1, ret, tf->fp);
 383     }
 384 }
 385
 386 static void text_output_many(textfile *tf, int n, wchar_t c)
 387 {
 388     wchar_t s[2];
 389     s[0] = c;
 390     s[1] = L'\0';
 391     while (n--)
 392         text_output(tf, s);
 393 }
 394
 395 static void text_rdaddw(int charset, rdstring *rs, word *text, word *end) {
 396     for (; text && text != end; text = text->next) switch (text->type) {
 397       case word_HyperLink:
 398       case word_HyperEnd:
 399       case word_UpperXref:
 400       case word_LowerXref:
 401       case word_XrefEnd:
 402       case word_IndexRef:
 403         break;
 404
 405       case word_Normal:
 406       case word_Emph:
 407       case word_Code:
 408       case word_WeakCode:
 409       case word_WhiteSpace:
 410       case word_EmphSpace:
 411       case word_CodeSpace:
 412       case word_WkCodeSpace:
 413       case word_Quote:
 414       case word_EmphQuote:
 415       case word_CodeQuote:
 416       case word_WkCodeQuote:
 417         assert(text->type != word_CodeQuote &&
 418                text->type != word_WkCodeQuote);
 419         if (towordstyle(text->type) == word_Emph &&
 420             (attraux(text->aux) == attr_First ||
 421              attraux(text->aux) == attr_Only))
 422             rdadd(rs, L'_');           /* FIXME: configurability */
 423         else if (towordstyle(text->type) == word_Code &&
 424                  (attraux(text->aux) == attr_First ||
 425                   attraux(text->aux) == attr_Only))
 426             rdadd(rs, L'`');           /* FIXME: configurability */
 427         if (removeattr(text->type) == word_Normal) {
 428             if (cvt_ok(charset, text->text) || !text->alt)
 429                 rdadds(rs, text->text);
 430             else
 431                 text_rdaddw(charset, rs, text->alt, NULL);
 432         } else if (removeattr(text->type) == word_WhiteSpace) {
 433             rdadd(rs, L' ');
 434         } else if (removeattr(text->type) == word_Quote) {
 435             rdadd(rs, quoteaux(text->aux) == quote_Open ? L'`' : L'\'');
 436                                        /* FIXME: configurability */
 437         }
 438         if (towordstyle(text->type) == word_Emph &&
 439             (attraux(text->aux) == attr_Last ||
 440              attraux(text->aux) == attr_Only))
 441             rdadd(rs, L'_');           /* FIXME: configurability */
 442         else if (towordstyle(text->type) == word_Code &&
 443                  (attraux(text->aux) == attr_Last ||
 444                   attraux(text->aux) == attr_Only))
 445             rdadd(rs, L'\'');          /* FIXME: configurability */
 446         break;
 447     }
 448 }
 449
 450 static int text_width(void *, word *);
 451
 452 static int text_width_list(void *ctx, word *text) {
 453     int w = 0;
 454     while (text) {
 455         w += text_width(ctx, text);
 456         text = text->next;
 457     }
 458     return w;
 459 }
 460
 461 static int text_width(void *ctx, word *text) {
 462     int charset = * (int *) ctx;
 463
 464     switch (text->type) {
 465       case word_HyperLink:
 466       case word_HyperEnd:
 467       case word_UpperXref:
 468       case word_LowerXref:
 469       case word_XrefEnd:
 470       case word_IndexRef:
 471         return 0;
 472
 473       case word_Normal:
 474       case word_Emph:
 475       case word_Code:
 476       case word_WeakCode:
 477         return (((text->type == word_Emph ||
 478                   text->type == word_Code)
 479                  ? (attraux(text->aux) == attr_Only ? 2 :
 480                     attraux(text->aux) == attr_Always ? 0 : 1)
 481                  : 0) +
 482                 (cvt_ok(charset, text->text) || !text->alt ?
 483                  ustrlen(text->text) :
 484                  text_width_list(ctx, text->alt)));
 485
 486       case word_WhiteSpace:
 487       case word_EmphSpace:
 488       case word_CodeSpace:
 489       case word_WkCodeSpace:
 490       case word_Quote:
 491       case word_EmphQuote:
 492       case word_CodeQuote:
 493       case word_WkCodeQuote:
 494         assert(text->type != word_CodeQuote &&
 495                text->type != word_WkCodeQuote);
 496         return (((towordstyle(text->type) == word_Emph ||
 497                   towordstyle(text->type) == word_Code)
 498                  ? (attraux(text->aux) == attr_Only ? 2 :
 499                     attraux(text->aux) == attr_Always ? 0 : 1)
 500                  : 0) + 1);
 501     }
 502     return 0;                          /* should never happen */
 503 }
 504
 505 static void text_heading(textfile *tf, word *tprefix, word *nprefix,
 506                          word *text, alignstruct align,
 507                          int indent, int width) {
 508     rdstring t = { 0, 0, NULL };
 509     int margin, length;
 510     int firstlinewidth, wrapwidth;
 511     wrappedline *wrapping, *p;
 512
 513     if (align.just_numbers && nprefix) {
 514         text_rdaddw(tf->charset, &t, nprefix, NULL);
 515         rdadds(&t, align.number_suffix);
 516     } else if (!align.just_numbers && tprefix) {
 517         text_rdaddw(tf->charset, &t, tprefix, NULL);
 518         rdadds(&t, align.number_suffix);
 519     }
 520     margin = length = t.pos;
 521
 522     if (align.align == LEFTPLUS) {
 523         margin = indent - margin;
 524         if (margin < 0) margin = 0;
 525         firstlinewidth = indent + width - margin - length;
 526         wrapwidth = width;
 527     } else if (align.align == LEFT || align.align == CENTRE) {
 528         margin = 0;
 529         firstlinewidth = indent + width - length;
 530         wrapwidth = indent + width;
 531     }
 532
 533     wrapping = wrap_para(text, firstlinewidth, wrapwidth,
 534                          text_width, &tf->charset, 0);
 535     for (p = wrapping; p; p = p->next) {
 536         text_rdaddw(tf->charset, &t, p->begin, p->end);
 537         length = t.pos;
 538         if (align.align == CENTRE) {
 539             margin = (indent + width - length)/2;
 540             if (margin < 0) margin = 0;
 541         }
 542         text_output_many(tf, margin, L' ');
 543         text_output(tf, t.text);
 544         text_output(tf, L"\n");
 545         if (align.underline != L'\0') {
 546             text_output_many(tf, margin, L' ');
 547             text_output_many(tf, length, align.underline);
 548             text_output(tf, L"\n");
 549         }
 550         if (align.align == LEFTPLUS)
 551             margin = indent;
 552         else
 553             margin = 0;
 554         sfree(t.text);
 555         t = empty_rdstring;
 556     }
 557     wrap_free(wrapping);
 558     text_output(tf, L"\n");
 559
 560     sfree(t.text);
 561 }
 562
 563 static void text_rule(textfile *tf, int indent, int width) {
 564     text_output_many(tf, indent, L' ');
 565     text_output_many(tf, width, L'-');     /* FIXME: configurability! */
 566     text_output_many(tf, 2, L'\n');
 567 }
 568
 569 static void text_para(textfile *tf, word *prefix, wchar_t *prefixextra,
 570                       word *text, int indent, int extraindent, int width) {
 571     wrappedline *wrapping, *p;
 572     rdstring pfx = { 0, 0, NULL };
 573     int e;
 574     int firstlinewidth = width;
 575
 576     if (prefix) {
 577         text_rdaddw(tf->charset, &pfx, prefix, NULL);
 578         if (prefixextra)
 579             rdadds(&pfx, prefixextra);
 580         text_output_many(tf, indent, L' ');
 581         text_output(tf, pfx.text);
 582         /* If the prefix is too long, shorten the first line to fit. */
 583         e = extraindent - pfx.pos;
 584         if (e < 0) {
 585             firstlinewidth += e;       /* this decreases it, since e < 0 */
 586             if (firstlinewidth < 0) {
 587                 e = indent + extraindent;
 588                 firstlinewidth = width;
 589                 text_output(tf, L"\n");
 590             } else
 591                 e = 0;
 592         }
 593         sfree(pfx.text);
 594     } else
 595         e = indent + extraindent;
 596
 597     wrapping = wrap_para(text, firstlinewidth, width,
 598                          text_width, &tf->charset, 0);
 599     for (p = wrapping; p; p = p->next) {
 600         rdstring t = { 0, 0, NULL };
 601         text_rdaddw(tf->charset, &t, p->begin, p->end);
 602         text_output_many(tf, e, L' ');
 603         text_output(tf, t.text);
 604         text_output(tf, L"\n");
 605         e = indent + extraindent;
 606         sfree(t.text);
 607     }
 608     wrap_free(wrapping);
 609     text_output(tf, L"\n");
 610 }
 611
 612 static void text_codepara(textfile *tf, word *text, int indent, int width) {
 613     for (; text; text = text->next) if (text->type == word_WeakCode) {
 614         if (ustrlen(text->text) > width) {
 615             /* FIXME: warn */
 616         }
 617         text_output_many(tf, indent, L' ');
 618         text_output(tf, text->text);
 619         text_output(tf, L"\n");
 620     }
 621
 622     text_output(tf, L"\n");
 623 }
 624
 625 static void text_versionid(textfile *tf, word *text) {
 626     rdstring t = { 0, 0, NULL };
 627
 628     rdadd(&t, L'[');                   /* FIXME: configurability */
 629     text_rdaddw(tf->charset, &t, text, NULL);
 630     rdadd(&t, L']');                   /* FIXME: configurability */
 631     rdadd(&t, L'\n');
 632
 633     text_output(tf, t.text);
 634     sfree(t.text);
 635 }