mdw@git.distorted.org.uk Git - sgt/halibut/blob - bk_text.c

   1 /*
   2  * text backend for Halibut
   3  */
   4
   5 #include <stdio.h>
   6 #include <stdlib.h>
   7 #include <assert.h>
   8 #include "halibut.h"
   9
  10 typedef enum { LEFT, LEFTPLUS, CENTRE } alignment;
  11 typedef struct {
  12     alignment align;
  13     int just_numbers;
  14     wchar_t underline;
  15     wchar_t *number_suffix;
  16 } alignstruct;
  17
  18 typedef struct {
  19     int indent, indent_code;
  20     int listindentbefore, listindentafter;
  21     int width;
  22     alignstruct atitle, achapter, *asect;
  23     int nasect;
  24     int include_version_id;
  25     int indent_preambles;
  26     int charset;
  27     word bullet;
  28     char *filename;
  29 } textconfig;
  30
  31 typedef struct {
  32     FILE *fp;
  33     int charset;
  34     charset_state state;
  35 } textfile;
  36
  37 static void text_heading(textfile *, word *, word *, word *, alignstruct,
  38                          int,int);
  39 static void text_rule(textfile *, int, int);
  40 static void text_para(textfile *, word *, wchar_t *, word *, int, int, int);
  41 static void text_codepara(textfile *, word *, int, int);
  42 static void text_versionid(textfile *, word *);
  43
  44 static void text_output(textfile *, const wchar_t *);
  45 static void text_output_many(textfile *, int, wchar_t);
  46
  47 static alignment utoalign(wchar_t *p) {
  48     if (!ustricmp(p, L"centre") || !ustricmp(p, L"center"))
  49         return CENTRE;
  50     if (!ustricmp(p, L"leftplus"))
  51         return LEFTPLUS;
  52     return LEFT;
  53 }
  54
  55 static textconfig text_configure(paragraph *source) {
  56     textconfig ret;
  57
  58     /*
  59      * Non-negotiables.
  60      */
  61     ret.bullet.next = NULL;
  62     ret.bullet.alt = NULL;
  63     ret.bullet.type = word_Normal;
  64     ret.atitle.just_numbers = FALSE;   /* ignored */
  65
  66     /*
  67      * Defaults.
  68      */
  69     ret.indent = 7;
  70     ret.indent_code = 2;
  71     ret.listindentbefore = 1;
  72     ret.listindentafter = 3;
  73     ret.width = 68;
  74     ret.atitle.align = CENTRE;
  75     ret.atitle.underline = L'=';
  76     ret.achapter.align = LEFT;
  77     ret.achapter.just_numbers = FALSE;
  78     ret.achapter.number_suffix = L": ";
  79     ret.achapter.underline = L'-';
  80     ret.nasect = 1;
  81     ret.asect = mknewa(alignstruct, ret.nasect);
  82     ret.asect[0].align = LEFTPLUS;
  83     ret.asect[0].just_numbers = TRUE;
  84     ret.asect[0].number_suffix = L" ";
  85     ret.asect[0].underline = L'\0';
  86     ret.include_version_id = TRUE;
  87     ret.indent_preambles = FALSE;
  88     ret.bullet.text = L"-";
  89     ret.filename = dupstr("output.txt");
  90     ret.charset = CS_ASCII;
  91
  92     for (; source; source = source->next) {
  93         if (source->type == para_Config) {
  94             if (!ustricmp(source->keyword, L"text-indent")) {
  95                 ret.indent = utoi(uadv(source->keyword));
  96             } else if (!ustricmp(source->keyword, L"text-charset")) {
  97                 char *csname = utoa_dup(uadv(source->keyword), CS_ASCII);
  98                 ret.charset = charset_from_localenc(csname);
  99                 sfree(csname);
 100             } else if (!ustricmp(source->keyword, L"text-filename")) {
 101                 sfree(ret.filename);
 102                 ret.filename = dupstr(adv(source->origkeyword));
 103             } else if (!ustricmp(source->keyword, L"text-indent-code")) {
 104                 ret.indent_code = utoi(uadv(source->keyword));
 105             } else if (!ustricmp(source->keyword, L"text-width")) {
 106                 ret.width = utoi(uadv(source->keyword));
 107             } else if (!ustricmp(source->keyword, L"text-list-indent")) {
 108                 ret.listindentbefore = utoi(uadv(source->keyword));
 109             } else if (!ustricmp(source->keyword, L"text-listitem-indent")) {
 110                 ret.listindentafter = utoi(uadv(source->keyword));
 111             } else if (!ustricmp(source->keyword, L"text-chapter-align")) {
 112                 ret.achapter.align = utoalign(uadv(source->keyword));
 113             } else if (!ustricmp(source->keyword, L"text-chapter-underline")) {
 114                 ret.achapter.underline = *uadv(source->keyword);
 115             } else if (!ustricmp(source->keyword, L"text-chapter-numeric")) {
 116                 ret.achapter.just_numbers = utob(uadv(source->keyword));
 117             } else if (!ustricmp(source->keyword, L"text-chapter-suffix")) {
 118                 ret.achapter.number_suffix = uadv(source->keyword);
 119             } else if (!ustricmp(source->keyword, L"text-section-align")) {
 120                 wchar_t *p = uadv(source->keyword);
 121                 int n = 0;
 122                 if (uisdigit(*p)) {
 123                     n = utoi(p);
 124                     p = uadv(p);
 125                 }
 126                 if (n >= ret.nasect) {
 127                     int i;
 128                     ret.asect = resize(ret.asect, n+1);
 129                     for (i = ret.nasect; i <= n; i++)
 130                         ret.asect[i] = ret.asect[ret.nasect-1];
 131                     ret.nasect = n+1;
 132                 }
 133                 ret.asect[n].align = utoalign(p);
 134             } else if (!ustricmp(source->keyword, L"text-section-underline")) {
 135                 wchar_t *p = uadv(source->keyword);
 136                 int n = 0;
 137                 if (uisdigit(*p)) {
 138                     n = utoi(p);
 139                     p = uadv(p);
 140                 }
 141                 if (n >= ret.nasect) {
 142                     int i;
 143                     ret.asect = resize(ret.asect, n+1);
 144                     for (i = ret.nasect; i <= n; i++)
 145                         ret.asect[i] = ret.asect[ret.nasect-1];
 146                     ret.nasect = n+1;
 147                 }
 148                 ret.asect[n].underline = *p;
 149             } else if (!ustricmp(source->keyword, L"text-section-numeric")) {
 150                 wchar_t *p = uadv(source->keyword);
 151                 int n = 0;
 152                 if (uisdigit(*p)) {
 153                     n = utoi(p);
 154                     p = uadv(p);
 155                 }
 156                 if (n >= ret.nasect) {
 157                     int i;
 158                     ret.asect = resize(ret.asect, n+1);
 159                     for (i = ret.nasect; i <= n; i++)
 160                         ret.asect[i] = ret.asect[ret.nasect-1];
 161                     ret.nasect = n+1;
 162                 }
 163                 ret.asect[n].just_numbers = utob(p);
 164             } else if (!ustricmp(source->keyword, L"text-section-suffix")) {
 165                 wchar_t *p = uadv(source->keyword);
 166                 int n = 0;
 167                 if (uisdigit(*p)) {
 168                     n = utoi(p);
 169                     p = uadv(p);
 170                 }
 171                 if (n >= ret.nasect) {
 172                     int i;
 173                     ret.asect = resize(ret.asect, n+1);
 174                     for (i = ret.nasect; i <= n; i++) {
 175                         ret.asect[i] = ret.asect[ret.nasect-1];
 176                     }
 177                     ret.nasect = n+1;
 178                 }
 179                 ret.asect[n].number_suffix = p;
 180             } else if (!ustricmp(source->keyword, L"text-title-align")) {
 181                 ret.atitle.align = utoalign(uadv(source->keyword));
 182             } else if (!ustricmp(source->keyword, L"text-title-underline")) {
 183                 ret.atitle.underline = *uadv(source->keyword);
 184             } else if (!ustricmp(source->keyword, L"text-versionid")) {
 185                 ret.include_version_id = utob(uadv(source->keyword));
 186             } else if (!ustricmp(source->keyword, L"text-indent-preamble")) {
 187                 ret.indent_preambles = utob(uadv(source->keyword));
 188             } else if (!ustricmp(source->keyword, L"text-bullet")) {
 189                 ret.bullet.text = uadv(source->keyword);
 190             }
 191         }
 192     }
 193
 194     return ret;
 195 }
 196
 197 paragraph *text_config_filename(char *filename)
 198 {
 199     return cmdline_cfg_simple("text-filename", filename, NULL);
 200 }
 201
 202 void text_backend(paragraph *sourceform, keywordlist *keywords,
 203                   indexdata *idx, void *unused) {
 204     paragraph *p;
 205     textconfig conf;
 206     word *prefix, *body, *wp;
 207     word spaceword;
 208     textfile tf;
 209     wchar_t *prefixextra;
 210     int nesting, nestindent;
 211     int indentb, indenta;
 212
 213     IGNORE(unused);
 214     IGNORE(keywords);                  /* we don't happen to need this */
 215     IGNORE(idx);                       /* or this */
 216
 217     conf = text_configure(sourceform);
 218
 219     /*
 220      * Open the output file.
 221      */
 222     tf.fp = fopen(conf.filename, "w");
 223     if (!tf.fp) {
 224         error(err_cantopenw, conf.filename);
 225         return;
 226     }
 227     tf.charset = conf.charset;
 228     tf.state = charset_init_state;
 229
 230     /* Do the title */
 231     for (p = sourceform; p; p = p->next)
 232         if (p->type == para_Title)
 233             text_heading(&tf, NULL, NULL, p->words,
 234                          conf.atitle, conf.indent, conf.width);
 235
 236     nestindent = conf.listindentbefore + conf.listindentafter;
 237     nesting = (conf.indent_preambles ? 0 : -conf.indent);
 238
 239     /* Do the main document */
 240     for (p = sourceform; p; p = p->next) switch (p->type) {
 241
 242       case para_QuotePush:
 243         nesting += 2;
 244         break;
 245       case para_QuotePop:
 246         nesting -= 2;
 247         assert(nesting >= 0);
 248         break;
 249
 250       case para_LcontPush:
 251         nesting += nestindent;
 252         break;
 253       case para_LcontPop:
 254         nesting -= nestindent;
 255         assert(nesting >= 0);
 256         break;
 257
 258         /*
 259          * Things we ignore because we've already processed them or
 260          * aren't going to touch them in this pass.
 261          */
 262       case para_IM:
 263       case para_BR:
 264       case para_Biblio:                /* only touch BiblioCited */
 265       case para_VersionID:
 266       case para_NoCite:
 267       case para_Title:
 268         break;
 269
 270         /*
 271          * Chapter titles.
 272          */
 273       case para_Chapter:
 274       case para_Appendix:
 275       case para_UnnumberedChapter:
 276         text_heading(&tf, p->kwtext, p->kwtext2, p->words,
 277                      conf.achapter, conf.indent, conf.width);
 278         nesting = 0;
 279         break;
 280
 281       case para_Heading:
 282       case para_Subsect:
 283         text_heading(&tf, p->kwtext, p->kwtext2, p->words,
 284                      conf.asect[p->aux>=conf.nasect ? conf.nasect-1 : p->aux],
 285                      conf.indent, conf.width);
 286         break;
 287
 288       case para_Rule:
 289         text_rule(&tf, conf.indent + nesting, conf.width - nesting);
 290         break;
 291
 292       case para_Normal:
 293       case para_Copyright:
 294       case para_DescribedThing:
 295       case para_Description:
 296       case para_BiblioCited:
 297       case para_Bullet:
 298       case para_NumberedList:
 299         if (p->type == para_Bullet) {
 300             prefix = &conf.bullet;
 301             prefixextra = NULL;
 302             indentb = conf.listindentbefore;
 303             indenta = conf.listindentafter;
 304         } else if (p->type == para_NumberedList) {
 305             prefix = p->kwtext;
 306             prefixextra = L".";        /* FIXME: configurability */
 307             indentb = conf.listindentbefore;
 308             indenta = conf.listindentafter;
 309         } else if (p->type == para_Description) {
 310             prefix = NULL;
 311             prefixextra = NULL;
 312             indentb = conf.listindentbefore;
 313             indenta = conf.listindentafter;
 314         } else {
 315             prefix = NULL;
 316             prefixextra = NULL;
 317             indentb = indenta = 0;
 318         }
 319         if (p->type == para_BiblioCited) {
 320             body = dup_word_list(p->kwtext);
 321             for (wp = body; wp->next; wp = wp->next);
 322             wp->next = &spaceword;
 323             spaceword.next = p->words;
 324             spaceword.alt = NULL;
 325             spaceword.type = word_WhiteSpace;
 326             spaceword.text = NULL;
 327         } else {
 328             wp = NULL;
 329             body = p->words;
 330         }
 331         text_para(&tf, prefix, prefixextra, body,
 332                   conf.indent + nesting + indentb, indenta,
 333                   conf.width - nesting - indentb - indenta);
 334         if (wp) {
 335             wp->next = NULL;
 336             free_word_list(body);
 337         }
 338         break;
 339
 340       case para_Code:
 341         text_codepara(&tf, p->words,
 342                       conf.indent + nesting + conf.indent_code,
 343                       conf.width - nesting - 2 * conf.indent_code);
 344         break;
 345     }
 346
 347     /* Do the version ID */
 348     if (conf.include_version_id) {
 349         for (p = sourceform; p; p = p->next)
 350             if (p->type == para_VersionID)
 351                 text_versionid(&tf, p->words);
 352     }
 353
 354     /*
 355      * Tidy up
 356      */
 357     text_output(&tf, NULL);            /* end charset conversion */
 358     fclose(tf.fp);
 359     sfree(conf.asect);
 360     sfree(conf.filename);
 361 }
 362
 363 static int text_ok(int charset, const wchar_t *s)
 364 {
 365     char buf[256];
 366     charset_state state = CHARSET_INIT_STATE;
 367     int err, len = ustrlen(s);
 368
 369     err = 0;
 370     while (len > 0) {
 371         (void)charset_from_unicode(&s, &len, buf, lenof(buf),
 372                                    charset, &state, &err);
 373         if (err)
 374             return FALSE;
 375     }
 376     return TRUE;
 377 }
 378
 379 static void text_output(textfile *tf, const wchar_t *s)
 380 {
 381     char buf[256];
 382     int ret, len;
 383     const wchar_t **sp;
 384
 385     if (!s) {
 386         sp = NULL;
 387         len = 1;
 388     } else {
 389         sp = &s;
 390         len = ustrlen(s);
 391     }
 392
 393     while (len > 0) {
 394         ret = charset_from_unicode(sp, &len, buf, lenof(buf),
 395                                    tf->charset, &tf->state, NULL);
 396         if (!sp)
 397             len = 0;
 398         fwrite(buf, 1, ret, tf->fp);
 399     }
 400 }
 401
 402 static void text_output_many(textfile *tf, int n, wchar_t c)
 403 {
 404     wchar_t s[2];
 405     s[0] = c;
 406     s[1] = L'\0';
 407     while (n--)
 408         text_output(tf, s);
 409 }
 410
 411 static void text_rdaddw(int charset, rdstring *rs, word *text, word *end) {
 412     for (; text && text != end; text = text->next) switch (text->type) {
 413       case word_HyperLink:
 414       case word_HyperEnd:
 415       case word_UpperXref:
 416       case word_LowerXref:
 417       case word_XrefEnd:
 418       case word_IndexRef:
 419         break;
 420
 421       case word_Normal:
 422       case word_Emph:
 423       case word_Code:
 424       case word_WeakCode:
 425       case word_WhiteSpace:
 426       case word_EmphSpace:
 427       case word_CodeSpace:
 428       case word_WkCodeSpace:
 429       case word_Quote:
 430       case word_EmphQuote:
 431       case word_CodeQuote:
 432       case word_WkCodeQuote:
 433         assert(text->type != word_CodeQuote &&
 434                text->type != word_WkCodeQuote);
 435         if (towordstyle(text->type) == word_Emph &&
 436             (attraux(text->aux) == attr_First ||
 437              attraux(text->aux) == attr_Only))
 438             rdadd(rs, L'_');           /* FIXME: configurability */
 439         else if (towordstyle(text->type) == word_Code &&
 440                  (attraux(text->aux) == attr_First ||
 441                   attraux(text->aux) == attr_Only))
 442             rdadd(rs, L'`');           /* FIXME: configurability */
 443         if (removeattr(text->type) == word_Normal) {
 444             if (text_ok(charset, text->text) || !text->alt)
 445                 rdadds(rs, text->text);
 446             else
 447                 text_rdaddw(charset, rs, text->alt, NULL);
 448         } else if (removeattr(text->type) == word_WhiteSpace) {
 449             rdadd(rs, L' ');
 450         } else if (removeattr(text->type) == word_Quote) {
 451             rdadd(rs, quoteaux(text->aux) == quote_Open ? L'`' : L'\'');
 452                                        /* FIXME: configurability */
 453         }
 454         if (towordstyle(text->type) == word_Emph &&
 455             (attraux(text->aux) == attr_Last ||
 456              attraux(text->aux) == attr_Only))
 457             rdadd(rs, L'_');           /* FIXME: configurability */
 458         else if (towordstyle(text->type) == word_Code &&
 459                  (attraux(text->aux) == attr_Last ||
 460                   attraux(text->aux) == attr_Only))
 461             rdadd(rs, L'\'');          /* FIXME: configurability */
 462         break;
 463     }
 464 }
 465
 466 static int text_width(void *, word *);
 467
 468 static int text_width_list(void *ctx, word *text) {
 469     int w = 0;
 470     while (text) {
 471         w += text_width(ctx, text);
 472         text = text->next;
 473     }
 474     return w;
 475 }
 476
 477 static int text_width(void *ctx, word *text) {
 478     int charset = * (int *) ctx;
 479
 480     switch (text->type) {
 481       case word_HyperLink:
 482       case word_HyperEnd:
 483       case word_UpperXref:
 484       case word_LowerXref:
 485       case word_XrefEnd:
 486       case word_IndexRef:
 487         return 0;
 488
 489       case word_Normal:
 490       case word_Emph:
 491       case word_Code:
 492       case word_WeakCode:
 493         return (((text->type == word_Emph ||
 494                   text->type == word_Code)
 495                  ? (attraux(text->aux) == attr_Only ? 2 :
 496                     attraux(text->aux) == attr_Always ? 0 : 1)
 497                  : 0) +
 498                 (text_ok(charset, text->text) || !text->alt ?
 499                  ustrlen(text->text) :
 500                  text_width_list(ctx, text->alt)));
 501
 502       case word_WhiteSpace:
 503       case word_EmphSpace:
 504       case word_CodeSpace:
 505       case word_WkCodeSpace:
 506       case word_Quote:
 507       case word_EmphQuote:
 508       case word_CodeQuote:
 509       case word_WkCodeQuote:
 510         assert(text->type != word_CodeQuote &&
 511                text->type != word_WkCodeQuote);
 512         return (((towordstyle(text->type) == word_Emph ||
 513                   towordstyle(text->type) == word_Code)
 514                  ? (attraux(text->aux) == attr_Only ? 2 :
 515                     attraux(text->aux) == attr_Always ? 0 : 1)
 516                  : 0) + 1);
 517     }
 518     return 0;                          /* should never happen */
 519 }
 520
 521 static void text_heading(textfile *tf, word *tprefix, word *nprefix,
 522                          word *text, alignstruct align,
 523                          int indent, int width) {
 524     rdstring t = { 0, 0, NULL };
 525     int margin, length;
 526     int firstlinewidth, wrapwidth;
 527     wrappedline *wrapping, *p;
 528
 529     if (align.just_numbers && nprefix) {
 530         text_rdaddw(tf->charset, &t, nprefix, NULL);
 531         rdadds(&t, align.number_suffix);
 532     } else if (!align.just_numbers && tprefix) {
 533         text_rdaddw(tf->charset, &t, tprefix, NULL);
 534         rdadds(&t, align.number_suffix);
 535     }
 536     margin = length = t.pos;
 537
 538     if (align.align == LEFTPLUS) {
 539         margin = indent - margin;
 540         if (margin < 0) margin = 0;
 541         firstlinewidth = indent + width - margin - length;
 542         wrapwidth = width;
 543     } else if (align.align == LEFT || align.align == CENTRE) {
 544         margin = 0;
 545         firstlinewidth = indent + width - length;
 546         wrapwidth = indent + width;
 547     }
 548
 549     wrapping = wrap_para(text, firstlinewidth, wrapwidth,
 550                          text_width, &tf->charset, 0);
 551     for (p = wrapping; p; p = p->next) {
 552         text_rdaddw(tf->charset, &t, p->begin, p->end);
 553         length = t.pos;
 554         if (align.align == CENTRE) {
 555             margin = (indent + width - length)/2;
 556             if (margin < 0) margin = 0;
 557         }
 558         text_output_many(tf, margin, L' ');
 559         text_output(tf, t.text);
 560         text_output(tf, L"\n");
 561         if (align.underline != L'\0') {
 562             text_output_many(tf, margin, L' ');
 563             text_output_many(tf, length, align.underline);
 564             text_output(tf, L"\n");
 565         }
 566         if (align.align == LEFTPLUS)
 567             margin = indent;
 568         else
 569             margin = 0;
 570         sfree(t.text);
 571         t = empty_rdstring;
 572     }
 573     wrap_free(wrapping);
 574     text_output(tf, L"\n");
 575
 576     sfree(t.text);
 577 }
 578
 579 static void text_rule(textfile *tf, int indent, int width) {
 580     text_output_many(tf, indent, L' ');
 581     text_output_many(tf, width, L'-');     /* FIXME: configurability! */
 582     text_output_many(tf, 2, L'\n');
 583 }
 584
 585 static void text_para(textfile *tf, word *prefix, wchar_t *prefixextra,
 586                       word *text, int indent, int extraindent, int width) {
 587     wrappedline *wrapping, *p;
 588     rdstring pfx = { 0, 0, NULL };
 589     int e;
 590     int firstlinewidth = width;
 591
 592     if (prefix) {
 593         text_rdaddw(tf->charset, &pfx, prefix, NULL);
 594         if (prefixextra)
 595             rdadds(&pfx, prefixextra);
 596         text_output_many(tf, indent, L' ');
 597         text_output(tf, pfx.text);
 598         /* If the prefix is too long, shorten the first line to fit. */
 599         e = extraindent - pfx.pos;
 600         if (e < 0) {
 601             firstlinewidth += e;       /* this decreases it, since e < 0 */
 602             if (firstlinewidth < 0) {
 603                 e = indent + extraindent;
 604                 firstlinewidth = width;
 605                 text_output(tf, L"\n");
 606             } else
 607                 e = 0;
 608         }
 609         sfree(pfx.text);
 610     } else
 611         e = indent + extraindent;
 612
 613     wrapping = wrap_para(text, firstlinewidth, width,
 614                          text_width, &tf->charset, 0);
 615     for (p = wrapping; p; p = p->next) {
 616         rdstring t = { 0, 0, NULL };
 617         text_rdaddw(tf->charset, &t, p->begin, p->end);
 618         text_output_many(tf, e, L' ');
 619         text_output(tf, t.text);
 620         text_output(tf, L"\n");
 621         e = indent + extraindent;
 622         sfree(t.text);
 623     }
 624     wrap_free(wrapping);
 625     text_output(tf, L"\n");
 626 }
 627
 628 static void text_codepara(textfile *tf, word *text, int indent, int width) {
 629     for (; text; text = text->next) if (text->type == word_WeakCode) {
 630         if (ustrlen(text->text) > width) {
 631             /* FIXME: warn */
 632         }
 633         text_output_many(tf, indent, L' ');
 634         text_output(tf, text->text);
 635         text_output(tf, L"\n");
 636     }
 637
 638     text_output(tf, L"\n");
 639 }
 640
 641 static void text_versionid(textfile *tf, word *text) {
 642     rdstring t = { 0, 0, NULL };
 643
 644     rdadd(&t, L'[');                   /* FIXME: configurability */
 645     text_rdaddw(tf->charset, &t, text, NULL);
 646     rdadd(&t, L']');                   /* FIXME: configurability */
 647     rdadd(&t, L'\n');
 648
 649     text_output(tf, t.text);
 650     sfree(t.text);
 651 }