mdw@git.distorted.org.uk Git - sgt/halibut/blob - misc.c

   1 /*
   2  * misc.c: miscellaneous useful items
   3  */
   4
   5 #include <stdarg.h>
   6 #include "halibut.h"
   7
   /*
    * Step over one NUL-terminated string: return the address of the
    * byte immediately following the terminator of `s'.
    */
   char *adv(char *s) {
       while (*s)
           s++;
       return s + 1;
   }
  11
  12 struct stackTag {
  13     void **data;
  14     int sp;
  15     int size;
  16 };
  17
  18 stack stk_new(void) {
  19     stack s;
  20
  21     s = snew(struct stackTag);
  22     s->sp = 0;
  23     s->size = 0;
  24     s->data = NULL;
  25
  26     return s;
  27 }
  28
  29 void stk_free(stack s) {
  30     sfree(s->data);
  31     sfree(s);
  32 }
  33
  34 void stk_push(stack s, void *item) {
  35     if (s->size <= s->sp) {
  36         s->size = s->sp + 32;
  37         s->data = sresize(s->data, s->size, void *);
  38     }
  39     s->data[s->sp++] = item;
  40 }
  41
  42 void *stk_pop(stack s) {
  43     if (s->sp > 0)
  44         return s->data[--s->sp];
  45     else
  46         return NULL;
  47 }
  48
  49 void *stk_top(stack s) {
  50     if (s->sp > 0)
  51         return s->data[s->sp-1];
  52     else
  53         return NULL;
  54 }
  55
  56 /*
  57  * Small routines to amalgamate a string from an input source.
  58  */
  59 const rdstring empty_rdstring = {0, 0, NULL};
  60 const rdstringc empty_rdstringc = {0, 0, NULL};
  61
  62 void rdadd(rdstring *rs, wchar_t c) {
  63     if (rs->pos >= rs->size-1) {
  64         rs->size = rs->pos + 128;
  65         rs->text = sresize(rs->text, rs->size, wchar_t);
  66     }
  67     rs->text[rs->pos++] = c;
  68     rs->text[rs->pos] = 0;
  69 }
  70 void rdadds(rdstring *rs, wchar_t const *p) {
  71     int len = ustrlen(p);
  72     if (rs->pos >= rs->size - len) {
  73         rs->size = rs->pos + len + 128;
  74         rs->text = sresize(rs->text, rs->size, wchar_t);
  75     }
  76     ustrcpy(rs->text + rs->pos, p);
  77     rs->pos += len;
  78 }
  79 wchar_t *rdtrim(rdstring *rs) {
  80     rs->text = sresize(rs->text, rs->pos + 1, wchar_t);
  81     return rs->text;
  82 }
  83
  84 void rdaddc(rdstringc *rs, char c) {
  85     if (rs->pos >= rs->size-1) {
  86         rs->size = rs->pos + 128;
  87         rs->text = sresize(rs->text, rs->size, char);
  88     }
  89     rs->text[rs->pos++] = c;
  90     rs->text[rs->pos] = 0;
  91 }
  92 void rdaddsc(rdstringc *rs, char const *p) {
  93     rdaddsn(rs, p, strlen(p));
  94 }
  95 void rdaddsn(rdstringc *rs, char const *p, int len) {
  96     if (rs->pos >= rs->size - len) {
  97         rs->size = rs->pos + len + 128;
  98         rs->text = sresize(rs->text, rs->size, char);
  99     }
 100     memcpy(rs->text + rs->pos, p, len);
 101     rs->pos += len;
 102     rs->text[rs->pos] = 0;
 103 }
 104 char *rdtrimc(rdstringc *rs) {
 105     rs->text = sresize(rs->text, rs->pos + 1, char);
 106     return rs->text;
 107 }
 108
 109 static int compare_wordlists_literally(word *a, word *b) {
 110     int t;
 111     while (a && b) {
 112         if (a->type != b->type)
 113             return (a->type < b->type ? -1 : +1);   /* FIXME? */
 114         t = a->type;
 115         if ((t != word_Normal && t != word_Code &&
 116              t != word_WeakCode && t != word_Emph) ||
 117             a->alt || b->alt) {
 118             int c;
 119             if (a->text && b->text) {
 120                 c = ustricmp(a->text, b->text);
 121                 if (c)
 122                     return c;
 123             }
 124             c = compare_wordlists_literally(a->alt, b->alt);
 125             if (c)
 126                 return c;
 127             a = a->next;
 128             b = b->next;
 129         } else {
 130             wchar_t *ap = a->text, *bp = b->text;
 131             while (*ap && *bp) {
 132                 wchar_t ac = *ap, bc = *bp;
 133                 if (ac != bc)
 134                     return (ac < bc ? -1 : +1);
 135                 if (!*++ap && a->next && a->next->type == t && !a->next->alt)
 136                     a = a->next, ap = a->text;
 137                 if (!*++bp && b->next && b->next->type == t && !b->next->alt)
 138                     b = b->next, bp = b->text;
 139             }
 140             if (*ap || *bp)
 141                 return (*ap ? +1 : -1);
 142             a = a->next;
 143             b = b->next;
 144         }
 145     }
 146
 147     if (a || b)
 148         return (a ? +1 : -1);
 149     else
 150         return 0;
 151 }
 152
 153 int compare_wordlists(word *a, word *b) {
 154     /*
 155      * First we compare only the alphabetic content of the word
 156      * lists, with case not a factor. If that comes out equal,
 157      * _then_ we compare the word lists literally.
 158      */
 159     struct {
 160         word *w;
 161         int i;
 162         wchar_t c;
 163     } pos[2];
 164
 165     pos[0].w = a;
 166     pos[1].w = b;
 167     pos[0].i = pos[1].i = 0;
 168
 169     while (1) {
 170         /*
 171          * Find the next alphabetic character in each word list.
 172          */
 173         int k;
 174
 175         for (k = 0; k < 2; k++) {
 176             /*
 177              * Advance until we hit either an alphabetic character
 178              * or the end of the word list.
 179              */
 180             while (1) {
 181                 if (!pos[k].w) {
 182                     /* End of word list. */
 183                     pos[k].c = 0;
 184                     break;
 185                 } else if (!pos[k].w->text || !pos[k].w->text[pos[k].i]) {
 186                     /* No characters remaining in this word; move on. */
 187                     pos[k].w = pos[k].w->next;
 188                     pos[k].i = 0;
 189                 } else if (!uisalpha(pos[k].w->text[pos[k].i])) {
 190                     /* This character isn't alphabetic; move on. */
 191                     pos[k].i++;
 192                 } else {
 193                     /* We have an alphabetic! Lowercase it and continue. */
 194                     pos[k].c = utolower(pos[k].w->text[pos[k].i]);
 195                     break;
 196                 }
 197             }
 198         }
 199
 200 #ifdef HAS_WCSCOLL
 201         {
 202             wchar_t a[2], b[2];
 203             int ret;
 204
 205             a[0] = pos[0].c;
 206             b[0] = pos[1].c;
 207             a[1] = b[1] = L'\0';
 208
 209             ret = wcscoll(a, b);
 210             if (ret)
 211                 return ret;
 212         }
 213 #else
 214         if (pos[0].c < pos[1].c)
 215             return -1;
 216         else if (pos[0].c > pos[1].c)
 217             return +1;
 218 #endif
 219
 220         if (!pos[0].c)
 221             break;                     /* they're equal */
 222
 223         pos[0].i++;
 224         pos[1].i++;
 225     }
 226
 227     /*
 228      * If we reach here, the strings were alphabetically equal, so
 229      * compare in more detail.
 230      */
 231     return compare_wordlists_literally(a, b);
 232 }
 233
 234 void mark_attr_ends(word *words)
 235 {
 236     word *w, *wp;
 237
 238     wp = NULL;
 239     for (w = words; w; w = w->next) {
 240         int both;
 241         if (!isvis(w->type))
 242             /* Invisible elements should not affect this calculation */
 243             continue;
 244         both = (isattr(w->type) &&
 245                 wp && isattr(wp->type) &&
 246                 sameattr(wp->type, w->type));
 247         w->aux |= both ? attr_Always : attr_First;
 248         if (wp && !both) {
 249             /* If previous considered word turns out to have been
 250              * the end of a run, tidy it up. */
 251             int wp_attr = attraux(wp->aux);
 252             wp->aux = (wp->aux & ~attr_mask) |
 253                 ((wp_attr == attr_Always) ? attr_Last
 254                          /* attr_First */ : attr_Only);
 255         }
 256         wp = w;
 257     }
 258
 259     /* Tidy up last word touched */
 260     if (wp) {
 261         int wp_attr = attraux(wp->aux);
 262         wp->aux = (wp->aux & ~attr_mask) |
 263             ((wp_attr == attr_Always) ? attr_Last
 264                      /* attr_First */ : attr_Only);
 265     }
 266 }
 267
 268 /*
 269  * This function implements the optimal paragraph wrapping
 270  * algorithm, pretty much as used in TeX. A cost function is
 271  * defined for each line of the wrapped paragraph (typically some
 272  * convex function of the difference between the line's length and
 273  * its desired length), and a dynamic programming approach is used
 274  * to optimise globally across all possible layouts of the
 275  * paragraph to find the one with the minimum total cost.
 276  *
 277  * The function as implemented here gives a choice of two options
 278  * for the cost function:
 279  *
 280  *  - If `natural_space' is zero, then the algorithm attempts to
 281  *    make each line the maximum possible width (either `width' or
 282  *    `subsequentwidth' depending on whether it's the first line of
 283  *    the paragraph or not), and the cost function is simply the
 284  *    square of the unused space at the end of each line. This is a
 285  *    simple mechanism suitable for use in fixed-pitch environments
 286  *    such as plain text displayed on a terminal.
 287  *
 288  *  - However, if `natural_space' is positive, the algorithm
 289  *    assumes the medium is fully graphical and that the width of
 290  *    space characters can be adjusted finely, and it attempts to
 291  *    make each _space character_ the width given in
 292  *    `natural_space'. (The provided width function should return
 293  *    the _minimum_ acceptable width of a space character in this
 294  *    case.) Therefore, the cost function for a line is dependent
 295  *    on the number of spaces on that line as well as the amount by
 296  *    which the line width differs from the optimum.
 297  */
 298 wrappedline *wrap_para(word *text, int width, int subsequentwidth,
 299                        int (*widthfn)(void *, word *), void *ctx,
 300                        int natural_space) {
 301     wrappedline *head = NULL, **ptr = &head;
 302     int nwords, wordsize;
 303     struct wrapword {
 304         word *begin, *end;
 305         int width;
 306         int spacewidth;
 307         int cost;
 308         int nwords;
 309     } *wrapwords;
 310     int i, j, n;
 311
 312     /*
 313      * Break the line up into wrappable components.
 314      */
 315     nwords = wordsize = 0;
 316     wrapwords = NULL;
 317     while (text) {
 318         if (nwords >= wordsize) {
 319             wordsize = nwords + 64;
 320             wrapwords = srealloc(wrapwords, wordsize * sizeof(*wrapwords));
 321         }
 322         wrapwords[nwords].width = 0;
 323         wrapwords[nwords].begin = text;
 324         while (text) {
 325             wrapwords[nwords].width += widthfn(ctx, text);
 326             wrapwords[nwords].end = text->next;
 327             if (text->next && (text->next->type == word_WhiteSpace ||
 328                                text->next->type == word_EmphSpace ||
 329                                text->breaks))
 330                 break;
 331             text = text->next;
 332         }
 333         if (text && text->next && (text->next->type == word_WhiteSpace ||
 334                            text->next->type == word_EmphSpace)) {
 335             wrapwords[nwords].spacewidth = widthfn(ctx, text->next);
 336             text = text->next;
 337         } else {
 338             wrapwords[nwords].spacewidth = 0;
 339         }
 340         nwords++;
 341         if (text)
 342             text = text->next;
 343     }
 344
 345     /*
 346      * Perform the dynamic wrapping algorithm: work backwards from
 347      * nwords-1, determining the optimal wrapping for each terminal
 348      * subsequence of the paragraph.
 349      */
 350     for (i = nwords; i-- ;) {
 351         int best = -1;
 352         int bestcost = 0;
 353         int cost;
 354         int linelen = 0, spacewidth = 0, minspacewidth = 0;
 355         int nspaces;
 356         int thiswidth = (i == 0 ? width : subsequentwidth);
 357
 358         j = 0;
 359         nspaces = 0;
 360         while (i+j < nwords) {
 361             /*
 362              * See what happens if we put j+1 words on this line.
 363              */
 364             if (spacewidth) {
 365                 nspaces++;
 366                 minspacewidth = spacewidth;
 367             }
 368             linelen += spacewidth + wrapwords[i+j].width;
 369             spacewidth = wrapwords[i+j].spacewidth;
 370             j++;
 371             if (linelen > thiswidth) {
 372                 /*
 373                  * If we're over the width limit, abandon ship,
 374                  * _unless_ there is no best-effort yet (which will
 375                  * only happen if the first word is too long all by
 376                  * itself).
 377                  */
 378                 if (best > 0)
 379                     break;
 380             }
 381
 382             /*
 383              * Compute the cost of this line. The method of doing
 384              * this differs hugely depending on whether
 385              * natural_space is nonzero or not.
 386              */
 387             if (natural_space) {
 388                 if (!nspaces && linelen > thiswidth) {
 389                     /*
 390                      * Special case: if there are no spaces at all
 391                      * on the line because one single word is too
 392                      * long for its line, cost is zero because
 393                      * there's nothing we can do about it anyway.
 394                      */
 395                     cost = 0;
 396                 } else {
 397                     int shortfall = thiswidth - linelen;
 398                     int spaceextra = shortfall / (nspaces ? nspaces : 1);
 399                     int spaceshortfall = natural_space -
 400                         (minspacewidth + spaceextra);
 401
 402                     if (i+j == nwords && spaceshortfall < 0) {
 403                         /*
 404                          * Special case: on the very last line of
 405                          * the paragraph, we don't score penalty
 406                          * points for having to _stretch_ the line,
 407                          * since we won't stretch it anyway.
 408                          * However, we score penalties as normal
 409                          * for having to squeeze it.
 410                          */
 411                         cost = 0;
 412                     } else {
 413                         /*
 414                          * Squaring this number is tricky since
 415                          * it's liable to be quite big. Let's
 416                          * divide it through by 256.
 417                          */
 418                         int x = spaceshortfall >> 8;
 419                         int xf = spaceshortfall & 0xFF;
 420
 421                         /*
 422                          * Not counting strange variable-fixed-
 423                          * point oddities, we are computing
 424                          *
 425                          *   (x+xf)^2 = x^2 + 2*x*xf + xf*xf
 426                          *
 427                          * except that _our_ xf is 256 times the
 428                          * one listed there.
 429                          */
 430
 431                         cost = x * x;
 432                         cost += (2 * x * xf) >> 8;
 433                     }
 434                 }
 435             } else {
 436                 if (i+j == nwords) {
 437                     /*
 438                      * Special case: if we're at the very end of the
 439                      * paragraph, we don't score penalty points for the
 440                      * white space left on the line.
 441                      */
 442                     cost = 0;
 443                 } else {
 444                     cost = (thiswidth-linelen) * (thiswidth-linelen);
 445                 }
 446             }
 447
 448             /*
 449              * Add in the cost of wrapping all lines after this
 450              * point too.
 451              */
 452             if (i+j < nwords)
 453                 cost += wrapwords[i+j].cost;
 454
 455             /*
 456              * We compare bestcost >= cost, not bestcost > cost,
 457              * because in cases where the costs are identical we
 458              * want to try to look like the greedy algorithm,
 459              * because readers are likely to have spent a lot of
 460              * time looking at greedy-wrapped paragraphs and
 461              * there's no point violating the Principle of Least
 462              * Surprise if it doesn't actually gain anything.
 463              */
 464             if (best < 0 || bestcost >= cost) {
 465                 bestcost = cost;
 466                 best = j;
 467             }
 468         }
 469         /*
 470          * Now we know the optimal answer for this terminal
 471          * subsequence, so put it in wrapwords.
 472          */
 473         wrapwords[i].cost = bestcost;
 474         wrapwords[i].nwords = best;
 475     }
 476
 477     /*
 478      * We've wrapped the paragraph. Now build the output
 479      * `wrappedline' list.
 480      */
 481     i = 0;
 482     while (i < nwords) {
 483         wrappedline *w = snew(wrappedline);
 484         *ptr = w;
 485         ptr = &w->next;
 486         w->next = NULL;
 487
 488         n = wrapwords[i].nwords;
 489         w->begin = wrapwords[i].begin;
 490         w->end = wrapwords[i+n-1].end;
 491
 492         /*
 493          * Count along the words to find nspaces and shortfall.
 494          */
 495         w->nspaces = 0;
 496         w->shortfall = width;
 497         for (j = 0; j < n; j++) {
 498             w->shortfall -= wrapwords[i+j].width;
 499             if (j < n-1 && wrapwords[i+j].spacewidth) {
 500                 w->nspaces++;
 501                 w->shortfall -= wrapwords[i+j].spacewidth;
 502             }
 503         }
 504         i += n;
 505     }
 506
 507     sfree(wrapwords);
 508
 509     return head;
 510 }
 511
 512 void wrap_free(wrappedline *w) {
 513     while (w) {
 514         wrappedline *t = w->next;
 515         sfree(w);
 516         w = t;
 517     }
 518 }
 519
 520 void cmdline_cfg_add(paragraph *cfg, char *string)
 521 {
 522     wchar_t *ustring;
 523     int upos, ulen, pos, len;
 524
 525     ulen = 0;
 526     while (cfg->keyword[ulen])
 527         ulen += 1 + ustrlen(cfg->keyword+ulen);
 528     len = 0;
 529     while (cfg->origkeyword[len])
 530         len += 1 + strlen(cfg->origkeyword+len);
 531
 532     ustring = ufroma_locale_dup(string);
 533
 534     upos = ulen;
 535     ulen += 2 + ustrlen(ustring);
 536     cfg->keyword = sresize(cfg->keyword, ulen, wchar_t);
 537     ustrcpy(cfg->keyword+upos, ustring);
 538     cfg->keyword[ulen-1] = L'\0';
 539
 540     pos = len;
 541     len += 2 + strlen(string);
 542     cfg->origkeyword = sresize(cfg->origkeyword, len, char);
 543     strcpy(cfg->origkeyword+pos, string);
 544     cfg->origkeyword[len-1] = '\0';
 545
 546     sfree(ustring);
 547 }
 548
 549 paragraph *cmdline_cfg_new(void)
 550 {
 551     paragraph *p;
 552
 553     p = snew(paragraph);
 554     memset(p, 0, sizeof(*p));
 555     p->type = para_Config;
 556     p->next = NULL;
 557     p->fpos.filename = "<command line>";
 558     p->fpos.line = p->fpos.col = -1;
 559     p->keyword = ustrdup(L"\0");
 560     p->origkeyword = dupstr("\0");
 561
 562     return p;
 563 }
 564
 565 paragraph *cmdline_cfg_simple(char *string, ...)
 566 {
 567     va_list ap;
 568     char *s;
 569     paragraph *p;
 570
 571     p = cmdline_cfg_new();
 572     cmdline_cfg_add(p, string);
 573
 574     va_start(ap, string);
 575     while ((s = va_arg(ap, char *)) != NULL)
 576         cmdline_cfg_add(p, s);
 577     va_end(ap);
 578
 579     return p;
 580 }