mdw@git.distorted.org.uk Git - sgt/halibut/blob - misc.c

   1 /*
   2  * misc.c: miscellaneous useful items
   3  */
   4
   5 #include <stdarg.h>
   6 #include "halibut.h"
   7
   8 char *adv(char *s) {
   9     return s + 1 + strlen(s);
  10 }
  11
/*
 * Backing storage for the stk_* routines in this file: a growable
 * array of untyped item pointers.
 */
struct stackTag {
    void **data;                       /* item array; NULL until first push */
    int sp;                            /* number of items currently stored */
    int size;                          /* number of slots `data' is sized for */
};
  17
  18 stack stk_new(void) {
  19     stack s;
  20
  21     s = mknew(struct stackTag);
  22     s->sp = 0;
  23     s->size = 0;
  24     s->data = NULL;
  25
  26     return s;
  27 }
  28
  29 void stk_free(stack s) {
  30     sfree(s->data);
  31     sfree(s);
  32 }
  33
  34 void stk_push(stack s, void *item) {
  35     if (s->size <= s->sp) {
  36         s->size = s->sp + 32;
  37         s->data = resize(s->data, s->size);
  38     }
  39     s->data[s->sp++] = item;
  40 }
  41
  42 void *stk_pop(stack s) {
  43     if (s->sp > 0)
  44         return s->data[--s->sp];
  45     else
  46         return NULL;
  47 }
  48
  49 void *stk_top(stack s) {
  50     if (s->sp > 0)
  51         return s->data[s->sp-1];
  52     else
  53         return NULL;
  54 }
  55
  56 /*
  57  * Small routines to amalgamate a string from an input source.
  58  */
/*
 * Ready-made empty values for the two string-builder types (wide and
 * narrow respectively): zero counters and a NULL text pointer.
 * NOTE(review): the initialisers are positional, so they rely on the
 * member order of rdstring/rdstringc as declared in the header.
 */
const rdstring empty_rdstring = {0, 0, NULL};
const rdstringc empty_rdstringc = {0, 0, NULL};
  61
  62 void rdadd(rdstring *rs, wchar_t c) {
  63     if (rs->pos >= rs->size-1) {
  64         rs->size = rs->pos + 128;
  65         rs->text = resize(rs->text, rs->size);
  66     }
  67     rs->text[rs->pos++] = c;
  68     rs->text[rs->pos] = 0;
  69 }
  70 void rdadds(rdstring *rs, wchar_t const *p) {
  71     int len = ustrlen(p);
  72     if (rs->pos >= rs->size - len) {
  73         rs->size = rs->pos + len + 128;
  74         rs->text = resize(rs->text, rs->size);
  75     }
  76     ustrcpy(rs->text + rs->pos, p);
  77     rs->pos += len;
  78 }
  79 wchar_t *rdtrim(rdstring *rs) {
  80     rs->text = resize(rs->text, rs->pos + 1);
  81     return rs->text;
  82 }
  83
  84 void rdaddc(rdstringc *rs, char c) {
  85     if (rs->pos >= rs->size-1) {
  86         rs->size = rs->pos + 128;
  87         rs->text = resize(rs->text, rs->size);
  88     }
  89     rs->text[rs->pos++] = c;
  90     rs->text[rs->pos] = 0;
  91 }
  92 void rdaddsc(rdstringc *rs, char const *p) {
  93     int len = strlen(p);
  94     if (rs->pos >= rs->size - len) {
  95         rs->size = rs->pos + len + 128;
  96         rs->text = resize(rs->text, rs->size);
  97     }
  98     strcpy(rs->text + rs->pos, p);
  99     rs->pos += len;
 100 }
 101 char *rdtrimc(rdstringc *rs) {
 102     rs->text = resize(rs->text, rs->pos + 1);
 103     return rs->text;
 104 }
 105
/*
 * Detailed comparison of two word lists, used by compare_wordlists()
 * as a tie-breaker once the alphabetic content has compared equal.
 *
 * Returns a negative value if `a' sorts before `b', a positive value
 * if it sorts after, and zero if no difference was found. A list
 * that runs out first sorts before a longer one.
 */
static int compare_wordlists_literally(word *a, word *b) {
    int t;
    while (a && b) {
        /* Words of different types are ordered by type code alone. */
        if (a->type != b->type)
            return (a->type < b->type ? -1 : +1);   /* FIXME? */
        t = a->type;
        if ((t != word_Normal && t != word_Code &&
             t != word_WeakCode && t != word_Emph) ||
            a->alt || b->alt) {
            /*
             * Either this is not one of the four plain text-bearing
             * types, or at least one side carries an `alt' list.
             * Compare this single pair of words: their text with
             * ustricmp (only if both have any), then their `alt'
             * lists recursively.
             */
            int c;
            if (a->text && b->text) {
                c = ustricmp(a->text, b->text);
                if (c)
                    return c;
            }
            c = compare_wordlists_literally(a->alt, b->alt);
            if (c)
                return c;
            a = a->next;
            b = b->next;
        } else {
            /*
             * Text-bearing words with no `alt' list. Compare
             * character by character after utolower(). When one
             * side's text runs out and its next word has the same
             * type and no `alt' list, carry straight on into that
             * word, so that the way a run of text happens to be
             * split into words does not affect the result.
             */
            wchar_t *ap = a->text, *bp = b->text;
            while (*ap && *bp) {
                wchar_t ac = utolower(*ap), bc = utolower(*bp);
                if (ac != bc)
                    return (ac < bc ? -1 : +1);
                if (!*++ap && a->next && a->next->type == t && !a->next->alt)
                    a = a->next, ap = a->text;
                if (!*++bp && b->next && b->next->type == t && !b->next->alt)
                    b = b->next, bp = b->text;
            }
            /* Whichever side still has text left sorts later. */
            if (*ap || *bp)
                return (*ap ? +1 : -1);
            a = a->next;
            b = b->next;
        }
    }

    /* One list (or both) has ended; the longer one sorts later. */
    if (a || b)
        return (a ? +1 : -1);
    else
        return 0;
}
 149
 150 int compare_wordlists(word *a, word *b) {
 151     /*
 152      * First we compare only the alphabetic content of the word
 153      * lists, with case not a factor. If that comes out equal,
 154      * _then_ we compare the word lists literally.
 155      */
 156     struct {
 157         word *w;
 158         int i;
 159         wchar_t c;
 160     } pos[2];
 161
 162     pos[0].w = a;
 163     pos[1].w = b;
 164     pos[0].i = pos[1].i = 0;
 165
 166     while (1) {
 167         /*
 168          * Find the next alphabetic character in each word list.
 169          */
 170         int k;
 171
 172         for (k = 0; k < 2; k++) {
 173             /*
 174              * Advance until we hit either an alphabetic character
 175              * or the end of the word list.
 176              */
 177             while (1) {
 178                 if (!pos[k].w) {
 179                     /* End of word list. */
 180                     pos[k].c = 0;
 181                     break;
 182                 } else if (!pos[k].w->text || !pos[k].w->text[pos[k].i]) {
 183                     /* No characters remaining in this word; move on. */
 184                     pos[k].w = pos[k].w->next;
 185                     pos[k].i = 0;
 186                 } else if (!uisalpha(pos[k].w->text[pos[k].i])) {
 187                     /* This character isn't alphabetic; move on. */
 188                     pos[k].i++;
 189                 } else {
 190                     /* We have an alphabetic! Lowercase it and continue. */
 191                     pos[k].c = utolower(pos[k].w->text[pos[k].i]);
 192                     break;
 193                 }
 194             }
 195         }
 196
 197         if (pos[0].c < pos[1].c)
 198             return -1;
 199         else if (pos[0].c > pos[1].c)
 200             return +1;
 201
 202         if (!pos[0].c)
 203             break;                     /* they're equal */
 204
 205         pos[0].i++;
 206         pos[1].i++;
 207     }
 208
 209     /*
 210      * If we reach here, the strings were alphabetically equal, so
 211      * compare in more detail.
 212      */
 213     return compare_wordlists_literally(a, b);
 214 }
 215
 216 void mark_attr_ends(paragraph *sourceform) {
 217     paragraph *p;
 218     word *w, *wp;
 219     for (p = sourceform; p; p = p->next) {
 220         wp = NULL;
 221         for (w = p->words; w; w = w->next) {
 222             if (isattr(w->type)) {
 223                 int before = (wp && isattr(wp->type) &&
 224                               sameattr(wp->type, w->type));
 225                 int after = (w->next && isattr(w->next->type) &&
 226                              sameattr(w->next->type, w->type));
 227                 w->aux |= (before ?
 228                            (after ? attr_Always : attr_Last) :
 229                            (after ? attr_First : attr_Only));
 230             }
 231             wp = w;
 232         }
 233     }
 234 }
 235
 236 /*
 237  * This function implements the optimal paragraph wrapping
 238  * algorithm, pretty much as used in TeX. A cost function is
 239  * defined for each line of the wrapped paragraph (typically some
 240  * convex function of the difference between the line's length and
 241  * its desired length), and a dynamic programming approach is used
 242  * to optimise globally across all possible layouts of the
 243  * paragraph to find the one with the minimum total cost.
 244  *
 245  * The function as implemented here gives a choice of two options
 246  * for the cost function:
 247  *
 248  *  - If `natural_space' is zero, then the algorithm attempts to
 249  *    make each line the maximum possible width (either `width' or
 250  *    `subsequentwidth' depending on whether it's the first line of
 251  *    the paragraph or not), and the cost function is simply the
 252  *    square of the unused space at the end of each line. This is a
 253  *    simple mechanism suitable for use in fixed-pitch environments
 254  *    such as plain text displayed on a terminal.
 255  *
 256  *  - However, if `natural_space' is positive, the algorithm
 257  *    assumes the medium is fully graphical and that the width of
 258  *    space characters can be adjusted finely, and it attempts to
 259  *    make each _space character_ the width given in
 260  *    `natural_space'. (The provided width function should return
 261  *    the _minimum_ acceptable width of a space character in this
 262  *    case.) Therefore, the cost function for a line is dependent
 263  *    on the number of spaces on that line as well as the amount by
 264  *    which the line width differs from the optimum.
 265  */
 266 wrappedline *wrap_para(word *text, int width, int subsequentwidth,
 267                        int (*widthfn)(void *, word *), void *ctx,
 268                        int natural_space) {
 269     wrappedline *head = NULL, **ptr = &head;
 270     int nwords, wordsize;
 271     struct wrapword {
 272         word *begin, *end;
 273         int width;
 274         int spacewidth;
 275         int cost;
 276         int nwords;
 277     } *wrapwords;
 278     int i, j, n;
 279
 280     /*
 281      * Break the line up into wrappable components.
 282      */
 283     nwords = wordsize = 0;
 284     wrapwords = NULL;
 285     while (text) {
 286         if (nwords >= wordsize) {
 287             wordsize = nwords + 64;
 288             wrapwords = srealloc(wrapwords, wordsize * sizeof(*wrapwords));
 289         }
 290         wrapwords[nwords].width = 0;
 291         wrapwords[nwords].begin = text;
 292         while (text) {
 293             wrapwords[nwords].width += widthfn(ctx, text);
 294             wrapwords[nwords].end = text->next;
 295             if (text->next && (text->next->type == word_WhiteSpace ||
 296                                text->next->type == word_EmphSpace ||
 297                                text->breaks))
 298                 break;
 299             text = text->next;
 300         }
 301         if (text && text->next && (text->next->type == word_WhiteSpace ||
 302                            text->next->type == word_EmphSpace)) {
 303             wrapwords[nwords].spacewidth = widthfn(ctx, text->next);
 304             text = text->next;
 305         } else {
 306             wrapwords[nwords].spacewidth = 0;
 307         }
 308         nwords++;
 309         if (text)
 310             text = text->next;
 311     }
 312
 313     /*
 314      * Perform the dynamic wrapping algorithm: work backwards from
 315      * nwords-1, determining the optimal wrapping for each terminal
 316      * subsequence of the paragraph.
 317      */
 318     for (i = nwords; i-- ;) {
 319         int best = -1;
 320         int bestcost = 0;
 321         int cost;
 322         int linelen = 0, spacewidth = 0, minspacewidth = 0;
 323         int nspaces;
 324         int thiswidth = (i == 0 ? width : subsequentwidth);
 325
 326         j = 0;
 327         nspaces = 0;
 328         while (i+j < nwords) {
 329             /*
 330              * See what happens if we put j+1 words on this line.
 331              */
 332             if (spacewidth) {
 333                 nspaces++;
 334                 minspacewidth = spacewidth;
 335             }
 336             linelen += spacewidth + wrapwords[i+j].width;
 337             spacewidth = wrapwords[i+j].spacewidth;
 338             j++;
 339             if (linelen > thiswidth) {
 340                 /*
 341                  * If we're over the width limit, abandon ship,
 342                  * _unless_ there is no best-effort yet (which will
 343                  * only happen if the first word is too long all by
 344                  * itself).
 345                  */
 346                 if (best > 0)
 347                     break;
 348             }
 349
 350             /*
 351              * Compute the cost of this line. The method of doing
 352              * this differs hugely depending on whether
 353              * natural_space is nonzero or not.
 354              */
 355             if (natural_space) {
 356                 if (!nspaces && linelen > thiswidth) {
 357                     /*
 358                      * Special case: if there are no spaces at all
 359                      * on the line because one single word is too
 360                      * long for its line, cost is zero because
 361                      * there's nothing we can do about it anyway.
 362                      */
 363                     cost = 0;
 364                 } else {
 365                     int shortfall = thiswidth - linelen;
 366                     int spaceextra = shortfall / (nspaces ? nspaces : 1);
 367                     int spaceshortfall = natural_space -
 368                         (minspacewidth + spaceextra);
 369
 370                     if (i+j == nwords && spaceshortfall < 0) {
 371                         /*
 372                          * Special case: on the very last line of
 373                          * the paragraph, we don't score penalty
 374                          * points for having to _stretch_ the line,
 375                          * since we won't stretch it anyway.
 376                          * However, we score penalties as normal
 377                          * for having to squeeze it.
 378                          */
 379                         cost = 0;
 380                     } else {
 381                         /*
 382                          * Squaring this number is tricky since
 383                          * it's liable to be quite big. Let's
 384                          * divide it through by 256.
 385                          */
 386                         int x = spaceshortfall >> 8;
 387                         int xf = spaceshortfall & 0xFF;
 388
 389                         /*
 390                          * Not counting strange variable-fixed-
 391                          * point oddities, we are computing
 392                          *
 393                          *   (x+xf)^2 = x^2 + 2*x*xf + xf*xf
 394                          *
 395                          * except that _our_ xf is 256 times the
 396                          * one listed there.
 397                          */
 398
 399                         cost = x * x;
 400                         cost += (2 * x * xf) >> 8;
 401                     }
 402                 }
 403             } else {
 404                 if (i+j == nwords) {
 405                     /*
 406                      * Special case: if we're at the very end of the
 407                      * paragraph, we don't score penalty points for the
 408                      * white space left on the line.
 409                      */
 410                     cost = 0;
 411                 } else {
 412                     cost = (thiswidth-linelen) * (thiswidth-linelen);
 413                 }
 414             }
 415
 416             /*
 417              * Add in the cost of wrapping all lines after this
 418              * point too.
 419              */
 420             if (i+j < nwords)
 421                 cost += wrapwords[i+j].cost;
 422
 423             /*
 424              * We compare bestcost >= cost, not bestcost > cost,
 425              * because in cases where the costs are identical we
 426              * want to try to look like the greedy algorithm,
 427              * because readers are likely to have spent a lot of
 428              * time looking at greedy-wrapped paragraphs and
 429              * there's no point violating the Principle of Least
 430              * Surprise if it doesn't actually gain anything.
 431              */
 432             if (best < 0 || bestcost >= cost) {
 433                 bestcost = cost;
 434                 best = j;
 435             }
 436         }
 437         /*
 438          * Now we know the optimal answer for this terminal
 439          * subsequence, so put it in wrapwords.
 440          */
 441         wrapwords[i].cost = bestcost;
 442         wrapwords[i].nwords = best;
 443     }
 444
 445     /*
 446      * We've wrapped the paragraph. Now build the output
 447      * `wrappedline' list.
 448      */
 449     i = 0;
 450     while (i < nwords) {
 451         wrappedline *w = mknew(wrappedline);
 452         *ptr = w;
 453         ptr = &w->next;
 454         w->next = NULL;
 455
 456         n = wrapwords[i].nwords;
 457         w->begin = wrapwords[i].begin;
 458         w->end = wrapwords[i+n-1].end;
 459
 460         /*
 461          * Count along the words to find nspaces and shortfall.
 462          */
 463         w->nspaces = 0;
 464         w->shortfall = width;
 465         for (j = 0; j < n; j++) {
 466             w->shortfall -= wrapwords[i+j].width;
 467             if (j < n-1 && wrapwords[i+j].spacewidth) {
 468                 w->nspaces++;
 469                 w->shortfall -= wrapwords[i+j].spacewidth;
 470             }
 471         }
 472         i += n;
 473     }
 474
 475     sfree(wrapwords);
 476
 477     return head;
 478 }
 479
 480 void wrap_free(wrappedline *w) {
 481     while (w) {
 482         wrappedline *t = w->next;
 483         sfree(w);
 484         w = t;
 485     }
 486 }
 487
 488 void cmdline_cfg_add(paragraph *cfg, char *string)
 489 {
 490     wchar_t *ustring;
 491     int upos, ulen, pos, len;
 492
 493     ulen = 0;
 494     while (cfg->keyword[ulen])
 495         ulen += 1 + ustrlen(cfg->keyword+ulen);
 496     len = 0;
 497     while (cfg->origkeyword[len])
 498         len += 1 + strlen(cfg->origkeyword+len);
 499
 500     ustring = ufroma_locale_dup(string);
 501
 502     upos = ulen;
 503     ulen += 2 + ustrlen(ustring);
 504     cfg->keyword = resize(cfg->keyword, ulen);
 505     ustrcpy(cfg->keyword+upos, ustring);
 506     cfg->keyword[ulen-1] = L'\0';
 507
 508     pos = len;
 509     len += 2 + strlen(string);
 510     cfg->origkeyword = resize(cfg->origkeyword, len);
 511     strcpy(cfg->origkeyword+pos, string);
 512     cfg->origkeyword[len-1] = '\0';
 513
 514     sfree(ustring);
 515 }
 516
 517 paragraph *cmdline_cfg_new(void)
 518 {
 519     paragraph *p;
 520
 521     p = mknew(paragraph);
 522     memset(p, 0, sizeof(*p));
 523     p->type = para_Config;
 524     p->next = NULL;
 525     p->fpos.filename = "<command line>";
 526     p->fpos.line = p->fpos.col = -1;
 527     p->keyword = ustrdup(L"\0");
 528     p->origkeyword = dupstr("\0");
 529
 530     return p;
 531 }
 532
 533 paragraph *cmdline_cfg_simple(char *string, ...)
 534 {
 535     va_list ap;
 536     char *s;
 537     paragraph *p;
 538
 539     p = cmdline_cfg_new();
 540     cmdline_cfg_add(p, string);
 541
 542     va_start(ap, string);
 543     while ((s = va_arg(ap, char *)) != NULL)
 544         cmdline_cfg_add(p, s);
 545     va_end(ap);
 546
 547     return p;
 548 }