mdw@git.distorted.org.uk Git - sgt/halibut/blob - misc.c

   1 /*
   2  * misc.c: miscellaneous useful items
   3  */
   4
   5 #include "halibut.h"
   6
   7 struct stackTag {
   8     void **data;
   9     int sp;
  10     int size;
  11 };
  12
  13 stack stk_new(void) {
  14     stack s;
  15
  16     s = mknew(struct stackTag);
  17     s->sp = 0;
  18     s->size = 0;
  19     s->data = NULL;
  20
  21     return s;
  22 }
  23
  24 void stk_free(stack s) {
  25     sfree(s->data);
  26     sfree(s);
  27 }
  28
  29 void stk_push(stack s, void *item) {
  30     if (s->size <= s->sp) {
  31         s->size = s->sp + 32;
  32         s->data = resize(s->data, s->size);
  33     }
  34     s->data[s->sp++] = item;
  35 }
  36
  37 void *stk_pop(stack s) {
  38     if (s->sp > 0)
  39         return s->data[--s->sp];
  40     else
  41         return NULL;
  42 }
  43
  44 void *stk_top(stack s) {
  45     if (s->sp > 0)
  46         return s->data[s->sp-1];
  47     else
  48         return NULL;
  49 }
  50
  51 /*
  52  * Small routines to amalgamate a string from an input source.
  53  */
  54 const rdstring empty_rdstring = {0, 0, NULL};
  55 const rdstringc empty_rdstringc = {0, 0, NULL};
  56
  57 void rdadd(rdstring *rs, wchar_t c) {
  58     if (rs->pos >= rs->size-1) {
  59         rs->size = rs->pos + 128;
  60         rs->text = resize(rs->text, rs->size);
  61     }
  62     rs->text[rs->pos++] = c;
  63     rs->text[rs->pos] = 0;
  64 }
  65 void rdadds(rdstring *rs, wchar_t const *p) {
  66     int len = ustrlen(p);
  67     if (rs->pos >= rs->size - len) {
  68         rs->size = rs->pos + len + 128;
  69         rs->text = resize(rs->text, rs->size);
  70     }
  71     ustrcpy(rs->text + rs->pos, p);
  72     rs->pos += len;
  73 }
  74 wchar_t *rdtrim(rdstring *rs) {
  75     rs->text = resize(rs->text, rs->pos + 1);
  76     return rs->text;
  77 }
  78
  79 void rdaddc(rdstringc *rs, char c) {
  80     if (rs->pos >= rs->size-1) {
  81         rs->size = rs->pos + 128;
  82         rs->text = resize(rs->text, rs->size);
  83     }
  84     rs->text[rs->pos++] = c;
  85     rs->text[rs->pos] = 0;
  86 }
  87 void rdaddsc(rdstringc *rs, char const *p) {
  88     int len = strlen(p);
  89     if (rs->pos >= rs->size - len) {
  90         rs->size = rs->pos + len + 128;
  91         rs->text = resize(rs->text, rs->size);
  92     }
  93     strcpy(rs->text + rs->pos, p);
  94     rs->pos += len;
  95 }
  96 char *rdtrimc(rdstringc *rs) {
  97     rs->text = resize(rs->text, rs->pos + 1);
  98     return rs->text;
  99 }
 100
 101 static int compare_wordlists_literally(word *a, word *b) {
 102     int t;
 103     while (a && b) {
 104         if (a->type != b->type)
 105             return (a->type < b->type ? -1 : +1);   /* FIXME? */
 106         t = a->type;
 107         if ((t != word_Normal && t != word_Code &&
 108              t != word_WeakCode && t != word_Emph) ||
 109             a->alt || b->alt) {
 110             int c;
 111             if (a->text && b->text) {
 112                 c = ustricmp(a->text, b->text);
 113                 if (c)
 114                     return c;
 115             }
 116             c = compare_wordlists_literally(a->alt, b->alt);
 117             if (c)
 118                 return c;
 119             a = a->next;
 120             b = b->next;
 121         } else {
 122             wchar_t *ap = a->text, *bp = b->text;
 123             while (*ap && *bp) {
 124                 wchar_t ac = utolower(*ap), bc = utolower(*bp);
 125                 if (ac != bc)
 126                     return (ac < bc ? -1 : +1);
 127                 if (!*++ap && a->next && a->next->type == t && !a->next->alt)
 128                     a = a->next, ap = a->text;
 129                 if (!*++bp && b->next && b->next->type == t && !b->next->alt)
 130                     b = b->next, bp = b->text;
 131             }
 132             if (*ap || *bp)
 133                 return (*ap ? +1 : -1);
 134             a = a->next;
 135             b = b->next;
 136         }
 137     }
 138
 139     if (a || b)
 140         return (a ? +1 : -1);
 141     else
 142         return 0;
 143 }
 144
 145 int compare_wordlists(word *a, word *b) {
 146     /*
 147      * First we compare only the alphabetic content of the word
 148      * lists, with case not a factor. If that comes out equal,
 149      * _then_ we compare the word lists literally.
 150      */
 151     struct {
 152         word *w;
 153         int i;
 154         wchar_t c;
 155     } pos[2];
 156
 157     pos[0].w = a;
 158     pos[1].w = b;
 159     pos[0].i = pos[1].i = 0;
 160
 161     while (1) {
 162         /*
 163          * Find the next alphabetic character in each word list.
 164          */
 165         int k;
 166
 167         for (k = 0; k < 2; k++) {
 168             /*
 169              * Advance until we hit either an alphabetic character
 170              * or the end of the word list.
 171              */
 172             while (1) {
 173                 if (!pos[k].w) {
 174                     /* End of word list. */
 175                     pos[k].c = 0;
 176                     break;
 177                 } else if (!pos[k].w->text || !pos[k].w->text[pos[k].i]) {
 178                     /* No characters remaining in this word; move on. */
 179                     pos[k].w = pos[k].w->next;
 180                     pos[k].i = 0;
 181                 } else if (!uisalpha(pos[k].w->text[pos[k].i])) {
 182                     /* This character isn't alphabetic; move on. */
 183                     pos[k].i++;
 184                 } else {
 185                     /* We have an alphabetic! Lowercase it and continue. */
 186                     pos[k].c = utolower(pos[k].w->text[pos[k].i]);
 187                     break;
 188                 }
 189             }
 190         }
 191
 192         if (pos[0].c < pos[1].c)
 193             return -1;
 194         else if (pos[0].c > pos[1].c)
 195             return +1;
 196
 197         if (!pos[0].c)
 198             break;                     /* they're equal */
 199
 200         pos[0].i++;
 201         pos[1].i++;
 202     }
 203
 204     /*
 205      * If we reach here, the strings were alphabetically equal, so
 206      * compare in more detail.
 207      */
 208     return compare_wordlists_literally(a, b);
 209 }
 210
 211 void mark_attr_ends(paragraph *sourceform) {
 212     paragraph *p;
 213     word *w, *wp;
 214     for (p = sourceform; p; p = p->next) {
 215         wp = NULL;
 216         for (w = p->words; w; w = w->next) {
 217             if (isattr(w->type)) {
 218                 int before = (wp && isattr(wp->type) &&
 219                               sameattr(wp->type, w->type));
 220                 int after = (w->next && isattr(w->next->type) &&
 221                              sameattr(w->next->type, w->type));
 222                 w->aux |= (before ?
 223                            (after ? attr_Always : attr_Last) :
 224                            (after ? attr_First : attr_Only));
 225             }
 226             wp = w;
 227         }
 228     }
 229 }
 230
 231 /*
 232  * This function implements the optimal paragraph wrapping
 233  * algorithm, pretty much as used in TeX. A cost function is
 234  * defined for each line of the wrapped paragraph (typically some
 235  * convex function of the difference between the line's length and
 236  * its desired length), and a dynamic programming approach is used
 237  * to optimise globally across all possible layouts of the
 238  * paragraph to find the one with the minimum total cost.
 239  *
 240  * The function as implemented here gives a choice of two options
 241  * for the cost function:
 242  *
 243  *  - If `natural_space' is zero, then the algorithm attempts to
 244  *    make each line the maximum possible width (either `width' or
 245  *    `subsequentwidth' depending on whether it's the first line of
 246  *    the paragraph or not), and the cost function is simply the
 247  *    square of the unused space at the end of each line. This is a
 248  *    simple mechanism suitable for use in fixed-pitch environments
 249  *    such as plain text displayed on a terminal.
 250  *
 251  *  - However, if `natural_space' is positive, the algorithm
 252  *    assumes the medium is fully graphical and that the width of
 253  *    space characters can be adjusted finely, and it attempts to
 254  *    make each _space character_ the width given in
 255  *    `natural_space'. (The provided width function should return
 256  *    the _minimum_ acceptable width of a space character in this
 257  *    case.) Therefore, the cost function for a line is dependent
 258  *    on the number of spaces on that line as well as the amount by
 259  *    which the line width differs from the optimum.
 260  */
 261 wrappedline *wrap_para(word *text, int width, int subsequentwidth,
 262                        int (*widthfn)(void *, word *), void *ctx,
 263                        int natural_space) {
 264     wrappedline *head = NULL, **ptr = &head;
 265     int nwords, wordsize;
 266     struct wrapword {
 267         word *begin, *end;
 268         int width;
 269         int spacewidth;
 270         int cost;
 271         int nwords;
 272     } *wrapwords;
 273     int i, j, n;
 274
 275     /*
 276      * Break the line up into wrappable components.
 277      */
 278     nwords = wordsize = 0;
 279     wrapwords = NULL;
 280     while (text) {
 281         if (nwords >= wordsize) {
 282             wordsize = nwords + 64;
 283             wrapwords = srealloc(wrapwords, wordsize * sizeof(*wrapwords));
 284         }
 285         wrapwords[nwords].width = 0;
 286         wrapwords[nwords].begin = text;
 287         while (text) {
 288             wrapwords[nwords].width += widthfn(ctx, text);
 289             wrapwords[nwords].end = text->next;
 290             if (text->next && (text->next->type == word_WhiteSpace ||
 291                                text->next->type == word_EmphSpace ||
 292                                text->breaks))
 293                 break;
 294             text = text->next;
 295         }
 296         if (text && text->next && (text->next->type == word_WhiteSpace ||
 297                            text->next->type == word_EmphSpace)) {
 298             wrapwords[nwords].spacewidth = widthfn(ctx, text->next);
 299             text = text->next;
 300         } else {
 301             wrapwords[nwords].spacewidth = 0;
 302         }
 303         nwords++;
 304         if (text)
 305             text = text->next;
 306     }
 307
 308     /*
 309      * Perform the dynamic wrapping algorithm: work backwards from
 310      * nwords-1, determining the optimal wrapping for each terminal
 311      * subsequence of the paragraph.
 312      */
 313     for (i = nwords; i-- ;) {
 314         int best = -1;
 315         int bestcost = 0;
 316         int cost;
 317         int linelen = 0, spacewidth = 0, minspacewidth = 0;
 318         int nspaces;
 319         int thiswidth = (i == 0 ? width : subsequentwidth);
 320
 321         j = 0;
 322         nspaces = 0;
 323         while (i+j < nwords) {
 324             /*
 325              * See what happens if we put j+1 words on this line.
 326              */
 327             if (spacewidth) {
 328                 nspaces++;
 329                 minspacewidth = spacewidth;
 330             }
 331             linelen += spacewidth + wrapwords[i+j].width;
 332             spacewidth = wrapwords[i+j].spacewidth;
 333             j++;
 334             if (linelen > thiswidth) {
 335                 /*
 336                  * If we're over the width limit, abandon ship,
 337                  * _unless_ there is no best-effort yet (which will
 338                  * only happen if the first word is too long all by
 339                  * itself).
 340                  */
 341                 if (best > 0)
 342                     break;
 343             }
 344
 345             /*
 346              * Compute the cost of this line. The method of doing
 347              * this differs hugely depending on whether
 348              * natural_space is nonzero or not.
 349              */
 350             if (natural_space) {
 351                 if (!nspaces && linelen > thiswidth) {
 352                     /*
 353                      * Special case: if there are no spaces at all
 354                      * on the line because one single word is too
 355                      * long for its line, cost is zero because
 356                      * there's nothing we can do about it anyway.
 357                      */
 358                     cost = 0;
 359                 } else {
 360                     int shortfall = thiswidth - linelen;
 361                     int spaceextra = shortfall / (nspaces ? nspaces : 1);
 362                     int spaceshortfall = natural_space -
 363                         (minspacewidth + spaceextra);
 364
 365                     if (i+j == nwords && spaceshortfall < 0) {
 366                         /*
 367                          * Special case: on the very last line of
 368                          * the paragraph, we don't score penalty
 369                          * points for having to _stretch_ the line,
 370                          * since we won't stretch it anyway.
 371                          * However, we score penalties as normal
 372                          * for having to squeeze it.
 373                          */
 374                         cost = 0;
 375                     } else {
 376                         /*
 377                          * Squaring this number is tricky since
 378                          * it's liable to be quite big. Let's
 379                          * divide it through by 256.
 380                          */
 381                         int x = spaceshortfall >> 8;
 382                         int xf = spaceshortfall & 0xFF;
 383
 384                         /*
 385                          * Not counting strange variable-fixed-
 386                          * point oddities, we are computing
 387                          *
 388                          *   (x+xf)^2 = x^2 + 2*x*xf + xf*xf
 389                          *
 390                          * except that _our_ xf is 256 times the
 391                          * one listed there.
 392                          */
 393
 394                         cost = x * x;
 395                         cost += (2 * x * xf) >> 8;
 396                     }
 397                 }
 398             } else {
 399                 if (i+j == nwords) {
 400                     /*
 401                      * Special case: if we're at the very end of the
 402                      * paragraph, we don't score penalty points for the
 403                      * white space left on the line.
 404                      */
 405                     cost = 0;
 406                 } else {
 407                     cost = (thiswidth-linelen) * (thiswidth-linelen);
 408                 }
 409             }
 410
 411             /*
 412              * Add in the cost of wrapping all lines after this
 413              * point too.
 414              */
 415             if (i+j < nwords)
 416                 cost += wrapwords[i+j].cost;
 417
 418             /*
 419              * We compare bestcost >= cost, not bestcost > cost,
 420              * because in cases where the costs are identical we
 421              * want to try to look like the greedy algorithm,
 422              * because readers are likely to have spent a lot of
 423              * time looking at greedy-wrapped paragraphs and
 424              * there's no point violating the Principle of Least
 425              * Surprise if it doesn't actually gain anything.
 426              */
 427             if (best < 0 || bestcost >= cost) {
 428                 bestcost = cost;
 429                 best = j;
 430             }
 431         }
 432         /*
 433          * Now we know the optimal answer for this terminal
 434          * subsequence, so put it in wrapwords.
 435          */
 436         wrapwords[i].cost = bestcost;
 437         wrapwords[i].nwords = best;
 438     }
 439
 440     /*
 441      * We've wrapped the paragraph. Now build the output
 442      * `wrappedline' list.
 443      */
 444     i = 0;
 445     while (i < nwords) {
 446         wrappedline *w = mknew(wrappedline);
 447         *ptr = w;
 448         ptr = &w->next;
 449         w->next = NULL;
 450
 451         n = wrapwords[i].nwords;
 452         w->begin = wrapwords[i].begin;
 453         w->end = wrapwords[i+n-1].end;
 454
 455         /*
 456          * Count along the words to find nspaces and shortfall.
 457          */
 458         w->nspaces = 0;
 459         w->shortfall = width;
 460         for (j = 0; j < n; j++) {
 461             w->shortfall -= wrapwords[i+j].width;
 462             if (j < n-1 && wrapwords[i+j].spacewidth) {
 463                 w->nspaces++;
 464                 w->shortfall -= wrapwords[i+j].spacewidth;
 465             }
 466         }
 467         i += n;
 468     }
 469
 470     sfree(wrapwords);
 471
 472     return head;
 473 }
 474
 475 void wrap_free(wrappedline *w) {
 476     while (w) {
 477         wrappedline *t = w->next;
 478         sfree(w);
 479         w = t;
 480     }
 481 }