X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/halibut/blobdiff_plain/7136a6c7f094fa423c48ec319748c4fd7e1fa645..8f664e7e91c918cd13248f6b684580c4dd2cdb31:/misc.c

diff --git a/misc.c b/misc.c
index 0de6c2a..3f2483c 100644
--- a/misc.c
+++ b/misc.c
@@ -2,8 +2,13 @@
  * misc.c: miscellaneous useful items
  */
 
+#include <stdarg.h>
 #include "halibut.h"
 
+char *adv(char *s) {
+    return s + 1 + strlen(s);
+}
+
 struct stackTag {
     void **data;
     int sp;
@@ -13,7 +18,7 @@ struct stackTag {
 stack stk_new(void) {
     stack s;
 
-    s = mknew(struct stackTag);
+    s = snew(struct stackTag);
     s->sp = 0;
     s->size = 0;
     s->data = NULL;
@@ -29,7 +34,7 @@ void stk_free(stack s) {
 void stk_push(stack s, void *item) {
     if (s->size <= s->sp) {
 	s->size = s->sp + 32;
-	s->data = resize(s->data, s->size);
+	s->data = sresize(s->data, s->size, void *);
     }
     s->data[s->sp++] = item;
 }
@@ -57,48 +62,51 @@ const rdstringc empty_rdstringc = {0, 0, NULL};
 void rdadd(rdstring *rs, wchar_t c) {
     if (rs->pos >= rs->size-1) {
 	rs->size = rs->pos + 128;
-	rs->text = resize(rs->text, rs->size);
+	rs->text = sresize(rs->text, rs->size, wchar_t);
     }
     rs->text[rs->pos++] = c;
     rs->text[rs->pos] = 0;
 }
-void rdadds(rdstring *rs, wchar_t *p) {
+void rdadds(rdstring *rs, wchar_t const *p) {
     int len = ustrlen(p);
     if (rs->pos >= rs->size - len) {
 	rs->size = rs->pos + len + 128;
-	rs->text = resize(rs->text, rs->size);
+	rs->text = sresize(rs->text, rs->size, wchar_t);
     }
     ustrcpy(rs->text + rs->pos, p);
     rs->pos += len;
 }
 wchar_t *rdtrim(rdstring *rs) {
-    rs->text = resize(rs->text, rs->pos + 1);
+    rs->text = sresize(rs->text, rs->pos + 1, wchar_t);
     return rs->text;
 }
 
 void rdaddc(rdstringc *rs, char c) {
     if (rs->pos >= rs->size-1) {
 	rs->size = rs->pos + 128;
-	rs->text = resize(rs->text, rs->size);
+	rs->text = sresize(rs->text, rs->size, char);
     }
     rs->text[rs->pos++] = c;
     rs->text[rs->pos] = 0;
 }
-void rdaddsc(rdstringc *rs, char *p) {
-    int len = strlen(p);
+void rdaddsc(rdstringc *rs, char const *p) {
+    rdaddsn(rs, p, strlen(p));
+}
+void rdaddsn(rdstringc *rs, char const *p, int len) {
     if (rs->pos >= rs->size - len) {
 	rs->size = rs->pos + len + 128;
-	rs->text = resize(rs->text, rs->size);
+	rs->text = sresize(rs->text, rs->size, char);
     }
-    strcpy(rs->text + rs->pos, p);
+    memcpy(rs->text + rs->pos, p, len);
     rs->pos += len;
+    rs->text[rs->pos] = 0;
 }
 char *rdtrimc(rdstringc *rs) {
-    rs->text = resize(rs->text, rs->pos + 1);
+    rs->text = sresize(rs->text, rs->pos + 1, char);
     return rs->text;
 }
 
-int compare_wordlists(word *a, word *b) {
+static int compare_wordlists_literally(word *a, word *b) {
     int t;
     while (a && b) {
 	if (a->type != b->type)
@@ -113,7 +121,7 @@ int compare_wordlists(word *a, word *b) {
 		if (c)
 		    return c;
 	    }
-	    c = compare_wordlists(a->alt, b->alt);
+	    c = compare_wordlists_literally(a->alt, b->alt);
 	    if (c)
 		return c;
 	    a = a->next;
@@ -121,7 +129,7 @@ int compare_wordlists(word *a, word *b) {
 	} else {
 	    wchar_t *ap = a->text, *bp = b->text;
 	    while (*ap && *bp) {
-		wchar_t ac = utolower(*ap), bc = utolower(*bp);
+		wchar_t ac = *ap, bc = *bp;
 		if (ac != bc)
 		    return (ac < bc ? -1 : +1);
 		if (!*++ap && a->next && a->next->type == t && !a->next->alt)
@@ -142,28 +150,154 @@ int compare_wordlists(word *a, word *b) {
 	return 0;
 }
 
-void mark_attr_ends(paragraph *sourceform) {
-    paragraph *p;
-    word *w, *wp;
-    for (p = sourceform; p; p = p->next) {
-	wp = NULL;
-	for (w = p->words; w; w = w->next) {
-	    if (isattr(w->type)) {
-		int before = (wp && isattr(wp->type) &&
-			      sameattr(wp->type, w->type));
-		int after = (w->next && isattr(w->next->type) &&
-			     sameattr(w->next->type, w->type));
-		w->aux |= (before ?
-			   (after ? attr_Always : attr_Last) :
-			   (after ? attr_First : attr_Only));
+int compare_wordlists(word *a, word *b) {
+    /*
+     * First we compare only the alphabetic content of the word
+     * lists, with case not a factor. If that comes out equal,
+     * _then_ we compare the word lists literally.
+     */
+    struct {
+	word *w;
+	int i;
+	wchar_t c;
+    } pos[2];
+
+    pos[0].w = a;
+    pos[1].w = b;
+    pos[0].i = pos[1].i = 0;
+
+    while (1) {
+	/*
+	 * Find the next alphabetic character in each word list.
+	 */
+	int k;
+
+	for (k = 0; k < 2; k++) {
+	    /*
+	     * Advance until we hit either an alphabetic character
+	     * or the end of the word list.
+	     */
+	    while (1) {
+		if (!pos[k].w) {
+		    /* End of word list. */
+		    pos[k].c = 0;
+		    break;
+		} else if (!pos[k].w->text || !pos[k].w->text[pos[k].i]) {
+		    /* No characters remaining in this word; move on. */
+		    pos[k].w = pos[k].w->next;
+		    pos[k].i = 0;
+		} else if (!uisalpha(pos[k].w->text[pos[k].i])) {
+		    /* This character isn't alphabetic; move on. */
+		    pos[k].i++;
+		} else {
+		    /* We have an alphabetic! Lowercase it and continue. */
+		    pos[k].c = utolower(pos[k].w->text[pos[k].i]);
+		    break;
+		}
 	    }
-	    wp = w;
 	}
+
+#ifdef HAS_WCSCOLL
+	{
+	    wchar_t a[2], b[2];
+	    int ret;
+
+	    a[0] = pos[0].c;
+	    b[0] = pos[1].c;
+	    a[1] = b[1] = L'\0';
+
+	    ret = wcscoll(a, b);
+	    if (ret)
+		return ret;
+	}
+#else
+	if (pos[0].c < pos[1].c)
+	    return -1;
+	else if (pos[0].c > pos[1].c)
+	    return +1;
+#endif
+
+	if (!pos[0].c)
+	    break;		       /* they're equal */
+
+	pos[0].i++;
+	pos[1].i++;
+    }
+
+    /*
+     * If we reach here, the strings were alphabetically equal, so
+     * compare in more detail.
+     */
+    return compare_wordlists_literally(a, b);
+}
+
+void mark_attr_ends(word *words)
+{
+    word *w, *wp;
+
+    wp = NULL;
+    for (w = words; w; w = w->next) {
+	int both;
+	if (!isvis(w->type))
+	    /* Invisible elements should not affect this calculation */
+	    continue;
+	both = (isattr(w->type) &&
+		wp && isattr(wp->type) &&
+		sameattr(wp->type, w->type));
+	w->aux |= both ? attr_Always : attr_First;
+	if (wp && !both) {
+	    /* If previous considered word turns out to have been
+	     * the end of a run, tidy it up. */
+	    int wp_attr = attraux(wp->aux);
+	    wp->aux = (wp->aux & ~attr_mask) |
+		((wp_attr == attr_Always) ? attr_Last
+			 /* attr_First */ : attr_Only);
+	}
+	wp = w;
+    }
+
+    /* Tidy up last word touched */
+    if (wp) {
+	int wp_attr = attraux(wp->aux);
+	wp->aux = (wp->aux & ~attr_mask) |
+	    ((wp_attr == attr_Always) ? attr_Last
+		     /* attr_First */ : attr_Only);
     }
 }
 
+/*
+ * This function implements the optimal paragraph wrapping
+ * algorithm, pretty much as used in TeX. A cost function is
+ * defined for each line of the wrapped paragraph (typically some
+ * convex function of the difference between the line's length and
+ * its desired length), and a dynamic programming approach is used
+ * to optimise globally across all possible layouts of the
+ * paragraph to find the one with the minimum total cost.
+ * 
+ * The function as implemented here gives a choice of two options
+ * for the cost function:
+ * 
+ *  - If `natural_space' is zero, then the algorithm attempts to
+ *    make each line the maximum possible width (either `width' or
+ *    `subsequentwidth' depending on whether it's the first line of
+ *    the paragraph or not), and the cost function is simply the
+ *    square of the unused space at the end of each line. This is a
+ *    simple mechanism suitable for use in fixed-pitch environments
+ *    such as plain text displayed on a terminal.
+ * 
+ *  - However, if `natural_space' is positive, the algorithm
+ *    assumes the medium is fully graphical and that the width of
+ *    space characters can be adjusted finely, and it attempts to
+ *    make each _space character_ the width given in
+ *    `natural_space'. (The provided width function should return
+ *    the _minimum_ acceptable width of a space character in this
+ *    case.) Therefore, the cost function for a line is dependent
+ *    on the number of spaces on that line as well as the amount by
+ *    which the line width differs from the optimum.
+ */
 wrappedline *wrap_para(word *text, int width, int subsequentwidth,
-		       int (*widthfn)(word *)) {
+		       int (*widthfn)(void *, word *), void *ctx,
+		       int natural_space) {
     wrappedline *head = NULL, **ptr = &head;
     int nwords, wordsize;
     struct wrapword {
@@ -188,7 +322,7 @@ wrappedline *wrap_para(word *text, int width, int subsequentwidth,
 	wrapwords[nwords].width = 0;
 	wrapwords[nwords].begin = text;
 	while (text) {
-	    wrapwords[nwords].width += widthfn(text);
+	    wrapwords[nwords].width += widthfn(ctx, text);
 	    wrapwords[nwords].end = text->next;
 	    if (text->next && (text->next->type == word_WhiteSpace ||
 			       text->next->type == word_EmphSpace ||
@@ -198,7 +332,7 @@ wrappedline *wrap_para(word *text, int width, int subsequentwidth,
 	}
 	if (text && text->next && (text->next->type == word_WhiteSpace ||
 			   text->next->type == word_EmphSpace)) {
-	    wrapwords[nwords].spacewidth = widthfn(text->next);
+	    wrapwords[nwords].spacewidth = widthfn(ctx, text->next);
 	    text = text->next;
 	} else {
 	    wrapwords[nwords].spacewidth = 0;
@@ -217,18 +351,20 @@ wrappedline *wrap_para(word *text, int width, int subsequentwidth,
 	int best = -1;
 	int bestcost = 0;
 	int cost;
-	int linelen = 0, spacewidth = 0;
-	int seenspace;
+	int linelen = 0, spacewidth = 0, minspacewidth = 0;
+	int nspaces;
 	int thiswidth = (i == 0 ? width : subsequentwidth);
 
 	j = 0;
-	seenspace = 0;
+	nspaces = 0;
 	while (i+j < nwords) {
 	    /*
 	     * See what happens if we put j+1 words on this line.
 	     */
-	    if (spacewidth)
-		seenspace = 1;
+	    if (spacewidth) {
+		nspaces++;
+		minspacewidth = spacewidth;
+	    }
 	    linelen += spacewidth + wrapwords[i+j].width;
 	    spacewidth = wrapwords[i+j].spacewidth;
 	    j++;
@@ -242,17 +378,80 @@ wrappedline *wrap_para(word *text, int width, int subsequentwidth,
 		if (best > 0)
 		    break;
 	    }
-	    if (i+j == nwords) {
-		/*
-		 * Special case: if we're at the very end of the
-		 * paragraph, we don't score penalty points for the
-		 * white space left on the line.
-		 */
-		cost = 0;
+
+	    /*
+	     * Compute the cost of this line. The method of doing
+	     * this differs hugely depending on whether
+	     * natural_space is nonzero or not.
+	     */
+	    if (natural_space) {
+		if (!nspaces && linelen > thiswidth) {
+		    /*
+		     * Special case: if there are no spaces at all
+		     * on the line because one single word is too
+		     * long for its line, cost is zero because
+		     * there's nothing we can do about it anyway.
+		     */
+		    cost = 0;
+		} else {
+		    int shortfall = thiswidth - linelen;
+		    int spaceextra = shortfall / (nspaces ? nspaces : 1);
+		    int spaceshortfall = natural_space -
+			(minspacewidth + spaceextra);
+
+		    if (i+j == nwords && spaceshortfall < 0) {
+			/*
+			 * Special case: on the very last line of
+			 * the paragraph, we don't score penalty
+			 * points for having to _stretch_ the line,
+			 * since we won't stretch it anyway.
+			 * However, we score penalties as normal
+			 * for having to squeeze it.
+			 */
+			cost = 0;
+		    } else {
+			/*
+			 * Squaring this number is tricky since
+			 * it's liable to be quite big. Let's
+			 * divide it through by 256.
+			 */
+			int x = spaceshortfall >> 8;
+			int xf = spaceshortfall & 0xFF;
+
+			/*
+			 * Not counting strange variable-fixed-
+			 * point oddities, we are computing
+			 * 
+			 *   (x+xf)^2 = x^2 + 2*x*xf + xf*xf
+			 * 
+			 * except that _our_ xf is 256 times the
+			 * one listed there.
+			 */
+
+			cost = x * x;
+			cost += (2 * x * xf) >> 8;
+		    }
+		}
 	    } else {
-		cost = (thiswidth-linelen) * (thiswidth-linelen);
-		cost += wrapwords[i+j].cost;
+		if (i+j == nwords) {
+		    /*
+		     * Special case: if we're at the very end of the
+		     * paragraph, we don't score penalty points for the
+		     * white space left on the line.
+		     */
+		    cost = 0;
+		} else {
+		    cost = (thiswidth-linelen) * (thiswidth-linelen);
+		}
 	    }
+
+	    /*
+	     * Add in the cost of wrapping all lines after this
+	     * point too.
+	     */
+	    if (i+j < nwords)
+		cost += wrapwords[i+j].cost;
+
 	    /*
 	     * We compare bestcost >= cost, not bestcost > cost,
 	     * because in cases where the costs are identical we
@@ -281,7 +480,7 @@ wrappedline *wrap_para(word *text, int width, int subsequentwidth,
      */
     i = 0;
     while (i < nwords) {
-	wrappedline *w = mknew(wrappedline);
+	wrappedline *w = snew(wrappedline);
 	*ptr = w;
 	ptr = &w->next;
 	w->next = NULL;
@@ -317,3 +516,65 @@ void wrap_free(wrappedline *w) {
 	w = t;
     }
 }
+
+void cmdline_cfg_add(paragraph *cfg, char *string)
+{
+    wchar_t *ustring;
+    int upos, ulen, pos, len;
+
+    ulen = 0;
+    while (cfg->keyword[ulen])
+	ulen += 1 + ustrlen(cfg->keyword+ulen);
+    len = 0;
+    while (cfg->origkeyword[len])
+	len += 1 + strlen(cfg->origkeyword+len);
+
+    ustring = ufroma_locale_dup(string);
+
+    upos = ulen;
+    ulen += 2 + ustrlen(ustring);
+    cfg->keyword = sresize(cfg->keyword, ulen, wchar_t);
+    ustrcpy(cfg->keyword+upos, ustring);
+    cfg->keyword[ulen-1] = L'\0';
+
+    pos = len;
+    len += 2 + strlen(string);
+    cfg->origkeyword = sresize(cfg->origkeyword, len, char);
+    strcpy(cfg->origkeyword+pos, string);
+    cfg->origkeyword[len-1] = '\0';
+
+    sfree(ustring);
+}
+
+paragraph *cmdline_cfg_new(void)
+{
+    paragraph *p;
+
+    p = snew(paragraph);
+    memset(p, 0, sizeof(*p));
+    p->type = para_Config;
+    p->next = NULL;
+    p->fpos.filename = "<command line>";
+    p->fpos.line = p->fpos.col = -1;
+    p->keyword = ustrdup(L"\0");
+    p->origkeyword = dupstr("\0");
+
+    return p;
+}
+
+paragraph *cmdline_cfg_simple(char *string, ...)
+{
+    va_list ap;
+    char *s;
+    paragraph *p;
+
+    p = cmdline_cfg_new();
+    cmdline_cfg_add(p, string);
+
+    va_start(ap, string);
+    while ((s = va_arg(ap, char *)) != NULL)
+	cmdline_cfg_add(p, s);
+    va_end(ap);
+
+    return p;
+}