[sgt/halibut] / misc.c

/*
 * misc.c: miscellaneous useful items
 */

#include "halibut.h"

/*
 * Storage behind a `stack' handle: a growable array of untyped item
 * pointers. stk_new() starts it empty with no array; stk_push()
 * enlarges the array on demand.
 */
struct stackTag {
    void **data;   /* item array; NULL until the first push allocates it */
    int sp;   /* number of items held, i.e. index of the next free slot */
    int size;   /* number of slots currently allocated in data */
};

/*
 * Create an empty stack: no items, no capacity, and no backing array
 * (stk_push allocates one on first use). Release with stk_free().
 */
stack stk_new(void) {
    stack fresh = mknew(struct stackTag);

    fresh->data = NULL;
    fresh->size = 0;
    fresh->sp = 0;

    return fresh;
}

/*
 * Release a stack and its item array. The items themselves are not
 * touched; whatever they point to remains the caller's to free.
 *
 * A NULL stack is accepted and ignored, so cleanup paths can call
 * this unconditionally.
 */
void stk_free(stack s) {
    if (!s)
	return;
    sfree(s->data);
    sfree(s);
}

/*
 * Push `item' on top of the stack. When the array is full, it is
 * enlarged to hold 32 slots beyond the current item count before
 * the item is stored.
 */
void stk_push(stack s, void *item) {
    int slot = s->sp;

    if (slot >= s->size) {
	s->size = slot + 32;
	s->data = resize(s->data, s->size);
    }
    s->data[slot] = item;
    s->sp = slot + 1;
}

/*
 * Remove and return the top item, or return NULL if the stack is
 * empty. (A stored NULL item is therefore indistinguishable from
 * an empty stack.)
 */
void *stk_pop(stack s) {
    if (s->sp <= 0)
	return NULL;

    s->sp--;
    return s->data[s->sp];
}

/*
 * Return the top item without removing it, or NULL if the stack is
 * empty.
 */
void *stk_top(stack s) {
    return (s->sp > 0) ? s->data[s->sp - 1] : NULL;
}

/*
 * Small routines to amalgamate a string from an input source.
 *
 * rdstring accumulates wchar_t text and rdstringc accumulates char
 * text. The two constants below are the starting values for a new
 * accumulator: both integer fields zero and no buffer, so that the
 * first rdadd()/rdaddc() call allocates one. (The initialisers are
 * positional; the field order is declared outside this file --
 * presumably pos, size, text.)
 */
const rdstring empty_rdstring = {0, 0, NULL};
const rdstringc empty_rdstringc = {0, 0, NULL};

/*
 * Append the single wide character `c' to rs and re-terminate the
 * buffer. Two slots are needed (the character and the terminator);
 * if they are not available the buffer is grown to 128 elements
 * beyond the current length.
 */
void rdadd(rdstring *rs, wchar_t c) {
    if (!(rs->pos < rs->size - 1)) {
	rs->size = rs->pos + 128;
	rs->text = resize(rs->text, rs->size);
    }

    rs->text[rs->pos] = c;
    rs->pos += 1;
    rs->text[rs->pos] = 0;
}
/*
 * Append the zero-terminated wide string `p' to rs. The buffer is
 * grown first if it cannot hold the extra characters plus the
 * terminator; the terminator itself arrives as part of the copy.
 */
void rdadds(rdstring *rs, wchar_t *p) {
    int extra = ustrlen(p);

    if (!(rs->pos < rs->size - extra)) {
	rs->size = rs->pos + extra + 128;
	rs->text = resize(rs->text, rs->size);
    }

    ustrcpy(&rs->text[rs->pos], p);
    rs->pos = rs->pos + extra;
}
/*
 * Shrink rs's buffer so that it exactly fits the accumulated text
 * plus its terminator, and return the buffer.
 *
 * The recorded capacity is updated to match the shrunken buffer.
 * Without that, a later rdadd()/rdadds() on the same rdstring would
 * trust the old, larger capacity, skip its grow step and write past
 * the end of the allocation.
 *
 * The terminator is (re)written as well. For an rdstring that never
 * had anything added, the buffer is freshly allocated here and would
 * otherwise hold indeterminate contents; for one built through
 * rdadd()/rdadds() the write is a no-op.
 *
 * Returns rs->text: a terminated string of rs->pos characters.
 */
wchar_t *rdtrim(rdstring *rs) {
    rs->size = rs->pos + 1;
    rs->text = resize(rs->text, rs->size);
    rs->text[rs->pos] = 0;
    return rs->text;
}

/*
 * Append the single char `c' to rs and re-terminate the buffer.
 * Two slots are needed (the character and the terminator); if they
 * are not available the buffer is grown to 128 bytes beyond the
 * current length.
 */
void rdaddc(rdstringc *rs, char c) {
    if (!(rs->pos < rs->size - 1)) {
	rs->size = rs->pos + 128;
	rs->text = resize(rs->text, rs->size);
    }

    rs->text[rs->pos] = c;
    rs->pos += 1;
    rs->text[rs->pos] = 0;
}
/*
 * Append the NUL-terminated string `p' to rs. The buffer is grown
 * first if it cannot hold the extra bytes plus the terminator; the
 * terminator itself arrives as part of the copy.
 */
void rdaddsc(rdstringc *rs, char *p) {
    int extra = strlen(p);

    if (!(rs->pos < rs->size - extra)) {
	rs->size = rs->pos + extra + 128;
	rs->text = resize(rs->text, rs->size);
    }

    strcpy(&rs->text[rs->pos], p);
    rs->pos = rs->pos + extra;
}
/*
 * Shrink rs's buffer so that it exactly fits the accumulated text
 * plus its terminating NUL, and return the buffer.
 *
 * The recorded capacity is updated to match the shrunken buffer.
 * Without that, a later rdaddc()/rdaddsc() on the same rdstringc
 * would trust the old, larger capacity, skip its grow step and write
 * past the end of the allocation.
 *
 * The terminator is (re)written as well. For an rdstringc that never
 * had anything added, the buffer is freshly allocated here and would
 * otherwise hold indeterminate contents; for one built through
 * rdaddc()/rdaddsc() the write is a no-op.
 *
 * Returns rs->text: a NUL-terminated string of rs->pos bytes.
 */
char *rdtrimc(rdstringc *rs) {
    rs->size = rs->pos + 1;
    rs->text = resize(rs->text, rs->size);
    rs->text[rs->pos] = 0;
    return rs->text;
}

/*
 * Detailed, case-insensitive comparison of two word lists, walked in
 * step. Returns negative, zero or positive in the manner of strcmp.
 *
 *  - Words of differing type are ordered by the numeric type value.
 *  - Words of type Normal, Code, WeakCode or Emph with no `alt' list
 *    on either side are compared character by character, and a run
 *    of adjacent words of the same type (again without `alt') is
 *    read as if it were one continuous string.
 *  - Any other pair is compared by its text (when both sides have
 *    text) and then by a recursive comparison of the `alt' lists.
 *
 * If one list is exhausted first, the shorter list sorts earlier.
 */
static int compare_wordlists_literally(word *a, word *b) {
    for (; a && b; a = a->next, b = b->next) {
	int kind;
	int plain;

	if (a->type != b->type)
	    return (a->type < b->type ? -1 : +1);   /* FIXME? */
	kind = a->type;

	plain = ((kind == word_Normal || kind == word_Code ||
		  kind == word_WeakCode || kind == word_Emph) &&
		 !a->alt && !b->alt);

	if (!plain) {
	    int diff;

	    if (a->text && b->text) {
		diff = ustricmp(a->text, b->text);
		if (diff)
		    return diff;
	    }
	    diff = compare_wordlists_literally(a->alt, b->alt);
	    if (diff)
		return diff;
	} else {
	    wchar_t *ap = a->text, *bp = b->text;

	    while (*ap && *bp) {
		wchar_t ac = utolower(*ap), bc = utolower(*bp);

		if (ac != bc)
		    return (ac < bc ? -1 : +1);

		/*
		 * Step each side on by one character. On running
		 * off the end of a word, continue straight into the
		 * next word if it is of the same type and has no
		 * `alt'.
		 */
		ap++;
		if (!*ap && a->next && a->next->type == kind &&
		    !a->next->alt) {
		    a = a->next;
		    ap = a->text;
		}
		bp++;
		if (!*bp && b->next && b->next->type == kind &&
		    !b->next->alt) {
		    b = b->next;
		    bp = b->text;
		}
	    }

	    /* Whichever side still has text left sorts later. */
	    if (*ap)
		return +1;
	    if (*bp)
		return -1;
	}
    }

    if (a)
	return +1;
    if (b)
	return -1;
    return 0;
}

/*
 * Order two word lists. Returns negative, zero or positive in the
 * manner of strcmp.
 *
 * The primary key is the alphabetic content alone: every
 * non-alphabetic character is skipped, word boundaries are ignored,
 * and letters are compared in lower case. Only when the lists are
 * identical by that measure does compare_wordlists_literally()
 * decide the result.
 */
int compare_wordlists(word *a, word *b) {
    word *cur[2];		       /* current word in each list */
    int idx[2];			       /* character index within that word */
    wchar_t ch[2];		       /* next letter found, or 0 at end */
    int side;

    cur[0] = a;
    cur[1] = b;
    idx[0] = idx[1] = 0;

    for (;;) {
	/*
	 * On each side, scan forward to the next alphabetic
	 * character; ch[side] stays 0 if the list runs out first.
	 */
	for (side = 0; side < 2; side++) {
	    ch[side] = 0;
	    while (cur[side]) {
		wchar_t *txt = cur[side]->text;

		if (!txt || !txt[idx[side]]) {
		    /* Nothing left in this word; try the next. */
		    cur[side] = cur[side]->next;
		    idx[side] = 0;
		} else if (!uisalpha(txt[idx[side]])) {
		    /* Not a letter; skip it. */
		    idx[side]++;
		} else {
		    /* A letter: take its lower-case form. */
		    ch[side] = utolower(txt[idx[side]]);
		    break;
		}
	    }
	}

	if (ch[0] != ch[1])
	    return (ch[0] < ch[1] ? -1 : +1);

	if (!ch[0])
	    break;		       /* both lists ended together */

	idx[0]++;
	idx[1]++;
    }

    /*
     * Alphabetically identical, so fall back to the detailed
     * comparison.
     */
    return compare_wordlists_literally(a, b);
}

/*
 * For every attributed word (one whose type satisfies isattr) in
 * every paragraph of the list, record in its `aux' field where it
 * sits within a run of neighbouring words carrying the same
 * attribute: attr_First, attr_Last, attr_Always (has such a
 * neighbour on both sides) or attr_Only (has none).
 *
 * The flag is OR-ed into aux, so bits already present are kept.
 */
void mark_attr_ends(paragraph *sourceform) {
    paragraph *para;

    for (para = sourceform; para; para = para->next) {
	word *prev = NULL;
	word *cur;

	for (cur = para->words; cur; prev = cur, cur = cur->next) {
	    int joins_prev, joins_next;

	    if (!isattr(cur->type))
		continue;

	    joins_prev = (prev && isattr(prev->type) &&
			  sameattr(prev->type, cur->type));
	    joins_next = (cur->next && isattr(cur->next->type) &&
			  sameattr(cur->next->type, cur->type));

	    if (joins_prev)
		cur->aux |= (joins_next ? attr_Always : attr_Last);
	    else
		cur->aux |= (joins_next ? attr_First : attr_Only);
	}
    }
}

/*
 * wrap_para: split a word list into lines.
 *
 * The list is first cut into unbreakable components (a component
 * ends before a WhiteSpace/EmphSpace word, or after a word whose
 * `breaks' flag is set when another word follows). A dynamic
 * programme, run from the last component backwards, then picks the
 * number of components to put on each line so that the total cost is
 * minimal. The cost is the sum, over every line except the last, of
 * the square of the space left unused on that line.
 *
 * Parameters:
 *   text            - first word of the list to wrap (may be NULL).
 *   width           - width available to the first line.
 *   subsequentwidth - width available to every later line.
 *   widthfn         - returns the width of a single word, in the
 *                     same units as the two widths.
 *
 * Returns a newly allocated list of wrappedline records, one per
 * output line, or NULL if `text' is NULL. Each record gives the line
 * as the span from begin up to (not including) end, the number of
 * inter-component spaces in it, and its shortfall. Free the list
 * with wrap_free().
 *
 * A component wider than the available width is still placed, on a
 * line of its own, and so overflows.
 */
wrappedline *wrap_para(word *text, int width, int subsequentwidth,
		       int (*widthfn)(word *)) {
    wrappedline *head = NULL, **ptr = &head;
    int nwords, wordsize;
    struct wrapword {
	word *begin, *end;	       /* component is begin up to, not incl., end */
	int width;		       /* total widthfn() of the component */
	int spacewidth;		       /* width of the following space, or 0 */
	int cost;		       /* best cost of wrapping from here on */
	int nwords;		       /* components on the line starting here */
    } *wrapwords;
    int i, j, n;

    /*
     * Break the line up into wrappable components.
     */
    nwords = wordsize = 0;
    wrapwords = NULL;
    while (text) {
	if (nwords >= wordsize) {
	    wordsize = nwords + 64;
	    wrapwords = srealloc(wrapwords, wordsize * sizeof(*wrapwords));
	}
	wrapwords[nwords].width = 0;
	wrapwords[nwords].begin = text;
	/*
	 * Gather words into this component until the next word is a
	 * space, or the current word permits a break after it.
	 */
	while (text) {
	    wrapwords[nwords].width += widthfn(text);
	    wrapwords[nwords].end = text->next;
	    if (text->next && (text->next->type == word_WhiteSpace ||
			       text->next->type == word_EmphSpace ||
			       text->breaks))
		break;
	    text = text->next;
	}
	/*
	 * If a space word follows, note its width and step on to it,
	 * so that the advance below moves past it.
	 */
	if (text && text->next && (text->next->type == word_WhiteSpace ||
			   text->next->type == word_EmphSpace)) {
	    wrapwords[nwords].spacewidth = widthfn(text->next);
	    text = text->next;
	} else {
	    wrapwords[nwords].spacewidth = 0;
	}
	nwords++;
	if (text)
	    text = text->next;
    }

    /*
     * Perform the dynamic wrapping algorithm: work backwards from
     * nwords-1, determining the optimal wrapping for each terminal
     * subsequence of the paragraph.
     */
    for (i = nwords; i-- ;) {
	int best = -1;
	int bestcost = 0;
	int cost;
	int linelen = 0, spacewidth = 0;
	int thiswidth = (i == 0 ? width : subsequentwidth);

	j = 0;
	while (i+j < nwords) {
	    /*
	     * See what happens if we put j+1 words on this line.
	     * The space after the previous component only counts
	     * once another component follows it on the same line.
	     */
	    linelen += spacewidth + wrapwords[i+j].width;
	    spacewidth = wrapwords[i+j].spacewidth;
	    j++;
	    if (linelen > thiswidth) {
		/*
		 * If we're over the width limit, abandon ship,
		 * _unless_ there is no best-effort yet (which will
		 * only happen if the first word is too long all by
		 * itself).
		 */
		if (best > 0)
		    break;
	    }
	    if (i+j == nwords) {
		/*
		 * Special case: if we're at the very end of the
		 * paragraph, we don't score penalty points for the
		 * white space left on the line.
		 */
		cost = 0;
	    } else {
		cost = (thiswidth-linelen) * (thiswidth-linelen);
		cost += wrapwords[i+j].cost;
	    }
	    /*
	     * We compare bestcost >= cost, not bestcost > cost,
	     * because in cases where the costs are identical we
	     * want to try to look like the greedy algorithm,
	     * because readers are likely to have spent a lot of
	     * time looking at greedy-wrapped paragraphs and
	     * there's no point violating the Principle of Least
	     * Surprise if it doesn't actually gain anything.
	     */
	    if (best < 0 || bestcost >= cost) {
		bestcost = cost;
		best = j;
	    }
	}
	/*
	 * Now we know the optimal answer for this terminal
	 * subsequence, so put it in wrapwords.
	 */
	wrapwords[i].cost = bestcost;
	wrapwords[i].nwords = best;
    }

    /*
     * We've wrapped the paragraph. Now build the output
     * `wrappedline' list.
     */
    i = 0;
    while (i < nwords) {
	wrappedline *w = mknew(wrappedline);
	*ptr = w;
	ptr = &w->next;
	w->next = NULL;

	n = wrapwords[i].nwords;
	w->begin = wrapwords[i].begin;
	w->end = wrapwords[i+n-1].end;

	/*
	 * Count along the words to find nspaces and shortfall. The
	 * space after the line's final component is not counted.
	 *
	 * NOTE(review): shortfall is measured against `width' on
	 * every line, whereas the wrapping above measured all lines
	 * but the first against `subsequentwidth'. When the two
	 * differ, the shortfall of later lines is off by the
	 * difference. Left as is because callers are not visible
	 * from here -- confirm what they expect before changing.
	 */
	w->nspaces = 0;
	w->shortfall = width;
	for (j = 0; j < n; j++) {
	    w->shortfall -= wrapwords[i+j].width;
	    if (j < n-1 && wrapwords[i+j].spacewidth) {
		w->nspaces++;
		w->shortfall -= wrapwords[i+j].spacewidth;
	    }
	}
	i += n;
    }

    sfree(wrapwords);

    return head;
}

/*
 * Free every record of a wrappedline list, such as the one
 * wrap_para() returns. The words the records refer to are not
 * freed. A NULL list is a no-op.
 */
void wrap_free(wrappedline *w) {
    wrappedline *following;

    for (; w; w = following) {
	following = w->next;       /* read the link before the record goes */
	sfree(w);
    }
}
Commit	Line	Data
d7482997	1	/*
	2	* misc.c: miscellaneous useful items
	3	*/
	4
	5	#include "halibut.h"
	6
	7	struct stackTag {
	8	void **data;
	9	int sp;
	10	int size;
	11	};
	12
	13	stack stk_new(void) {
	14	stack s;
	15
	16	s = mknew(struct stackTag);
	17	s->sp = 0;
	18	s->size = 0;
	19	s->data = NULL;
	20
	21	return s;
	22	}
	23
	24	void stk_free(stack s) {
	25	sfree(s->data);
	26	sfree(s);
	27	}
	28
	29	void stk_push(stack s, void *item) {
	30	if (s->size <= s->sp) {
	31	s->size = s->sp + 32;
	32	s->data = resize(s->data, s->size);
	33	}
	34	s->data[s->sp++] = item;
	35	}
	36
	37	void *stk_pop(stack s) {
	38	if (s->sp > 0)
	39	return s->data[--s->sp];
	40	else
	41	return NULL;
	42	}
	43
7136a6c7	44	void *stk_top(stack s) {
	45	if (s->sp > 0)
	46	return s->data[s->sp-1];
	47	else
	48	return NULL;
	49	}
	50
d7482997	51	/*
	52	* Small routines to amalgamate a string from an input source.
	53	*/
	54	const rdstring empty_rdstring = {0, 0, NULL};
	55	const rdstringc empty_rdstringc = {0, 0, NULL};
	56
	57	void rdadd(rdstring *rs, wchar_t c) {
	58	if (rs->pos >= rs->size-1) {
	59	rs->size = rs->pos + 128;
	60	rs->text = resize(rs->text, rs->size);
	61	}
	62	rs->text[rs->pos++] = c;
	63	rs->text[rs->pos] = 0;
	64	}
	65	void rdadds(rdstring rs, wchar_t p) {
	66	int len = ustrlen(p);
	67	if (rs->pos >= rs->size - len) {
	68	rs->size = rs->pos + len + 128;
	69	rs->text = resize(rs->text, rs->size);
	70	}
	71	ustrcpy(rs->text + rs->pos, p);
	72	rs->pos += len;
	73	}
	74	wchar_t rdtrim(rdstring rs) {
	75	rs->text = resize(rs->text, rs->pos + 1);
	76	return rs->text;
	77	}
	78
	79	void rdaddc(rdstringc *rs, char c) {
	80	if (rs->pos >= rs->size-1) {
	81	rs->size = rs->pos + 128;
	82	rs->text = resize(rs->text, rs->size);
	83	}
	84	rs->text[rs->pos++] = c;
	85	rs->text[rs->pos] = 0;
	86	}
	87	void rdaddsc(rdstringc rs, char p) {
	88	int len = strlen(p);
	89	if (rs->pos >= rs->size - len) {
	90	rs->size = rs->pos + len + 128;
	91	rs->text = resize(rs->text, rs->size);
	92	}
	93	strcpy(rs->text + rs->pos, p);
	94	rs->pos += len;
	95	}
	96	char rdtrimc(rdstringc rs) {
	97	rs->text = resize(rs->text, rs->pos + 1);
	98	return rs->text;
	99	}
	100
831da32e	101	static int compare_wordlists_literally(word a, word b) {
d7482997	102	int t;
	103	while (a && b) {
	104	if (a->type != b->type)
	105	return (a->type < b->type ? -1 : +1); /* FIXME? */
	106	t = a->type;
	107	if ((t != word_Normal && t != word_Code &&
	108	t != word_WeakCode && t != word_Emph) \|\|
	109	a->alt \|\| b->alt) {
	110	int c;
	111	if (a->text && b->text) {
	112	c = ustricmp(a->text, b->text);
	113	if (c)
	114	return c;
	115	}
831da32e	116	c = compare_wordlists_literally(a->alt, b->alt);
d7482997	117	if (c)
	118	return c;
	119	a = a->next;
	120	b = b->next;
	121	} else {
	122	wchar_t ap = a->text, bp = b->text;
	123	while (ap && bp) {
	124	wchar_t ac = utolower(ap), bc = utolower(bp);
	125	if (ac != bc)
	126	return (ac < bc ? -1 : +1);
	127	if (!*++ap && a->next && a->next->type == t && !a->next->alt)
	128	a = a->next, ap = a->text;
	129	if (!*++bp && b->next && b->next->type == t && !b->next->alt)
	130	b = b->next, bp = b->text;
	131	}
	132	if (ap \|\| bp)
	133	return (*ap ? +1 : -1);
	134	a = a->next;
	135	b = b->next;
	136	}
	137	}
	138
	139	if (a \|\| b)
	140	return (a ? +1 : -1);
	141	else
	142	return 0;
	143	}
	144
831da32e	145	int compare_wordlists(word a, word b) {
	146	/*
	147	* First we compare only the alphabetic content of the word
	148	* lists, with case not a factor. If that comes out equal,
	149	* _then_ we compare the word lists literally.
	150	*/
	151	struct {
	152	word *w;
	153	int i;
	154	wchar_t c;
	155	} pos[2];
	156
	157	pos[0].w = a;
	158	pos[1].w = b;
	159	pos[0].i = pos[1].i = 0;
	160
	161	while (1) {
	162	/*
	163	* Find the next alphabetic character in each word list.
	164	*/
	165	int k;
	166
	167	for (k = 0; k < 2; k++) {
	168	/*
	169	* Advance until we hit either an alphabetic character
	170	* or the end of the word list.
	171	*/
	172	while (1) {
	173	if (!pos[k].w) {
	174	/* End of word list. */
	175	pos[k].c = 0;
	176	break;
	177	} else if (!pos[k].w->text \|\| !pos[k].w->text[pos[k].i]) {
	178	/* No characters remaining in this word; move on. */
	179	pos[k].w = pos[k].w->next;
	180	pos[k].i = 0;
	181	} else if (!uisalpha(pos[k].w->text[pos[k].i])) {
	182	/* This character isn't alphabetic; move on. */
	183	pos[k].i++;
	184	} else {
	185	/* We have an alphabetic! Lowercase it and continue. */
	186	pos[k].c = utolower(pos[k].w->text[pos[k].i]);
	187	break;
	188	}
	189	}
	190	}
	191
	192	if (pos[0].c < pos[1].c)
	193	return -1;
	194	else if (pos[0].c > pos[1].c)
	195	return +1;
	196
	197	if (!pos[0].c)
	198	break; /* they're equal */
	199
	200	pos[0].i++;
	201	pos[1].i++;
	202	}
	203
	204	/*
	205	* If we reach here, the strings were alphabetically equal, so
	206	* compare in more detail.
	207	*/
	208	return compare_wordlists_literally(a, b);
209	}
210
d7482997	211	void mark_attr_ends(paragraph *sourceform) {
	212	paragraph *p;
	213	word w, wp;
	214	for (p = sourceform; p; p = p->next) {
	215	wp = NULL;
	216	for (w = p->words; w; w = w->next) {
	217	if (isattr(w->type)) {
	218	int before = (wp && isattr(wp->type) &&
	219	sameattr(wp->type, w->type));
	220	int after = (w->next && isattr(w->next->type) &&
	221	sameattr(w->next->type, w->type));
	222	w->aux \|= (before ?
	223	(after ? attr_Always : attr_Last) :
	224	(after ? attr_First : attr_Only));
	225	}
	226	wp = w;
	227	}
	228	}
	229	}
	230
	231	wrappedline wrap_para(word text, int width, int subsequentwidth,
	232	int (widthfn)(word )) {
	233	wrappedline head = NULL, *ptr = &head;
	234	int nwords, wordsize;
	235	struct wrapword {
	236	word begin, end;
	237	int width;
	238	int spacewidth;
	239	int cost;
	240	int nwords;
	241	} *wrapwords;
	242	int i, j, n;
	243
	244	/*
	245	* Break the line up into wrappable components.
	246	*/
	247	nwords = wordsize = 0;
	248	wrapwords = NULL;
	249	while (text) {
	250	if (nwords >= wordsize) {
	251	wordsize = nwords + 64;
	252	wrapwords = srealloc(wrapwords, wordsize * sizeof(*wrapwords));
	253	}
	254	wrapwords[nwords].width = 0;
	255	wrapwords[nwords].begin = text;
	256	while (text) {
	257	wrapwords[nwords].width += widthfn(text);
	258	wrapwords[nwords].end = text->next;
	259	if (text->next && (text->next->type == word_WhiteSpace \|\|
	260	text->next->type == word_EmphSpace \|\|
	261	text->breaks))
	262	break;
	263	text = text->next;
	264	}
	265	if (text && text->next && (text->next->type == word_WhiteSpace \|\|
	266	text->next->type == word_EmphSpace)) {
	267	wrapwords[nwords].spacewidth = widthfn(text->next);
	268	text = text->next;
	269	} else {
	270	wrapwords[nwords].spacewidth = 0;
	271	}
	272	nwords++;
	273	if (text)
	274	text = text->next;
275	}
276
277	/*
278	* Perform the dynamic wrapping algorithm: work backwards from
279	* nwords-1, determining the optimal wrapping for each terminal
280	* subsequence of the paragraph.
281	*/
282	for (i = nwords; i-- ;) {
283	int best = -1;
284	int bestcost = 0;
285	int cost;
286	int linelen = 0, spacewidth = 0;
287	int seenspace;
288	int thiswidth = (i == 0 ? width : subsequentwidth);
289
290	j = 0;
291	seenspace = 0;
292	while (i+j < nwords) {
293	/*
294	* See what happens if we put j+1 words on this line.
295	*/
296	if (spacewidth)
297	seenspace = 1;
298	linelen += spacewidth + wrapwords[i+j].width;
299	spacewidth = wrapwords[i+j].spacewidth;
300	j++;
301	if (linelen > thiswidth) {
302	/*
303	* If we're over the width limit, abandon ship,
304	* _unless_ there is no best-effort yet (which will
305	* only happen if the first word is too long all by
306	* itself).
307	*/
308	if (best > 0)
309	break;
310	}
311	if (i+j == nwords) {
312	/*
313	* Special case: if we're at the very end of the
314	* paragraph, we don't score penalty points for the
315	* white space left on the line.
316	*/
317	cost = 0;
318	} else {
319	cost = (thiswidth-linelen) * (thiswidth-linelen);
320	cost += wrapwords[i+j].cost;
321	}
322	/*
323	* We compare bestcost >= cost, not bestcost > cost,
324	* because in cases where the costs are identical we
325	* want to try to look like the greedy algorithm,
326	* because readers are likely to have spent a lot of
327	* time looking at greedy-wrapped paragraphs and
328	* there's no point violating the Principle of Least
329	* Surprise if it doesn't actually gain anything.
330	*/
331	if (best < 0 \|\| bestcost >= cost) {
332	bestcost = cost;
333	best = j;
334	}
335	}
336	/*
337	* Now we know the optimal answer for this terminal
338	* subsequence, so put it in wrapwords.
339	*/
340	wrapwords[i].cost = bestcost;
341	wrapwords[i].nwords = best;
342	}
343
344	/*
345	* We've wrapped the paragraph. Now build the output
346	* `wrappedline' list.
347	*/
348	i = 0;
349	while (i < nwords) {
350	wrappedline *w = mknew(wrappedline);
351	*ptr = w;
352	ptr = &w->next;
353	w->next = NULL;
354
355	n = wrapwords[i].nwords;
356	w->begin = wrapwords[i].begin;
357	w->end = wrapwords[i+n-1].end;
358
359	/*
360	* Count along the words to find nspaces and shortfall.
361	*/
362	w->nspaces = 0;
363	w->shortfall = width;
364	for (j = 0; j < n; j++) {
365	w->shortfall -= wrapwords[i+j].width;
366	if (j < n-1 && wrapwords[i+j].spacewidth) {
367	w->nspaces++;
368	w->shortfall -= wrapwords[i+j].spacewidth;
369	}
370	}
371	i += n;
372	}
373
374	sfree(wrapwords);
375
376	return head;
377	}
378
379	void wrap_free(wrappedline *w) {
380	while (w) {
381	wrappedline *t = w->next;
382	sfree(w);
383	w = t;
384	}
385	}