[sgt/halibut] / misc.c

/*
 * misc.c: miscellaneous useful items
 */

#include "halibut.h"

struct stackTag {
    void **data;
    int sp;
    int size;
};

stack stk_new(void) {
    stack s;

    s = mknew(struct stackTag);
    s->sp = 0;
    s->size = 0;
    s->data = NULL;

    return s;
}

void stk_free(stack s) {
    sfree(s->data);
    sfree(s);
}

void stk_push(stack s, void *item) {
    if (s->size <= s->sp) {
	s->size = s->sp + 32;
	s->data = resize(s->data, s->size);
    }
    s->data[s->sp++] = item;
}

void *stk_pop(stack s) {
    if (s->sp > 0)
	return s->data[--s->sp];
    else
	return NULL;
}

void *stk_top(stack s) {
    if (s->sp > 0)
	return s->data[s->sp-1];
    else
	return NULL;
}

/*
 * Small routines to amalgamate a string from an input source.
 */
const rdstring empty_rdstring = {0, 0, NULL};
const rdstringc empty_rdstringc = {0, 0, NULL};

void rdadd(rdstring *rs, wchar_t c) {
    if (rs->pos >= rs->size-1) {
	rs->size = rs->pos + 128;
	rs->text = resize(rs->text, rs->size);
    }
    rs->text[rs->pos++] = c;
    rs->text[rs->pos] = 0;
}
void rdadds(rdstring *rs, wchar_t *p) {
    int len = ustrlen(p);
    if (rs->pos >= rs->size - len) {
	rs->size = rs->pos + len + 128;
	rs->text = resize(rs->text, rs->size);
    }
    ustrcpy(rs->text + rs->pos, p);
    rs->pos += len;
}
wchar_t *rdtrim(rdstring *rs) {
    rs->text = resize(rs->text, rs->pos + 1);
    return rs->text;
}

void rdaddc(rdstringc *rs, char c) {
    if (rs->pos >= rs->size-1) {
	rs->size = rs->pos + 128;
	rs->text = resize(rs->text, rs->size);
    }
    rs->text[rs->pos++] = c;
    rs->text[rs->pos] = 0;
}
void rdaddsc(rdstringc *rs, char *p) {
    int len = strlen(p);
    if (rs->pos >= rs->size - len) {
	rs->size = rs->pos + len + 128;
	rs->text = resize(rs->text, rs->size);
    }
    strcpy(rs->text + rs->pos, p);
    rs->pos += len;
}
char *rdtrimc(rdstringc *rs) {
    rs->text = resize(rs->text, rs->pos + 1);
    return rs->text;
}

int compare_wordlists(word *a, word *b) {
    int t;
    while (a && b) {
	if (a->type != b->type)
	    return (a->type < b->type ? -1 : +1);   /* FIXME? */
	t = a->type;
	if ((t != word_Normal && t != word_Code &&
	     t != word_WeakCode && t != word_Emph) ||
	    a->alt || b->alt) {
	    int c;
	    if (a->text && b->text) {
		c = ustricmp(a->text, b->text);
		if (c)
		    return c;
	    }
	    c = compare_wordlists(a->alt, b->alt);
	    if (c)
		return c;
	    a = a->next;
	    b = b->next;
	} else {
	    wchar_t *ap = a->text, *bp = b->text;
	    while (*ap && *bp) {
		wchar_t ac = utolower(*ap), bc = utolower(*bp);
		if (ac != bc)
		    return (ac < bc ? -1 : +1);
		if (!*++ap && a->next && a->next->type == t && !a->next->alt)
		    a = a->next, ap = a->text;
		if (!*++bp && b->next && b->next->type == t && !b->next->alt)
		    b = b->next, bp = b->text;
	    }
	    if (*ap || *bp)
		return (*ap ? +1 : -1);
	    a = a->next;
	    b = b->next;
	}
    }

    if (a || b)
	return (a ? +1 : -1);
    else
	return 0;
}

void mark_attr_ends(paragraph *sourceform) {
    paragraph *p;
    word *w, *wp;
    for (p = sourceform; p; p = p->next) {
	wp = NULL;
	for (w = p->words; w; w = w->next) {
	    if (isattr(w->type)) {
		int before = (wp && isattr(wp->type) &&
			      sameattr(wp->type, w->type));
		int after = (w->next && isattr(w->next->type) &&
			     sameattr(w->next->type, w->type));
		w->aux |= (before ?
			   (after ? attr_Always : attr_Last) :
			   (after ? attr_First : attr_Only));
	    }
	    wp = w;
	}
    }
}

wrappedline *wrap_para(word *text, int width, int subsequentwidth,
		       int (*widthfn)(word *)) {
    wrappedline *head = NULL, **ptr = &head;
    int nwords, wordsize;
    struct wrapword {
	word *begin, *end;
	int width;
	int spacewidth;
	int cost;
	int nwords;
    } *wrapwords;
    int i, j, n;

    /*
     * Break the line up into wrappable components.
     */
    nwords = wordsize = 0;
    wrapwords = NULL;
    while (text) {
	if (nwords >= wordsize) {
	    wordsize = nwords + 64;
	    wrapwords = srealloc(wrapwords, wordsize * sizeof(*wrapwords));
	}
	wrapwords[nwords].width = 0;
	wrapwords[nwords].begin = text;
	while (text) {
	    wrapwords[nwords].width += widthfn(text);
	    wrapwords[nwords].end = text->next;
	    if (text->next && (text->next->type == word_WhiteSpace ||
			       text->next->type == word_EmphSpace ||
			       text->breaks))
		break;
	    text = text->next;
	}
	if (text && text->next && (text->next->type == word_WhiteSpace ||
			   text->next->type == word_EmphSpace)) {
	    wrapwords[nwords].spacewidth = widthfn(text->next);
	    text = text->next;
	} else {
	    wrapwords[nwords].spacewidth = 0;
	}
	nwords++;
	if (text)
	    text = text->next;
    }

    /*
     * Perform the dynamic wrapping algorithm: work backwards from
     * nwords-1, determining the optimal wrapping for each terminal
     * subsequence of the paragraph.
     */
    for (i = nwords; i-- ;) {
	int best = -1;
	int bestcost = 0;
	int cost;
	int linelen = 0, spacewidth = 0;
	int seenspace;
	int thiswidth = (i == 0 ? width : subsequentwidth);

	j = 0;
	seenspace = 0;
	while (i+j < nwords) {
	    /*
	     * See what happens if we put j+1 words on this line.
	     */
	    if (spacewidth)
		seenspace = 1;
	    linelen += spacewidth + wrapwords[i+j].width;
	    spacewidth = wrapwords[i+j].spacewidth;
	    j++;
	    if (linelen > thiswidth) {
		/*
		 * If we're over the width limit, abandon ship,
		 * _unless_ there is no best-effort yet (which will
		 * only happen if the first word is too long all by
		 * itself).
		 */
		if (best > 0)
		    break;
	    }
	    if (i+j == nwords) {
		/*
		 * Special case: if we're at the very end of the
		 * paragraph, we don't score penalty points for the
		 * white space left on the line.
		 */
		cost = 0;
	    } else {
		cost = (thiswidth-linelen) * (thiswidth-linelen);
		cost += wrapwords[i+j].cost;
	    }
	    /*
	     * We compare bestcost >= cost, not bestcost > cost,
	     * because in cases where the costs are identical we
	     * want to try to look like the greedy algorithm,
	     * because readers are likely to have spent a lot of
	     * time looking at greedy-wrapped paragraphs and
	     * there's no point violating the Principle of Least
	     * Surprise if it doesn't actually gain anything.
	     */
	    if (best < 0 || bestcost >= cost) {
		bestcost = cost;
		best = j;
	    }
	}
	/*
	 * Now we know the optimal answer for this terminal
	 * subsequence, so put it in wrapwords.
	 */
	wrapwords[i].cost = bestcost;
	wrapwords[i].nwords = best;
    }

    /*
     * We've wrapped the paragraph. Now build the output
     * `wrappedline' list.
     */
    i = 0;
    while (i < nwords) {
	wrappedline *w = mknew(wrappedline);
	*ptr = w;
	ptr = &w->next;
	w->next = NULL;

	n = wrapwords[i].nwords;
	w->begin = wrapwords[i].begin;
	w->end = wrapwords[i+n-1].end;

	/*
	 * Count along the words to find nspaces and shortfall.
	 */
	w->nspaces = 0;
	w->shortfall = width;
	for (j = 0; j < n; j++) {
	    w->shortfall -= wrapwords[i+j].width;
	    if (j < n-1 && wrapwords[i+j].spacewidth) {
		w->nspaces++;
		w->shortfall -= wrapwords[i+j].spacewidth;
	    }
	}
	i += n;
    }

    sfree(wrapwords);

    return head;
}

void wrap_free(wrappedline *w) {
    while (w) {
	wrappedline *t = w->next;
	sfree(w);
	w = t;
    }
}
Commit	Line	Data
d7482997	1	/*
	2	* misc.c: miscellaneous useful items
	3	*/
	4
	5	#include "halibut.h"
	6
	7	struct stackTag {
	8	void **data;
	9	int sp;
	10	int size;
	11	};
	12
	13	stack stk_new(void) {
	14	stack s;
	15
	16	s = mknew(struct stackTag);
	17	s->sp = 0;
	18	s->size = 0;
	19	s->data = NULL;
	20
	21	return s;
	22	}
	23
	24	void stk_free(stack s) {
	25	sfree(s->data);
	26	sfree(s);
	27	}
	28
	29	void stk_push(stack s, void *item) {
	30	if (s->size <= s->sp) {
	31	s->size = s->sp + 32;
	32	s->data = resize(s->data, s->size);
	33	}
	34	s->data[s->sp++] = item;
	35	}
	36
	37	void *stk_pop(stack s) {
	38	if (s->sp > 0)
	39	return s->data[--s->sp];
	40	else
	41	return NULL;
	42	}
	43
7136a6c7	44	void *stk_top(stack s) {
	45	if (s->sp > 0)
	46	return s->data[s->sp-1];
	47	else
	48	return NULL;
	49	}
	50
d7482997	51	/*
	52	* Small routines to amalgamate a string from an input source.
	53	*/
	54	const rdstring empty_rdstring = {0, 0, NULL};
	55	const rdstringc empty_rdstringc = {0, 0, NULL};
	56
	57	void rdadd(rdstring *rs, wchar_t c) {
	58	if (rs->pos >= rs->size-1) {
	59	rs->size = rs->pos + 128;
	60	rs->text = resize(rs->text, rs->size);
	61	}
	62	rs->text[rs->pos++] = c;
	63	rs->text[rs->pos] = 0;
	64	}
	65	void rdadds(rdstring rs, wchar_t p) {
	66	int len = ustrlen(p);
	67	if (rs->pos >= rs->size - len) {
	68	rs->size = rs->pos + len + 128;
	69	rs->text = resize(rs->text, rs->size);
	70	}
	71	ustrcpy(rs->text + rs->pos, p);
	72	rs->pos += len;
	73	}
	74	wchar_t rdtrim(rdstring rs) {
	75	rs->text = resize(rs->text, rs->pos + 1);
	76	return rs->text;
	77	}
	78
	79	void rdaddc(rdstringc *rs, char c) {
	80	if (rs->pos >= rs->size-1) {
	81	rs->size = rs->pos + 128;
	82	rs->text = resize(rs->text, rs->size);
	83	}
	84	rs->text[rs->pos++] = c;
	85	rs->text[rs->pos] = 0;
	86	}
	87	void rdaddsc(rdstringc rs, char p) {
	88	int len = strlen(p);
	89	if (rs->pos >= rs->size - len) {
	90	rs->size = rs->pos + len + 128;
	91	rs->text = resize(rs->text, rs->size);
	92	}
	93	strcpy(rs->text + rs->pos, p);
	94	rs->pos += len;
	95	}
	96	char rdtrimc(rdstringc rs) {
	97	rs->text = resize(rs->text, rs->pos + 1);
	98	return rs->text;
	99	}
	100
	101	int compare_wordlists(word a, word b) {
	102	int t;
	103	while (a && b) {
	104	if (a->type != b->type)
	105	return (a->type < b->type ? -1 : +1); /* FIXME? */
	106	t = a->type;
	107	if ((t != word_Normal && t != word_Code &&
	108	t != word_WeakCode && t != word_Emph) \|\|
	109	a->alt \|\| b->alt) {
	110	int c;
	111	if (a->text && b->text) {
	112	c = ustricmp(a->text, b->text);
	113	if (c)
	114	return c;
115	}
116	c = compare_wordlists(a->alt, b->alt);
117	if (c)
118	return c;
119	a = a->next;
120	b = b->next;
121	} else {
122	wchar_t ap = a->text, bp = b->text;
123	while (ap && bp) {
124	wchar_t ac = utolower(ap), bc = utolower(bp);
125	if (ac != bc)
126	return (ac < bc ? -1 : +1);
127	if (!*++ap && a->next && a->next->type == t && !a->next->alt)
128	a = a->next, ap = a->text;
129	if (!*++bp && b->next && b->next->type == t && !b->next->alt)
130	b = b->next, bp = b->text;
131	}
132	if (ap \|\| bp)
133	return (*ap ? +1 : -1);
134	a = a->next;
135	b = b->next;
136	}
137	}
138
139	if (a \|\| b)
140	return (a ? +1 : -1);
141	else
142	return 0;
143	}
144
145	void mark_attr_ends(paragraph *sourceform) {
146	paragraph *p;
147	word w, wp;
148	for (p = sourceform; p; p = p->next) {
149	wp = NULL;
150	for (w = p->words; w; w = w->next) {
151	if (isattr(w->type)) {
152	int before = (wp && isattr(wp->type) &&
153	sameattr(wp->type, w->type));
154	int after = (w->next && isattr(w->next->type) &&
155	sameattr(w->next->type, w->type));
156	w->aux \|= (before ?
157	(after ? attr_Always : attr_Last) :
158	(after ? attr_First : attr_Only));
159	}
160	wp = w;
161	}
162	}
163	}
164
165	wrappedline wrap_para(word text, int width, int subsequentwidth,
166	int (widthfn)(word )) {
167	wrappedline head = NULL, *ptr = &head;
168	int nwords, wordsize;
169	struct wrapword {
170	word begin, end;
171	int width;
172	int spacewidth;
173	int cost;
174	int nwords;
175	} *wrapwords;
176	int i, j, n;
177
178	/*
179	* Break the line up into wrappable components.
180	*/
181	nwords = wordsize = 0;
182	wrapwords = NULL;
183	while (text) {
184	if (nwords >= wordsize) {
185	wordsize = nwords + 64;
186	wrapwords = srealloc(wrapwords, wordsize * sizeof(*wrapwords));
187	}
188	wrapwords[nwords].width = 0;
189	wrapwords[nwords].begin = text;
190	while (text) {
191	wrapwords[nwords].width += widthfn(text);
192	wrapwords[nwords].end = text->next;
193	if (text->next && (text->next->type == word_WhiteSpace \|\|
194	text->next->type == word_EmphSpace \|\|
195	text->breaks))
196	break;
197	text = text->next;
198	}
199	if (text && text->next && (text->next->type == word_WhiteSpace \|\|
200	text->next->type == word_EmphSpace)) {
201	wrapwords[nwords].spacewidth = widthfn(text->next);
202	text = text->next;
203	} else {
204	wrapwords[nwords].spacewidth = 0;
205	}
206	nwords++;
207	if (text)
208	text = text->next;
209	}
210
211	/*
212	* Perform the dynamic wrapping algorithm: work backwards from
213	* nwords-1, determining the optimal wrapping for each terminal
214	* subsequence of the paragraph.
215	*/
216	for (i = nwords; i-- ;) {
217	int best = -1;
218	int bestcost = 0;
219	int cost;
220	int linelen = 0, spacewidth = 0;
221	int seenspace;
222	int thiswidth = (i == 0 ? width : subsequentwidth);
223
224	j = 0;
225	seenspace = 0;
226	while (i+j < nwords) {
227	/*
228	* See what happens if we put j+1 words on this line.
229	*/
230	if (spacewidth)
231	seenspace = 1;
232	linelen += spacewidth + wrapwords[i+j].width;
233	spacewidth = wrapwords[i+j].spacewidth;
234	j++;
235	if (linelen > thiswidth) {
236	/*
237	* If we're over the width limit, abandon ship,
238	* _unless_ there is no best-effort yet (which will
239	* only happen if the first word is too long all by
240	* itself).
241	*/
242	if (best > 0)
243	break;
244	}
245	if (i+j == nwords) {
246	/*
247	* Special case: if we're at the very end of the
248	* paragraph, we don't score penalty points for the
249	* white space left on the line.
250	*/
251	cost = 0;
252	} else {
253	cost = (thiswidth-linelen) * (thiswidth-linelen);
254	cost += wrapwords[i+j].cost;
255	}
256	/*
257	* We compare bestcost >= cost, not bestcost > cost,
258	* because in cases where the costs are identical we
259	* want to try to look like the greedy algorithm,
260	* because readers are likely to have spent a lot of
261	* time looking at greedy-wrapped paragraphs and
262	* there's no point violating the Principle of Least
263	* Surprise if it doesn't actually gain anything.
264	*/
265	if (best < 0 \|\| bestcost >= cost) {
266	bestcost = cost;
267	best = j;
268	}
269	}
270	/*
271	* Now we know the optimal answer for this terminal
272	* subsequence, so put it in wrapwords.
273	*/
274	wrapwords[i].cost = bestcost;
275	wrapwords[i].nwords = best;
276	}
277
278	/*
279	* We've wrapped the paragraph. Now build the output
280	* `wrappedline' list.
281	*/
282	i = 0;
283	while (i < nwords) {
284	wrappedline *w = mknew(wrappedline);
285	*ptr = w;
286	ptr = &w->next;
287	w->next = NULL;
288
289	n = wrapwords[i].nwords;
290	w->begin = wrapwords[i].begin;
291	w->end = wrapwords[i+n-1].end;
292
293	/*
294	* Count along the words to find nspaces and shortfall.
295	*/
296	w->nspaces = 0;
297	w->shortfall = width;
298	for (j = 0; j < n; j++) {
299	w->shortfall -= wrapwords[i+j].width;
300	if (j < n-1 && wrapwords[i+j].spacewidth) {
301	w->nspaces++;
302	w->shortfall -= wrapwords[i+j].spacewidth;
303	}
304	}
305	i += n;
306	}
307
308	sfree(wrapwords);
309
310	return head;
311	}
312
313	void wrap_free(wrappedline *w) {
314	while (w) {
315	wrappedline *t = w->next;
316	sfree(w);
317	w = t;
318	}
319	}