[sgt/halibut] / misc.c

/*
 * misc.c: miscellaneous useful items
 */

#include "halibut.h"

struct stackTag {
    void **data;
    int sp;
    int size;
};

stack stk_new(void) {
    stack s;

    s = mknew(struct stackTag);
    s->sp = 0;
    s->size = 0;
    s->data = NULL;

    return s;
}

void stk_free(stack s) {
    sfree(s->data);
    sfree(s);
}

void stk_push(stack s, void *item) {
    if (s->size <= s->sp) {
	s->size = s->sp + 32;
	s->data = resize(s->data, s->size);
    }
    s->data[s->sp++] = item;
}

void *stk_pop(stack s) {
    if (s->sp > 0)
	return s->data[--s->sp];
    else
	return NULL;
}

/*
 * Small routines to amalgamate a string from an input source.
 */
const rdstring empty_rdstring = {0, 0, NULL};
const rdstringc empty_rdstringc = {0, 0, NULL};

void rdadd(rdstring *rs, wchar_t c) {
    if (rs->pos >= rs->size-1) {
	rs->size = rs->pos + 128;
	rs->text = resize(rs->text, rs->size);
    }
    rs->text[rs->pos++] = c;
    rs->text[rs->pos] = 0;
}
void rdadds(rdstring *rs, wchar_t *p) {
    int len = ustrlen(p);
    if (rs->pos >= rs->size - len) {
	rs->size = rs->pos + len + 128;
	rs->text = resize(rs->text, rs->size);
    }
    ustrcpy(rs->text + rs->pos, p);
    rs->pos += len;
}
wchar_t *rdtrim(rdstring *rs) {
    rs->text = resize(rs->text, rs->pos + 1);
    return rs->text;
}

void rdaddc(rdstringc *rs, char c) {
    if (rs->pos >= rs->size-1) {
	rs->size = rs->pos + 128;
	rs->text = resize(rs->text, rs->size);
    }
    rs->text[rs->pos++] = c;
    rs->text[rs->pos] = 0;
}
void rdaddsc(rdstringc *rs, char *p) {
    int len = strlen(p);
    if (rs->pos >= rs->size - len) {
	rs->size = rs->pos + len + 128;
	rs->text = resize(rs->text, rs->size);
    }
    strcpy(rs->text + rs->pos, p);
    rs->pos += len;
}
char *rdtrimc(rdstringc *rs) {
    rs->text = resize(rs->text, rs->pos + 1);
    return rs->text;
}

int compare_wordlists(word *a, word *b) {
    int t;
    while (a && b) {
	if (a->type != b->type)
	    return (a->type < b->type ? -1 : +1);   /* FIXME? */
	t = a->type;
	if ((t != word_Normal && t != word_Code &&
	     t != word_WeakCode && t != word_Emph) ||
	    a->alt || b->alt) {
	    int c;
	    if (a->text && b->text) {
		c = ustricmp(a->text, b->text);
		if (c)
		    return c;
	    }
	    c = compare_wordlists(a->alt, b->alt);
	    if (c)
		return c;
	    a = a->next;
	    b = b->next;
	} else {
	    wchar_t *ap = a->text, *bp = b->text;
	    while (*ap && *bp) {
		wchar_t ac = utolower(*ap), bc = utolower(*bp);
		if (ac != bc)
		    return (ac < bc ? -1 : +1);
		if (!*++ap && a->next && a->next->type == t && !a->next->alt)
		    a = a->next, ap = a->text;
		if (!*++bp && b->next && b->next->type == t && !b->next->alt)
		    b = b->next, bp = b->text;
	    }
	    if (*ap || *bp)
		return (*ap ? +1 : -1);
	    a = a->next;
	    b = b->next;
	}
    }

    if (a || b)
	return (a ? +1 : -1);
    else
	return 0;
}

void mark_attr_ends(paragraph *sourceform) {
    paragraph *p;
    word *w, *wp;
    for (p = sourceform; p; p = p->next) {
	wp = NULL;
	for (w = p->words; w; w = w->next) {
	    if (isattr(w->type)) {
		int before = (wp && isattr(wp->type) &&
			      sameattr(wp->type, w->type));
		int after = (w->next && isattr(w->next->type) &&
			     sameattr(w->next->type, w->type));
		w->aux |= (before ?
			   (after ? attr_Always : attr_Last) :
			   (after ? attr_First : attr_Only));
	    }
	    wp = w;
	}
    }
}

wrappedline *wrap_para(word *text, int width, int subsequentwidth,
		       int (*widthfn)(word *)) {
    wrappedline *head = NULL, **ptr = &head;
    int nwords, wordsize;
    struct wrapword {
	word *begin, *end;
	int width;
	int spacewidth;
	int cost;
	int nwords;
    } *wrapwords;
    int i, j, n;

    /*
     * Break the line up into wrappable components.
     */
    nwords = wordsize = 0;
    wrapwords = NULL;
    while (text) {
	if (nwords >= wordsize) {
	    wordsize = nwords + 64;
	    wrapwords = srealloc(wrapwords, wordsize * sizeof(*wrapwords));
	}
	wrapwords[nwords].width = 0;
	wrapwords[nwords].begin = text;
	while (text) {
	    wrapwords[nwords].width += widthfn(text);
	    wrapwords[nwords].end = text->next;
	    if (text->next && (text->next->type == word_WhiteSpace ||
			       text->next->type == word_EmphSpace ||
			       text->breaks))
		break;
	    text = text->next;
	}
	if (text && text->next && (text->next->type == word_WhiteSpace ||
			   text->next->type == word_EmphSpace)) {
	    wrapwords[nwords].spacewidth = widthfn(text->next);
	    text = text->next;
	} else {
	    wrapwords[nwords].spacewidth = 0;
	}
	nwords++;
	if (text)
	    text = text->next;
    }

    /*
     * Perform the dynamic wrapping algorithm: work backwards from
     * nwords-1, determining the optimal wrapping for each terminal
     * subsequence of the paragraph.
     */
    for (i = nwords; i-- ;) {
	int best = -1;
	int bestcost = 0;
	int cost;
	int linelen = 0, spacewidth = 0;
	int seenspace;
	int thiswidth = (i == 0 ? width : subsequentwidth);

	j = 0;
	seenspace = 0;
	while (i+j < nwords) {
	    /*
	     * See what happens if we put j+1 words on this line.
	     */
	    if (spacewidth)
		seenspace = 1;
	    linelen += spacewidth + wrapwords[i+j].width;
	    spacewidth = wrapwords[i+j].spacewidth;
	    j++;
	    if (linelen > thiswidth) {
		/*
		 * If we're over the width limit, abandon ship,
		 * _unless_ there is no best-effort yet (which will
		 * only happen if the first word is too long all by
		 * itself).
		 */
		if (best > 0)
		    break;
	    }
	    if (i+j == nwords) {
		/*
		 * Special case: if we're at the very end of the
		 * paragraph, we don't score penalty points for the
		 * white space left on the line.
		 */
		cost = 0;
	    } else {
		cost = (thiswidth-linelen) * (thiswidth-linelen);
		cost += wrapwords[i+j].cost;
	    }
	    /*
	     * We compare bestcost >= cost, not bestcost > cost,
	     * because in cases where the costs are identical we
	     * want to try to look like the greedy algorithm,
	     * because readers are likely to have spent a lot of
	     * time looking at greedy-wrapped paragraphs and
	     * there's no point violating the Principle of Least
	     * Surprise if it doesn't actually gain anything.
	     */
	    if (best < 0 || bestcost >= cost) {
		bestcost = cost;
		best = j;
	    }
	}
	/*
	 * Now we know the optimal answer for this terminal
	 * subsequence, so put it in wrapwords.
	 */
	wrapwords[i].cost = bestcost;
	wrapwords[i].nwords = best;
    }

    /*
     * We've wrapped the paragraph. Now build the output
     * `wrappedline' list.
     */
    i = 0;
    while (i < nwords) {
	wrappedline *w = mknew(wrappedline);
	*ptr = w;
	ptr = &w->next;
	w->next = NULL;

	n = wrapwords[i].nwords;
	w->begin = wrapwords[i].begin;
	w->end = wrapwords[i+n-1].end;

	/*
	 * Count along the words to find nspaces and shortfall.
	 */
	w->nspaces = 0;
	w->shortfall = width;
	for (j = 0; j < n; j++) {
	    w->shortfall -= wrapwords[i+j].width;
	    if (j < n-1 && wrapwords[i+j].spacewidth) {
		w->nspaces++;
		w->shortfall -= wrapwords[i+j].spacewidth;
	    }
	}
	i += n;
    }

    sfree(wrapwords);

    return head;
}

void wrap_free(wrappedline *w) {
    while (w) {
	wrappedline *t = w->next;
	sfree(w);
	w = t;
    }
}
Commit	Line	Data
d7482997	1	/*
	2	* misc.c: miscellaneous useful items
	3	*/
	4
	5	#include "halibut.h"
	6
	7	struct stackTag {
	8	void **data;
	9	int sp;
	10	int size;
	11	};
	12
	13	stack stk_new(void) {
	14	stack s;
	15
	16	s = mknew(struct stackTag);
	17	s->sp = 0;
	18	s->size = 0;
	19	s->data = NULL;
	20
	21	return s;
	22	}
	23
	24	void stk_free(stack s) {
	25	sfree(s->data);
	26	sfree(s);
	27	}
	28
	29	void stk_push(stack s, void *item) {
	30	if (s->size <= s->sp) {
	31	s->size = s->sp + 32;
	32	s->data = resize(s->data, s->size);
	33	}
	34	s->data[s->sp++] = item;
	35	}
	36
	37	void *stk_pop(stack s) {
	38	if (s->sp > 0)
	39	return s->data[--s->sp];
	40	else
	41	return NULL;
	42	}
	43
	44	/*
	45	* Small routines to amalgamate a string from an input source.
	46	*/
	47	const rdstring empty_rdstring = {0, 0, NULL};
	48	const rdstringc empty_rdstringc = {0, 0, NULL};
	49
	50	void rdadd(rdstring *rs, wchar_t c) {
	51	if (rs->pos >= rs->size-1) {
	52	rs->size = rs->pos + 128;
	53	rs->text = resize(rs->text, rs->size);
	54	}
	55	rs->text[rs->pos++] = c;
	56	rs->text[rs->pos] = 0;
	57	}
	58	void rdadds(rdstring rs, wchar_t p) {
	59	int len = ustrlen(p);
	60	if (rs->pos >= rs->size - len) {
	61	rs->size = rs->pos + len + 128;
	62	rs->text = resize(rs->text, rs->size);
	63	}
	64	ustrcpy(rs->text + rs->pos, p);
65	rs->pos += len;
66	}
67	wchar_t rdtrim(rdstring rs) {
68	rs->text = resize(rs->text, rs->pos + 1);
69	return rs->text;
70	}
71
72	void rdaddc(rdstringc *rs, char c) {
73	if (rs->pos >= rs->size-1) {
74	rs->size = rs->pos + 128;
75	rs->text = resize(rs->text, rs->size);
76	}
77	rs->text[rs->pos++] = c;
78	rs->text[rs->pos] = 0;
79	}
80	void rdaddsc(rdstringc rs, char p) {
81	int len = strlen(p);
82	if (rs->pos >= rs->size - len) {
83	rs->size = rs->pos + len + 128;
84	rs->text = resize(rs->text, rs->size);
85	}
86	strcpy(rs->text + rs->pos, p);
87	rs->pos += len;
88	}
89	char rdtrimc(rdstringc rs) {
90	rs->text = resize(rs->text, rs->pos + 1);
91	return rs->text;
92	}
93
94	int compare_wordlists(word a, word b) {
95	int t;
96	while (a && b) {
97	if (a->type != b->type)
98	return (a->type < b->type ? -1 : +1); /* FIXME? */
99	t = a->type;
100	if ((t != word_Normal && t != word_Code &&
101	t != word_WeakCode && t != word_Emph) \|\|
102	a->alt \|\| b->alt) {
103	int c;
104	if (a->text && b->text) {
105	c = ustricmp(a->text, b->text);
106	if (c)
107	return c;
108	}
109	c = compare_wordlists(a->alt, b->alt);
110	if (c)
111	return c;
112	a = a->next;
113	b = b->next;
114	} else {
115	wchar_t ap = a->text, bp = b->text;
116	while (ap && bp) {
117	wchar_t ac = utolower(ap), bc = utolower(bp);
118	if (ac != bc)
119	return (ac < bc ? -1 : +1);
120	if (!*++ap && a->next && a->next->type == t && !a->next->alt)
121	a = a->next, ap = a->text;
122	if (!*++bp && b->next && b->next->type == t && !b->next->alt)
123	b = b->next, bp = b->text;
124	}
125	if (ap \|\| bp)
126	return (*ap ? +1 : -1);
127	a = a->next;
128	b = b->next;
129	}
130	}
131
132	if (a \|\| b)
133	return (a ? +1 : -1);
134	else
135	return 0;
136	}
137
138	void mark_attr_ends(paragraph *sourceform) {
139	paragraph *p;
140	word w, wp;
141	for (p = sourceform; p; p = p->next) {
142	wp = NULL;
143	for (w = p->words; w; w = w->next) {
144	if (isattr(w->type)) {
145	int before = (wp && isattr(wp->type) &&
146	sameattr(wp->type, w->type));
147	int after = (w->next && isattr(w->next->type) &&
148	sameattr(w->next->type, w->type));
149	w->aux \|= (before ?
150	(after ? attr_Always : attr_Last) :
151	(after ? attr_First : attr_Only));
152	}
153	wp = w;
154	}
155	}
156	}
157
158	wrappedline wrap_para(word text, int width, int subsequentwidth,
159	int (widthfn)(word )) {
160	wrappedline head = NULL, *ptr = &head;
161	int nwords, wordsize;
162	struct wrapword {
163	word begin, end;
164	int width;
165	int spacewidth;
166	int cost;
167	int nwords;
168	} *wrapwords;
169	int i, j, n;
170
171	/*
172	* Break the line up into wrappable components.
173	*/
174	nwords = wordsize = 0;
175	wrapwords = NULL;
176	while (text) {
177	if (nwords >= wordsize) {
178	wordsize = nwords + 64;
179	wrapwords = srealloc(wrapwords, wordsize * sizeof(*wrapwords));
180	}
181	wrapwords[nwords].width = 0;
182	wrapwords[nwords].begin = text;
183	while (text) {
184	wrapwords[nwords].width += widthfn(text);
185	wrapwords[nwords].end = text->next;
186	if (text->next && (text->next->type == word_WhiteSpace \|\|
187	text->next->type == word_EmphSpace \|\|
188	text->breaks))
189	break;
190	text = text->next;
191	}
192	if (text && text->next && (text->next->type == word_WhiteSpace \|\|
193	text->next->type == word_EmphSpace)) {
194	wrapwords[nwords].spacewidth = widthfn(text->next);
195	text = text->next;
196	} else {
197	wrapwords[nwords].spacewidth = 0;
198	}
199	nwords++;
200	if (text)
201	text = text->next;
202	}
203
204	/*
205	* Perform the dynamic wrapping algorithm: work backwards from
206	* nwords-1, determining the optimal wrapping for each terminal
207	* subsequence of the paragraph.
208	*/
209	for (i = nwords; i-- ;) {
210	int best = -1;
211	int bestcost = 0;
212	int cost;
213	int linelen = 0, spacewidth = 0;
214	int seenspace;
215	int thiswidth = (i == 0 ? width : subsequentwidth);
216
217	j = 0;
218	seenspace = 0;
219	while (i+j < nwords) {
220	/*
221	* See what happens if we put j+1 words on this line.
222	*/
223	if (spacewidth)
224	seenspace = 1;
225	linelen += spacewidth + wrapwords[i+j].width;
226	spacewidth = wrapwords[i+j].spacewidth;
227	j++;
228	if (linelen > thiswidth) {
229	/*
230	* If we're over the width limit, abandon ship,
231	* _unless_ there is no best-effort yet (which will
232	* only happen if the first word is too long all by
233	* itself).
234	*/
235	if (best > 0)
236	break;
237	}
238	if (i+j == nwords) {
239	/*
240	* Special case: if we're at the very end of the
241	* paragraph, we don't score penalty points for the
242	* white space left on the line.
243	*/
244	cost = 0;
245	} else {
246	cost = (thiswidth-linelen) * (thiswidth-linelen);
247	cost += wrapwords[i+j].cost;
248	}
249	/*
250	* We compare bestcost >= cost, not bestcost > cost,
251	* because in cases where the costs are identical we
252	* want to try to look like the greedy algorithm,
253	* because readers are likely to have spent a lot of
254	* time looking at greedy-wrapped paragraphs and
255	* there's no point violating the Principle of Least
256	* Surprise if it doesn't actually gain anything.
257	*/
258	if (best < 0 \|\| bestcost >= cost) {
259	bestcost = cost;
260	best = j;
261	}
262	}
263	/*
264	* Now we know the optimal answer for this terminal
265	* subsequence, so put it in wrapwords.
266	*/
267	wrapwords[i].cost = bestcost;
268	wrapwords[i].nwords = best;
269	}
270
271	/*
272	* We've wrapped the paragraph. Now build the output
273	* `wrappedline' list.
274	*/
275	i = 0;
276	while (i < nwords) {
277	wrappedline *w = mknew(wrappedline);
278	*ptr = w;
279	ptr = &w->next;
280	w->next = NULL;
281
282	n = wrapwords[i].nwords;
283	w->begin = wrapwords[i].begin;
284	w->end = wrapwords[i+n-1].end;
285
286	/*
287	* Count along the words to find nspaces and shortfall.
288	*/
289	w->nspaces = 0;
290	w->shortfall = width;
291	for (j = 0; j < n; j++) {
292	w->shortfall -= wrapwords[i+j].width;
293	if (j < n-1 && wrapwords[i+j].spacewidth) {
294	w->nspaces++;
295	w->shortfall -= wrapwords[i+j].spacewidth;
296	}
297	}
298	i += n;
299	}
300
301	sfree(wrapwords);
302
303	return head;
304	}
305
306	void wrap_free(wrappedline *w) {
307	while (w) {
308	wrappedline *t = w->next;
309	sfree(w);
310	w = t;
311	}
312	}