mdw@git.distorted.org.uk Git - sgt/halibut/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* misc.c: miscellaneous useful items
	3	*/
	4
	5	#include <stdarg.h>
	6	#include "halibut.h"
	7
	8	char adv(char s) {
	9	return s + 1 + strlen(s);
	10	}
	11
	12	struct stackTag {
	13	void **data;
	14	int sp;
	15	int size;
	16	};
	17
	18	stack stk_new(void) {
	19	stack s;
	20
	21	s = mknew(struct stackTag);
	22	s->sp = 0;
	23	s->size = 0;
	24	s->data = NULL;
	25
	26	return s;
	27	}
	28
	29	void stk_free(stack s) {
	30	sfree(s->data);
	31	sfree(s);
	32	}
	33
	34	void stk_push(stack s, void *item) {
	35	if (s->size <= s->sp) {
	36	s->size = s->sp + 32;
	37	s->data = resize(s->data, s->size);
	38	}
	39	s->data[s->sp++] = item;
	40	}
	41
	42	void *stk_pop(stack s) {
	43	if (s->sp > 0)
	44	return s->data[--s->sp];
	45	else
	46	return NULL;
	47	}
	48
	49	void *stk_top(stack s) {
	50	if (s->sp > 0)
	51	return s->data[s->sp-1];
	52	else
	53	return NULL;
	54	}
	55
	56	/*
	57	* Small routines to amalgamate a string from an input source.
	58	*/
	59	const rdstring empty_rdstring = {0, 0, NULL};
	60	const rdstringc empty_rdstringc = {0, 0, NULL};
	61
	62	void rdadd(rdstring *rs, wchar_t c) {
	63	if (rs->pos >= rs->size-1) {
	64	rs->size = rs->pos + 128;
	65	rs->text = resize(rs->text, rs->size);
	66	}
	67	rs->text[rs->pos++] = c;
	68	rs->text[rs->pos] = 0;
	69	}
	70	void rdadds(rdstring rs, wchar_t const p) {
	71	int len = ustrlen(p);
	72	if (rs->pos >= rs->size - len) {
	73	rs->size = rs->pos + len + 128;
	74	rs->text = resize(rs->text, rs->size);
	75	}
	76	ustrcpy(rs->text + rs->pos, p);
	77	rs->pos += len;
	78	}
	79	wchar_t rdtrim(rdstring rs) {
	80	rs->text = resize(rs->text, rs->pos + 1);
	81	return rs->text;
	82	}
	83
	84	void rdaddc(rdstringc *rs, char c) {
	85	if (rs->pos >= rs->size-1) {
	86	rs->size = rs->pos + 128;
	87	rs->text = resize(rs->text, rs->size);
	88	}
	89	rs->text[rs->pos++] = c;
	90	rs->text[rs->pos] = 0;
	91	}
	92	void rdaddsc(rdstringc rs, char const p) {
	93	int len = strlen(p);
	94	if (rs->pos >= rs->size - len) {
	95	rs->size = rs->pos + len + 128;
	96	rs->text = resize(rs->text, rs->size);
	97	}
	98	strcpy(rs->text + rs->pos, p);
	99	rs->pos += len;
	100	}
	101	char rdtrimc(rdstringc rs) {
	102	rs->text = resize(rs->text, rs->pos + 1);
	103	return rs->text;
	104	}
	105
	106	static int compare_wordlists_literally(word a, word b) {
	107	int t;
	108	while (a && b) {
	109	if (a->type != b->type)
	110	return (a->type < b->type ? -1 : +1); /* FIXME? */
	111	t = a->type;
	112	if ((t != word_Normal && t != word_Code &&
	113	t != word_WeakCode && t != word_Emph) \|\|
	114	a->alt \|\| b->alt) {
	115	int c;
	116	if (a->text && b->text) {
	117	c = ustricmp(a->text, b->text);
	118	if (c)
	119	return c;
	120	}
	121	c = compare_wordlists_literally(a->alt, b->alt);
	122	if (c)
	123	return c;
	124	a = a->next;
	125	b = b->next;
	126	} else {
	127	wchar_t ap = a->text, bp = b->text;
	128	while (ap && bp) {
	129	wchar_t ac = utolower(ap), bc = utolower(bp);
	130	if (ac != bc)
	131	return (ac < bc ? -1 : +1);
	132	if (!*++ap && a->next && a->next->type == t && !a->next->alt)
	133	a = a->next, ap = a->text;
	134	if (!*++bp && b->next && b->next->type == t && !b->next->alt)
	135	b = b->next, bp = b->text;
	136	}
	137	if (ap \|\| bp)
	138	return (*ap ? +1 : -1);
	139	a = a->next;
	140	b = b->next;
	141	}
	142	}
	143
	144	if (a \|\| b)
	145	return (a ? +1 : -1);
	146	else
	147	return 0;
	148	}
	149
	150	int compare_wordlists(word a, word b) {
	151	/*
	152	* First we compare only the alphabetic content of the word
	153	* lists, with case not a factor. If that comes out equal,
	154	* _then_ we compare the word lists literally.
	155	*/
	156	struct {
	157	word *w;
	158	int i;
	159	wchar_t c;
	160	} pos[2];
	161
	162	pos[0].w = a;
	163	pos[1].w = b;
	164	pos[0].i = pos[1].i = 0;
	165
	166	while (1) {
	167	/*
	168	* Find the next alphabetic character in each word list.
	169	*/
	170	int k;
	171
	172	for (k = 0; k < 2; k++) {
	173	/*
	174	* Advance until we hit either an alphabetic character
	175	* or the end of the word list.
	176	*/
	177	while (1) {
	178	if (!pos[k].w) {
	179	/* End of word list. */
	180	pos[k].c = 0;
	181	break;
	182	} else if (!pos[k].w->text \|\| !pos[k].w->text[pos[k].i]) {
	183	/* No characters remaining in this word; move on. */
	184	pos[k].w = pos[k].w->next;
	185	pos[k].i = 0;
	186	} else if (!uisalpha(pos[k].w->text[pos[k].i])) {
	187	/* This character isn't alphabetic; move on. */
	188	pos[k].i++;
	189	} else {
	190	/* We have an alphabetic! Lowercase it and continue. */
	191	pos[k].c = utolower(pos[k].w->text[pos[k].i]);
	192	break;
	193	}
	194	}
	195	}
	196
	197	if (pos[0].c < pos[1].c)
	198	return -1;
	199	else if (pos[0].c > pos[1].c)
	200	return +1;
	201
	202	if (!pos[0].c)
	203	break; /* they're equal */
	204
	205	pos[0].i++;
	206	pos[1].i++;
	207	}
	208
	209	/*
	210	* If we reach here, the strings were alphabetically equal, so
	211	* compare in more detail.
	212	*/
	213	return compare_wordlists_literally(a, b);
	214	}
	215
	216	void mark_attr_ends(paragraph *sourceform) {
	217	paragraph *p;
	218	word w, wp;
	219	for (p = sourceform; p; p = p->next) {
	220	wp = NULL;
	221	for (w = p->words; w; w = w->next) {
	222	if (isattr(w->type)) {
	223	int before = (wp && isattr(wp->type) &&
	224	sameattr(wp->type, w->type));
	225	int after = (w->next && isattr(w->next->type) &&
	226	sameattr(w->next->type, w->type));
	227	w->aux \|= (before ?
	228	(after ? attr_Always : attr_Last) :
	229	(after ? attr_First : attr_Only));
	230	}
	231	wp = w;
	232	}
	233	}
	234	}
	235
	236	/*
	237	* This function implements the optimal paragraph wrapping
	238	* algorithm, pretty much as used in TeX. A cost function is
	239	* defined for each line of the wrapped paragraph (typically some
	240	* convex function of the difference between the line's length and
	241	* its desired length), and a dynamic programming approach is used
	242	* to optimise globally across all possible layouts of the
	243	* paragraph to find the one with the minimum total cost.
	244	*
	245	* The function as implemented here gives a choice of two options
	246	* for the cost function:
	247	*
	248	* - If `natural_space' is zero, then the algorithm attempts to
	249	* make each line the maximum possible width (either `width' or
	250	* `subsequentwidth' depending on whether it's the first line of
	251	* the paragraph or not), and the cost function is simply the
	252	* square of the unused space at the end of each line. This is a
	253	* simple mechanism suitable for use in fixed-pitch environments
	254	* such as plain text displayed on a terminal.
	255	*
	256	* - However, if `natural_space' is positive, the algorithm
	257	* assumes the medium is fully graphical and that the width of
	258	* space characters can be adjusted finely, and it attempts to
	259	* make each _space character_ the width given in
	260	* `natural_space'. (The provided width function should return
	261	* the _minimum_ acceptable width of a space character in this
	262	* case.) Therefore, the cost function for a line is dependent
	263	* on the number of spaces on that line as well as the amount by
	264	* which the line width differs from the optimum.
	265	*/
	266	wrappedline wrap_para(word text, int width, int subsequentwidth,
	267	int (widthfn)(void , word ), void ctx,
	268	int natural_space) {
	269	wrappedline head = NULL, *ptr = &head;
	270	int nwords, wordsize;
	271	struct wrapword {
	272	word begin, end;
	273	int width;
	274	int spacewidth;
	275	int cost;
	276	int nwords;
	277	} *wrapwords;
	278	int i, j, n;
	279
	280	/*
	281	* Break the line up into wrappable components.
	282	*/
	283	nwords = wordsize = 0;
	284	wrapwords = NULL;
	285	while (text) {
	286	if (nwords >= wordsize) {
	287	wordsize = nwords + 64;
	288	wrapwords = srealloc(wrapwords, wordsize * sizeof(*wrapwords));
	289	}
	290	wrapwords[nwords].width = 0;
	291	wrapwords[nwords].begin = text;
	292	while (text) {
	293	wrapwords[nwords].width += widthfn(ctx, text);
	294	wrapwords[nwords].end = text->next;
	295	if (text->next && (text->next->type == word_WhiteSpace \|\|
	296	text->next->type == word_EmphSpace \|\|
	297	text->breaks))
	298	break;
	299	text = text->next;
	300	}
	301	if (text && text->next && (text->next->type == word_WhiteSpace \|\|
	302	text->next->type == word_EmphSpace)) {
	303	wrapwords[nwords].spacewidth = widthfn(ctx, text->next);
	304	text = text->next;
	305	} else {
	306	wrapwords[nwords].spacewidth = 0;
	307	}
	308	nwords++;
	309	if (text)
	310	text = text->next;
	311	}
	312
	313	/*
	314	* Perform the dynamic wrapping algorithm: work backwards from
	315	* nwords-1, determining the optimal wrapping for each terminal
	316	* subsequence of the paragraph.
	317	*/
	318	for (i = nwords; i-- ;) {
	319	int best = -1;
	320	int bestcost = 0;
	321	int cost;
	322	int linelen = 0, spacewidth = 0, minspacewidth = 0;
	323	int nspaces;
	324	int thiswidth = (i == 0 ? width : subsequentwidth);
	325
	326	j = 0;
	327	nspaces = 0;
	328	while (i+j < nwords) {
	329	/*
	330	* See what happens if we put j+1 words on this line.
	331	*/
	332	if (spacewidth) {
	333	nspaces++;
	334	minspacewidth = spacewidth;
	335	}
	336	linelen += spacewidth + wrapwords[i+j].width;
	337	spacewidth = wrapwords[i+j].spacewidth;
	338	j++;
	339	if (linelen > thiswidth) {
	340	/*
	341	* If we're over the width limit, abandon ship,
	342	* _unless_ there is no best-effort yet (which will
	343	* only happen if the first word is too long all by
	344	* itself).
	345	*/
	346	if (best > 0)
	347	break;
	348	}
	349
	350	/*
	351	* Compute the cost of this line. The method of doing
	352	* this differs hugely depending on whether
	353	* natural_space is nonzero or not.
	354	*/
	355	if (natural_space) {
	356	if (!nspaces && linelen > thiswidth) {
	357	/*
	358	* Special case: if there are no spaces at all
	359	* on the line because one single word is too
	360	* long for its line, cost is zero because
	361	* there's nothing we can do about it anyway.
	362	*/
	363	cost = 0;
	364	} else {
	365	int shortfall = thiswidth - linelen;
	366	int spaceextra = shortfall / (nspaces ? nspaces : 1);
	367	int spaceshortfall = natural_space -
	368	(minspacewidth + spaceextra);
	369
	370	if (i+j == nwords && spaceshortfall < 0) {
	371	/*
	372	* Special case: on the very last line of
	373	* the paragraph, we don't score penalty
	374	* points for having to _stretch_ the line,
	375	* since we won't stretch it anyway.
	376	* However, we score penalties as normal
	377	* for having to squeeze it.
	378	*/
	379	cost = 0;
	380	} else {
	381	/*
	382	* Squaring this number is tricky since
	383	* it's liable to be quite big. Let's
	384	* divide it through by 256.
	385	*/
	386	int x = spaceshortfall >> 8;
	387	int xf = spaceshortfall & 0xFF;
	388
	389	/*
	390	* Not counting strange variable-fixed-
	391	* point oddities, we are computing
	392	*
	393	* (x+xf)^2 = x^2 + 2xxf + xf*xf
	394	*
	395	* except that _our_ xf is 256 times the
	396	* one listed there.
	397	*/
	398
	399	cost = x * x;
	400	cost += (2 * x * xf) >> 8;
	401	}
	402	}
	403	} else {
	404	if (i+j == nwords) {
	405	/*
	406	* Special case: if we're at the very end of the
	407	* paragraph, we don't score penalty points for the
	408	* white space left on the line.
	409	*/
	410	cost = 0;
	411	} else {
	412	cost = (thiswidth-linelen) * (thiswidth-linelen);
	413	}
	414	}
	415
	416	/*
	417	* Add in the cost of wrapping all lines after this
	418	* point too.
	419	*/
	420	if (i+j < nwords)
	421	cost += wrapwords[i+j].cost;
	422
	423	/*
	424	* We compare bestcost >= cost, not bestcost > cost,
	425	* because in cases where the costs are identical we
	426	* want to try to look like the greedy algorithm,
	427	* because readers are likely to have spent a lot of
	428	* time looking at greedy-wrapped paragraphs and
	429	* there's no point violating the Principle of Least
	430	* Surprise if it doesn't actually gain anything.
	431	*/
	432	if (best < 0 \|\| bestcost >= cost) {
	433	bestcost = cost;
	434	best = j;
	435	}
	436	}
	437	/*
	438	* Now we know the optimal answer for this terminal
	439	* subsequence, so put it in wrapwords.
	440	*/
	441	wrapwords[i].cost = bestcost;
	442	wrapwords[i].nwords = best;
	443	}
	444
	445	/*
	446	* We've wrapped the paragraph. Now build the output
	447	* `wrappedline' list.
	448	*/
	449	i = 0;
	450	while (i < nwords) {
	451	wrappedline *w = mknew(wrappedline);
	452	*ptr = w;
	453	ptr = &w->next;
	454	w->next = NULL;
	455
	456	n = wrapwords[i].nwords;
	457	w->begin = wrapwords[i].begin;
	458	w->end = wrapwords[i+n-1].end;
	459
	460	/*
	461	* Count along the words to find nspaces and shortfall.
	462	*/
	463	w->nspaces = 0;
	464	w->shortfall = width;
	465	for (j = 0; j < n; j++) {
	466	w->shortfall -= wrapwords[i+j].width;
	467	if (j < n-1 && wrapwords[i+j].spacewidth) {
	468	w->nspaces++;
	469	w->shortfall -= wrapwords[i+j].spacewidth;
	470	}
	471	}
	472	i += n;
	473	}
	474
	475	sfree(wrapwords);
	476
	477	return head;
	478	}
	479
	480	void wrap_free(wrappedline *w) {
	481	while (w) {
	482	wrappedline *t = w->next;
	483	sfree(w);
	484	w = t;
	485	}
	486	}
	487
	488	void cmdline_cfg_add(paragraph cfg, char string)
	489	{
	490	wchar_t *ustring;
	491	int upos, ulen, pos, len;
	492
	493	ulen = 0;
	494	while (cfg->keyword[ulen])
	495	ulen += 1 + ustrlen(cfg->keyword+ulen);
	496	len = 0;
	497	while (cfg->origkeyword[len])
	498	len += 1 + strlen(cfg->origkeyword+len);
	499
	500	ustring = ufroma_dup(string, CS_FIXME);
	501
	502	upos = ulen;
	503	ulen += 2 + ustrlen(ustring);
	504	cfg->keyword = resize(cfg->keyword, ulen);
	505	ustrcpy(cfg->keyword+upos, ustring);
	506	cfg->keyword[ulen-1] = L'\0';
	507
	508	pos = len;
	509	len += 2 + strlen(string);
	510	cfg->origkeyword = resize(cfg->origkeyword, len);
	511	strcpy(cfg->origkeyword+pos, string);
	512	cfg->origkeyword[len-1] = '\0';
	513
	514	sfree(ustring);
	515	}
	516
	517	paragraph *cmdline_cfg_new(void)
	518	{
	519	paragraph *p;
	520
	521	p = mknew(paragraph);
	522	memset(p, 0, sizeof(*p));
	523	p->type = para_Config;
	524	p->next = NULL;
	525	p->fpos.filename = "<command line>";
	526	p->fpos.line = p->fpos.col = -1;
	527	p->keyword = ustrdup(L"\0");
	528	p->origkeyword = dupstr("\0");
	529
	530	return p;
	531	}
	532
	533	paragraph cmdline_cfg_simple(char string, ...)
	534	{
	535	va_list ap;
	536	char *s;
	537	paragraph *p;
	538
	539	p = cmdline_cfg_new();
	540	cmdline_cfg_add(p, string);
	541
	542	va_start(ap, string);
	543	while ((s = va_arg(ap, char *)) != NULL)
	544	cmdline_cfg_add(p, s);
	545	va_end(ap);
	546
	547	return p;
	548	}