mdw@git.distorted.org.uk Git - sgt/halibut/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* misc.c: miscellaneous useful items
	3	*/
	4
	5	#include <stdarg.h>
	6	#include "halibut.h"
	7
	8	char adv(char s) {
	9	return s + 1 + strlen(s);
	10	}
	11
	12	struct stackTag {
	13	void **data;
	14	int sp;
	15	int size;
	16	};
	17
	18	stack stk_new(void) {
	19	stack s;
	20
	21	s = snew(struct stackTag);
	22	s->sp = 0;
	23	s->size = 0;
	24	s->data = NULL;
	25
	26	return s;
	27	}
	28
	29	void stk_free(stack s) {
	30	sfree(s->data);
	31	sfree(s);
	32	}
	33
	34	void stk_push(stack s, void *item) {
	35	if (s->size <= s->sp) {
	36	s->size = s->sp + 32;
	37	s->data = sresize(s->data, s->size, void *);
	38	}
	39	s->data[s->sp++] = item;
	40	}
	41
	42	void *stk_pop(stack s) {
	43	if (s->sp > 0)
	44	return s->data[--s->sp];
	45	else
	46	return NULL;
	47	}
	48
	49	void *stk_top(stack s) {
	50	if (s->sp > 0)
	51	return s->data[s->sp-1];
	52	else
	53	return NULL;
	54	}
	55
	56	/*
	57	* Small routines to amalgamate a string from an input source.
	58	*/
	59	const rdstring empty_rdstring = {0, 0, NULL};
	60	const rdstringc empty_rdstringc = {0, 0, NULL};
	61
	62	void rdadd(rdstring *rs, wchar_t c) {
	63	if (rs->pos >= rs->size-1) {
	64	rs->size = rs->pos + 128;
	65	rs->text = sresize(rs->text, rs->size, wchar_t);
	66	}
	67	rs->text[rs->pos++] = c;
	68	rs->text[rs->pos] = 0;
	69	}
	70	void rdadds(rdstring rs, wchar_t const p) {
	71	int len = ustrlen(p);
	72	if (rs->pos >= rs->size - len) {
	73	rs->size = rs->pos + len + 128;
	74	rs->text = sresize(rs->text, rs->size, wchar_t);
	75	}
	76	ustrcpy(rs->text + rs->pos, p);
	77	rs->pos += len;
	78	}
	79	wchar_t rdtrim(rdstring rs) {
	80	rs->text = sresize(rs->text, rs->pos + 1, wchar_t);
	81	return rs->text;
	82	}
	83
	84	void rdaddc(rdstringc *rs, char c) {
	85	if (rs->pos >= rs->size-1) {
	86	rs->size = rs->pos + 128;
	87	rs->text = sresize(rs->text, rs->size, char);
	88	}
	89	rs->text[rs->pos++] = c;
	90	rs->text[rs->pos] = 0;
	91	}
	92	void rdaddsc(rdstringc rs, char const p) {
	93	rdaddsn(rs, p, strlen(p));
	94	}
	95	void rdaddsn(rdstringc rs, char const p, int len) {
	96	if (rs->pos >= rs->size - len) {
	97	rs->size = rs->pos + len + 128;
	98	rs->text = sresize(rs->text, rs->size, char);
	99	}
	100	memcpy(rs->text + rs->pos, p, len);
	101	rs->pos += len;
	102	rs->text[rs->pos] = 0;
	103	}
	104	char rdtrimc(rdstringc rs) {
	105	rs->text = sresize(rs->text, rs->pos + 1, char);
	106	return rs->text;
	107	}
	108
	109	static int compare_wordlists_literally(word a, word b) {
	110	int t;
	111	while (a && b) {
	112	if (a->type != b->type)
	113	return (a->type < b->type ? -1 : +1); /* FIXME? */
	114	t = a->type;
	115	if ((t != word_Normal && t != word_Code &&
	116	t != word_WeakCode && t != word_Emph) \|\|
	117	a->alt \|\| b->alt) {
	118	int c;
	119	if (a->text && b->text) {
	120	c = ustricmp(a->text, b->text);
	121	if (c)
	122	return c;
	123	}
	124	c = compare_wordlists_literally(a->alt, b->alt);
	125	if (c)
	126	return c;
	127	a = a->next;
	128	b = b->next;
	129	} else {
	130	wchar_t ap = a->text, bp = b->text;
	131	while (ap && bp) {
	132	wchar_t ac = ap, bc = bp;
	133	if (ac != bc)
	134	return (ac < bc ? -1 : +1);
	135	if (!*++ap && a->next && a->next->type == t && !a->next->alt)
	136	a = a->next, ap = a->text;
	137	if (!*++bp && b->next && b->next->type == t && !b->next->alt)
	138	b = b->next, bp = b->text;
	139	}
	140	if (ap \|\| bp)
	141	return (*ap ? +1 : -1);
	142	a = a->next;
	143	b = b->next;
	144	}
	145	}
	146
	147	if (a \|\| b)
	148	return (a ? +1 : -1);
	149	else
	150	return 0;
	151	}
	152
	153	int compare_wordlists(word a, word b) {
	154	/*
	155	* First we compare only the alphabetic content of the word
	156	* lists, with case not a factor. If that comes out equal,
	157	* _then_ we compare the word lists literally.
	158	*/
	159	struct {
	160	word *w;
	161	int i;
	162	wchar_t c;
	163	} pos[2];
	164
	165	pos[0].w = a;
	166	pos[1].w = b;
	167	pos[0].i = pos[1].i = 0;
	168
	169	while (1) {
	170	/*
	171	* Find the next alphabetic character in each word list.
	172	*/
	173	int k;
	174
	175	for (k = 0; k < 2; k++) {
	176	/*
	177	* Advance until we hit either an alphabetic character
	178	* or the end of the word list.
	179	*/
	180	while (1) {
	181	if (!pos[k].w) {
	182	/* End of word list. */
	183	pos[k].c = 0;
	184	break;
	185	} else if (!pos[k].w->text \|\| !pos[k].w->text[pos[k].i]) {
	186	/* No characters remaining in this word; move on. */
	187	pos[k].w = pos[k].w->next;
	188	pos[k].i = 0;
	189	} else if (!uisalpha(pos[k].w->text[pos[k].i])) {
	190	/* This character isn't alphabetic; move on. */
	191	pos[k].i++;
	192	} else {
	193	/* We have an alphabetic! Lowercase it and continue. */
	194	pos[k].c = utolower(pos[k].w->text[pos[k].i]);
	195	break;
	196	}
	197	}
	198	}
	199
	200	#ifdef HAS_WCSCOLL
	201	{
	202	wchar_t a[2], b[2];
	203	int ret;
	204
	205	a[0] = pos[0].c;
	206	b[0] = pos[1].c;
	207	a[1] = b[1] = L'\0';
	208
	209	ret = wcscoll(a, b);
	210	if (ret)
	211	return ret;
	212	}
	213	#else
	214	if (pos[0].c < pos[1].c)
	215	return -1;
	216	else if (pos[0].c > pos[1].c)
	217	return +1;
	218	#endif
	219
	220	if (!pos[0].c)
	221	break; /* they're equal */
	222
	223	pos[0].i++;
	224	pos[1].i++;
	225	}
	226
	227	/*
	228	* If we reach here, the strings were alphabetically equal, so
	229	* compare in more detail.
	230	*/
	231	return compare_wordlists_literally(a, b);
	232	}
	233
	234	void mark_attr_ends(word *words)
	235	{
	236	word w, wp;
	237
	238	wp = NULL;
	239	for (w = words; w; w = w->next) {
	240	int both;
	241	if (!isvis(w->type))
	242	/* Invisible elements should not affect this calculation */
	243	continue;
	244	both = (isattr(w->type) &&
	245	wp && isattr(wp->type) &&
	246	sameattr(wp->type, w->type));
	247	w->aux \|= both ? attr_Always : attr_First;
	248	if (wp && !both) {
	249	/* If previous considered word turns out to have been
	250	* the end of a run, tidy it up. */
	251	int wp_attr = attraux(wp->aux);
	252	wp->aux = (wp->aux & ~attr_mask) \|
	253	((wp_attr == attr_Always) ? attr_Last
	254	/* attr_First */ : attr_Only);
	255	}
	256	wp = w;
	257	}
	258
	259	/* Tidy up last word touched */
	260	if (wp) {
	261	int wp_attr = attraux(wp->aux);
	262	wp->aux = (wp->aux & ~attr_mask) \|
	263	((wp_attr == attr_Always) ? attr_Last
	264	/* attr_First */ : attr_Only);
	265	}
	266	}
	267
	268	/*
	269	* This function implements the optimal paragraph wrapping
	270	* algorithm, pretty much as used in TeX. A cost function is
	271	* defined for each line of the wrapped paragraph (typically some
	272	* convex function of the difference between the line's length and
	273	* its desired length), and a dynamic programming approach is used
	274	* to optimise globally across all possible layouts of the
	275	* paragraph to find the one with the minimum total cost.
	276	*
	277	* The function as implemented here gives a choice of two options
	278	* for the cost function:
	279	*
	280	* - If `natural_space' is zero, then the algorithm attempts to
	281	* make each line the maximum possible width (either `width' or
	282	* `subsequentwidth' depending on whether it's the first line of
	283	* the paragraph or not), and the cost function is simply the
	284	* square of the unused space at the end of each line. This is a
	285	* simple mechanism suitable for use in fixed-pitch environments
	286	* such as plain text displayed on a terminal.
	287	*
	288	* - However, if `natural_space' is positive, the algorithm
	289	* assumes the medium is fully graphical and that the width of
	290	* space characters can be adjusted finely, and it attempts to
	291	* make each _space character_ the width given in
	292	* `natural_space'. (The provided width function should return
	293	* the _minimum_ acceptable width of a space character in this
	294	* case.) Therefore, the cost function for a line is dependent
	295	* on the number of spaces on that line as well as the amount by
	296	* which the line width differs from the optimum.
	297	*/
	298	wrappedline wrap_para(word text, int width, int subsequentwidth,
	299	int (widthfn)(void , word ), void ctx,
	300	int natural_space) {
	301	wrappedline head = NULL, *ptr = &head;
	302	int nwords, wordsize;
	303	struct wrapword {
	304	word begin, end;
	305	int width;
	306	int spacewidth;
	307	int cost;
	308	int nwords;
	309	} *wrapwords;
	310	int i, j, n;
	311
	312	/*
	313	* Break the line up into wrappable components.
	314	*/
	315	nwords = wordsize = 0;
	316	wrapwords = NULL;
	317	while (text) {
	318	if (nwords >= wordsize) {
	319	wordsize = nwords + 64;
	320	wrapwords = srealloc(wrapwords, wordsize * sizeof(*wrapwords));
	321	}
	322	wrapwords[nwords].width = 0;
	323	wrapwords[nwords].begin = text;
	324	while (text) {
	325	wrapwords[nwords].width += widthfn(ctx, text);
	326	wrapwords[nwords].end = text->next;
	327	if (text->next && (text->next->type == word_WhiteSpace \|\|
	328	text->next->type == word_EmphSpace \|\|
	329	text->breaks))
	330	break;
	331	text = text->next;
	332	}
	333	if (text && text->next && (text->next->type == word_WhiteSpace \|\|
	334	text->next->type == word_EmphSpace)) {
	335	wrapwords[nwords].spacewidth = widthfn(ctx, text->next);
	336	text = text->next;
	337	} else {
	338	wrapwords[nwords].spacewidth = 0;
	339	}
	340	nwords++;
	341	if (text)
	342	text = text->next;
	343	}
	344
	345	/*
	346	* Perform the dynamic wrapping algorithm: work backwards from
	347	* nwords-1, determining the optimal wrapping for each terminal
	348	* subsequence of the paragraph.
	349	*/
	350	for (i = nwords; i-- ;) {
	351	int best = -1;
	352	int bestcost = 0;
	353	int cost;
	354	int linelen = 0, spacewidth = 0, minspacewidth = 0;
	355	int nspaces;
	356	int thiswidth = (i == 0 ? width : subsequentwidth);
	357
	358	j = 0;
	359	nspaces = 0;
	360	while (i+j < nwords) {
	361	/*
	362	* See what happens if we put j+1 words on this line.
	363	*/
	364	if (spacewidth) {
	365	nspaces++;
	366	minspacewidth = spacewidth;
	367	}
	368	linelen += spacewidth + wrapwords[i+j].width;
	369	spacewidth = wrapwords[i+j].spacewidth;
	370	j++;
	371	if (linelen > thiswidth) {
	372	/*
	373	* If we're over the width limit, abandon ship,
	374	* _unless_ there is no best-effort yet (which will
	375	* only happen if the first word is too long all by
	376	* itself).
	377	*/
	378	if (best > 0)
	379	break;
	380	}
	381
	382	/*
	383	* Compute the cost of this line. The method of doing
	384	* this differs hugely depending on whether
	385	* natural_space is nonzero or not.
	386	*/
	387	if (natural_space) {
	388	if (!nspaces && linelen > thiswidth) {
	389	/*
	390	* Special case: if there are no spaces at all
	391	* on the line because one single word is too
	392	* long for its line, cost is zero because
	393	* there's nothing we can do about it anyway.
	394	*/
	395	cost = 0;
	396	} else {
	397	int shortfall = thiswidth - linelen;
	398	int spaceextra = shortfall / (nspaces ? nspaces : 1);
	399	int spaceshortfall = natural_space -
	400	(minspacewidth + spaceextra);
	401
	402	if (i+j == nwords && spaceshortfall < 0) {
	403	/*
	404	* Special case: on the very last line of
	405	* the paragraph, we don't score penalty
	406	* points for having to _stretch_ the line,
	407	* since we won't stretch it anyway.
	408	* However, we score penalties as normal
	409	* for having to squeeze it.
	410	*/
	411	cost = 0;
	412	} else {
	413	/*
	414	* Squaring this number is tricky since
	415	* it's liable to be quite big. Let's
	416	* divide it through by 256.
	417	*/
	418	int x = spaceshortfall >> 8;
	419	int xf = spaceshortfall & 0xFF;
	420
	421	/*
	422	* Not counting strange variable-fixed-
	423	* point oddities, we are computing
	424	*
	425	* (x+xf)^2 = x^2 + 2xxf + xf*xf
	426	*
	427	* except that _our_ xf is 256 times the
	428	* one listed there.
	429	*/
	430
	431	cost = x * x;
	432	cost += (2 * x * xf) >> 8;
	433	}
	434	}
	435	} else {
	436	if (i+j == nwords) {
	437	/*
	438	* Special case: if we're at the very end of the
	439	* paragraph, we don't score penalty points for the
	440	* white space left on the line.
	441	*/
	442	cost = 0;
	443	} else {
	444	cost = (thiswidth-linelen) * (thiswidth-linelen);
	445	}
	446	}
	447
	448	/*
	449	* Add in the cost of wrapping all lines after this
	450	* point too.
	451	*/
	452	if (i+j < nwords)
	453	cost += wrapwords[i+j].cost;
	454
	455	/*
	456	* We compare bestcost >= cost, not bestcost > cost,
	457	* because in cases where the costs are identical we
	458	* want to try to look like the greedy algorithm,
	459	* because readers are likely to have spent a lot of
	460	* time looking at greedy-wrapped paragraphs and
	461	* there's no point violating the Principle of Least
	462	* Surprise if it doesn't actually gain anything.
	463	*/
	464	if (best < 0 \|\| bestcost >= cost) {
	465	bestcost = cost;
	466	best = j;
	467	}
	468	}
	469	/*
	470	* Now we know the optimal answer for this terminal
	471	* subsequence, so put it in wrapwords.
	472	*/
	473	wrapwords[i].cost = bestcost;
	474	wrapwords[i].nwords = best;
	475	}
	476
	477	/*
	478	* We've wrapped the paragraph. Now build the output
	479	* `wrappedline' list.
	480	*/
	481	i = 0;
	482	while (i < nwords) {
	483	wrappedline *w = snew(wrappedline);
	484	*ptr = w;
	485	ptr = &w->next;
	486	w->next = NULL;
	487
	488	n = wrapwords[i].nwords;
	489	w->begin = wrapwords[i].begin;
	490	w->end = wrapwords[i+n-1].end;
	491
	492	/*
	493	* Count along the words to find nspaces and shortfall.
	494	*/
	495	w->nspaces = 0;
	496	w->shortfall = width;
	497	for (j = 0; j < n; j++) {
	498	w->shortfall -= wrapwords[i+j].width;
	499	if (j < n-1 && wrapwords[i+j].spacewidth) {
	500	w->nspaces++;
	501	w->shortfall -= wrapwords[i+j].spacewidth;
	502	}
	503	}
	504	i += n;
	505	}
	506
	507	sfree(wrapwords);
	508
	509	return head;
	510	}
	511
	512	void wrap_free(wrappedline *w) {
	513	while (w) {
	514	wrappedline *t = w->next;
	515	sfree(w);
	516	w = t;
	517	}
	518	}
	519
	520	void cmdline_cfg_add(paragraph cfg, char string)
	521	{
	522	wchar_t *ustring;
	523	int upos, ulen, pos, len;
	524
	525	ulen = 0;
	526	while (cfg->keyword[ulen])
	527	ulen += 1 + ustrlen(cfg->keyword+ulen);
	528	len = 0;
	529	while (cfg->origkeyword[len])
	530	len += 1 + strlen(cfg->origkeyword+len);
	531
	532	ustring = ufroma_locale_dup(string);
	533
	534	upos = ulen;
	535	ulen += 2 + ustrlen(ustring);
	536	cfg->keyword = sresize(cfg->keyword, ulen, wchar_t);
	537	ustrcpy(cfg->keyword+upos, ustring);
	538	cfg->keyword[ulen-1] = L'\0';
	539
	540	pos = len;
	541	len += 2 + strlen(string);
	542	cfg->origkeyword = sresize(cfg->origkeyword, len, char);
	543	strcpy(cfg->origkeyword+pos, string);
	544	cfg->origkeyword[len-1] = '\0';
	545
	546	sfree(ustring);
	547	}
	548
	549	paragraph *cmdline_cfg_new(void)
	550	{
	551	paragraph *p;
	552
	553	p = snew(paragraph);
	554	memset(p, 0, sizeof(*p));
	555	p->type = para_Config;
	556	p->next = NULL;
	557	p->fpos.filename = "<command line>";
	558	p->fpos.line = p->fpos.col = -1;
	559	p->keyword = ustrdup(L"\0");
	560	p->origkeyword = dupstr("\0");
	561
	562	return p;
	563	}
	564
	565	paragraph cmdline_cfg_simple(char string, ...)
	566	{
	567	va_list ap;
	568	char *s;
	569	paragraph *p;
	570
	571	p = cmdline_cfg_new();
	572	cmdline_cfg_add(p, string);
	573
	574	va_start(ap, string);
	575	while ((s = va_arg(ap, char *)) != NULL)
	576	cmdline_cfg_add(p, s);
	577	va_end(ap);
	578
	579	return p;
	580	}