mdw@git.distorted.org.uk Git - disorder/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* This file is part of DisOrder.
	3	* Copyright (C) 2004, 2005 Richard Kettlewell
	4	*
	5	* This program is free software; you can redistribute it and/or modify
	6	* it under the terms of the GNU General Public License as published by
	7	* the Free Software Foundation; either version 2 of the License, or
	8	* (at your option) any later version.
	9	*
	10	* This program is distributed in the hope that it will be useful, but
	11	* WITHOUT ANY WARRANTY; without even the implied warranty of
	12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	13	* General Public License for more details.
	14	*
	15	* You should have received a copy of the GNU General Public License
	16	* along with this program; if not, write to the Free Software
	17	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
	18	* USA
	19	*/
	20	/** @file lib/charset.c @brief Character set conversion */
	21
	22	#include <config.h>
	23	#include "types.h"
	24
	25	#include <iconv.h>
	26	#include <string.h>
	27	#include <errno.h>
	28	#include <langinfo.h>
	29
	30	#include "mem.h"
	31	#include "log.h"
	32	#include "charset.h"
	33	#include "configuration.h"
	34	#include "utf8.h"
	35	#include "vector.h"
	36	#include "unidata.h"
	37
	38	/** @brief Low-level converstion routine
	39	* @param from Source encoding
	40	* @param to Destination encoding
	41	* @param ptr First byte to convert
	42	* @param n Number of bytes to convert
	43	* @return Converted text, 0-terminated; or NULL on error.
	44	*/
	45	static void convert(const char from, const char *to,
	46	const void *ptr, size_t n) {
	47	iconv_t i;
	48	size_t len;
	49	char buf = 0, s, *d;
	50	size_t bufsize = 0, sl, dl;
	51
	52	if((i = iconv_open(to, from)) == (iconv_t)-1)
	53	fatal(errno, "error calling iconv_open");
	54	do {
	55	bufsize = bufsize ? 2 * bufsize : 32;
	56	buf = xrealloc_noptr(buf, bufsize);
	57	iconv(i, 0, 0, 0, 0);
	58	s = (char *)ptr;
	59	sl = n;
	60	d = buf;
	61	dl = bufsize;
	62	/* (void ) to work around FreeBSD's nonstandard iconv prototype /
	63	len = iconv(i, (void *)&s, &sl, &d, &dl);
	64	} while(len == (size_t)-1 && errno == E2BIG);
	65	iconv_close(i);
	66	if(len == (size_t)-1) {
	67	error(errno, "error converting from %s to %s", from, to);
	68	return 0;
	69	}
	70	return buf;
	71	}
	72
	73	/** @brief Convert from the local multibyte encoding to UTF-8 */
	74	char mb2utf8(const char mb) {
	75	return convert(nl_langinfo(CODESET), "UTF-8", mb, strlen(mb) + 1);
	76	}
	77
	78	/** @brief Convert from UTF-8 to the local multibyte encoding */
	79	char utf82mb(const char utf8) {
	80	return convert("UTF-8", nl_langinfo(CODESET), utf8, strlen(utf8) + 1);
	81	}
	82
	83	/** @brief Convert from encoding @p from to UTF-8 */
	84	char any2utf8(const char from, const char *any) {
	85	return convert(from, "UTF-8", any, strlen(any) + 1);
	86	}
	87
	88	/** @brief Convert from encoding @p from to the local multibyte encoding */
	89	char any2mb(const char from, const char *any) {
	90	if(from) return convert(from, nl_langinfo(CODESET), any, strlen(any) + 1);
	91	else return xstrdup(any);
	92	}
	93
	94	/** @brief Convert from encoding @p from to encoding @p to */
	95	char any2any(const char from,
	96	const char *to,
	97	const char *any) {
	98	if(from \|\| to) return convert(from, to, any, strlen(any) + 1);
	99	else return xstrdup(any);
	100	}
	101
	102	/** @brief Return nonzero if @p c is a combining character */
	103	static int combining(int c) {
	104	if(c < UNICODE_NCHARS) {
	105	const struct unidata *const ud = &unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS];
	106
	107	return ud->general_category == unicode_General_Category_Mn \|\| ud->ccc != 0;
	108	}
	109	/* Assume unknown characters are noncombining */
	110	return 0;
	111	}
	112
	113	/** @brief Truncate a string for display purposes
	114	* @param s Pointer to UTF-8 string
	115	* @param max Maximum number of columns
	116	* @return @p or truncated string (never NULL)
	117	*
	118	* We don't correctly support bidi or double-width characters yet, nor
	119	* locate default grapheme cluster boundaries for saner truncation.
	120	*/
	121	const char truncate_for_display(const char s, long max) {
	122	const char t = s, r, *cut = 0;
	123	char *truncated;
	124	uint32_t c;
	125	long n = 0;
	126
	127	/* We need to discover two things: firstly whether the string is
	128	* longer than @p max glyphs and secondly if it is not, where to cut
	129	* the string.
	130	*
	131	* Combining characters follow their base character (unicode
	132	* standard 5.0 s2.11), so after each base character we must
	133	*/
	134	while(*t) {
	135	PARSE_UTF8(t, c, return s);
	136	if(combining(c))
	137	/* This must be an initial combining character. We just skip it. */
	138	continue;
	139	/* So c must be a base character. It may be followed by any
	140	* number of combining characters. We advance past them. */
	141	do {
	142	r = t;
	143	PARSE_UTF8(t, c, return s);
	144	} while(combining(c));
	145	/* Last character wasn't a combining character so back up */
	146	t = r;
	147	++n;
	148	/* So now there are N glyphs before position T. We might
	149	* therefore have reached the cut position. */
	150	if(n == max - 3)
	151	cut = t;
	152	}
	153	/* If the string is short enough we return it unmodified */
	154	if(n < max)
	155	return s;
	156	truncated = xmalloc_noptr(cut - s + 4);
	157	memcpy(truncated, s, cut - s);
	158	strcpy(truncated + (cut - s), "...");
	159	return truncated;
	160	}
	161
	162	/*
	163	Local Variables:
	164	c-basic-offset:2
	165	comment-column:40
	166	End:
	167	*/