git.distorted.org.uk Git - u/mdw/putty/blame_incremental

... / ...

Commit	Line	Data
	1	#include <stdio.h>
	2	#include <stdlib.h>
	3	#include <ctype.h>
	4	#include <locale.h>
	5	#include <limits.h>
	6	#include <wchar.h>
	7
	8	#include <time.h>
	9
	10	#include "putty.h"
	11	#include "charset.h"
	12	#include "terminal.h"
	13	#include "misc.h"
	14
	15	/*
	16	* Unix Unicode-handling routines.
	17	*/
	18
	19	int is_dbcs_leadbyte(int codepage, char byte)
	20	{
	21	return 0; /* we don't do DBCS */
	22	}
	23
	24	int mb_to_wc(int codepage, int flags, const char *mbstr, int mblen,
	25	wchar_t *wcstr, int wclen)
	26	{
	27	if (codepage == DEFAULT_CODEPAGE) {
	28	int n = 0;
	29	mbstate_t state;
	30
	31	memset(&state, 0, sizeof state);
	32
	33	while (mblen > 0) {
	34	size_t i = mbrtowc(wcstr+n, mbstr, (size_t)mblen, &state);
	35	if (i == (size_t)-1 \|\| i == (size_t)-2)
	36	break;
	37	n++;
	38	mbstr += i;
	39	mblen -= i;
	40	}
	41
	42	return n;
	43	} else if (codepage == CS_NONE) {
	44	int n = 0;
	45
	46	while (mblen > 0) {
	47	wcstr[n] = 0xD800 \| (mbstr[0] & 0xFF);
	48	n++;
	49	mbstr++;
	50	mblen--;
	51	}
	52
	53	return n;
	54	} else
	55	return charset_to_unicode(&mbstr, &mblen, wcstr, wclen, codepage,
	56	NULL, NULL, 0);
	57	}
	58
	59	int wc_to_mb(int codepage, int flags, const wchar_t *wcstr, int wclen,
	60	char mbstr, int mblen, char defchr, int *defused,
	61	struct unicode_data *ucsdata)
	62	{
	63	/* FIXME: we should remove the defused param completely... */
	64	if (defused)
	65	*defused = 0;
	66
	67	if (codepage == DEFAULT_CODEPAGE) {
	68	char output[MB_LEN_MAX];
	69	mbstate_t state;
	70	int n = 0;
	71
	72	memset(&state, 0, sizeof state);
	73
	74	while (wclen > 0) {
	75	int i = wcrtomb(output, wcstr[0], &state);
	76	if (i == (size_t)-1 \|\| i > n - mblen)
	77	break;
	78	memcpy(mbstr+n, output, i);
	79	n += i;
	80	wcstr++;
	81	wclen--;
	82	}
	83
	84	return n;
	85	} else if (codepage == CS_NONE) {
	86	int n = 0;
	87	while (wclen > 0 && n < mblen) {
	88	if (wcstr >= 0xD800 && wcstr < 0xD900)
	89	mbstr[n++] = (*wcstr & 0xFF);
	90	else if (defchr)
	91	mbstr[n++] = *defchr;
	92	wcstr++;
	93	wclen--;
	94	}
	95	return n;
	96	} else {
	97	return charset_from_unicode(&wcstr, &wclen, mbstr, mblen, codepage,
	98	NULL, defchr?defchr:NULL, defchr?1:0);
	99	}
	100	}
	101
	102	/*
	103	* Return value is TRUE if pterm is to run in direct-to-font mode.
	104	*/
	105	int init_ucs(struct unicode_data ucsdata, char linecharset,
	106	int utf8_override, int font_charset, int vtmode)
	107	{
	108	int i, ret = 0;
	109
	110	/*
	111	* In the platform-independent parts of the code, font_codepage
	112	* is used only for system DBCS support - which we don't
	113	* support at all. So we set this to something which will never
	114	* be used.
	115	*/
	116	ucsdata->font_codepage = -1;
	117
	118	/*
	119	* If utf8_override is set and the POSIX locale settings
	120	* dictate a UTF-8 character set, then just go straight for
	121	* UTF-8.
	122	*/
	123	ucsdata->line_codepage = CS_NONE;
	124	if (utf8_override) {
	125	const char *s;
	126	if (((s = getenv("LC_ALL")) && *s) \|\|
	127	((s = getenv("LC_CTYPE")) && *s) \|\|
	128	((s = getenv("LANG")) && *s)) {
	129	if (strstr(s, "UTF-8"))
	130	ucsdata->line_codepage = CS_UTF8;
	131	}
	132	}
	133
	134	/*
	135	* Failing that, line_codepage should be decoded from the
	136	* specification in conf.
	137	*/
	138	if (ucsdata->line_codepage == CS_NONE)
	139	ucsdata->line_codepage = decode_codepage(linecharset);
	140
	141	/*
	142	* If line_codepage is _still_ CS_NONE, we assume we're using
	143	* the font's own encoding. This has been passed in to us, so
	144	* we use that. If it's still CS_NONE after _that_ - i.e. the
	145	* font we were given had an incomprehensible charset - then we
	146	* fall back to using the D800 page.
	147	*/
	148	if (ucsdata->line_codepage == CS_NONE)
	149	ucsdata->line_codepage = font_charset;
	150
	151	if (ucsdata->line_codepage == CS_NONE)
	152	ret = 1;
	153
	154	/*
	155	* Set up unitab_line, by translating each individual character
	156	* in the line codepage into Unicode.
	157	*/
	158	for (i = 0; i < 256; i++) {
	159	char c[1];
	160	const char *p;
	161	wchar_t wc[1];
	162	int len;
	163	c[0] = i;
	164	p = c;
	165	len = 1;
	166	if (ucsdata->line_codepage == CS_NONE)
	167	ucsdata->unitab_line[i] = 0xD800 \| i;
	168	else if (1 == charset_to_unicode(&p, &len, wc, 1,
	169	ucsdata->line_codepage,
	170	NULL, L"", 0))
	171	ucsdata->unitab_line[i] = wc[0];
	172	else
	173	ucsdata->unitab_line[i] = 0xFFFD;
	174	}
	175
	176	/*
	177	* Set up unitab_xterm. This is the same as unitab_line except
	178	* in the line-drawing regions, where it follows the Unicode
	179	* encoding.
	180	*
	181	* (Note that the strange X encoding of line-drawing characters
	182	* in the bottom 32 glyphs of ISO8859-1 fonts is taken care of
	183	* by the font encoding, which will spot such a font and act as
	184	* if it were in a variant encoding of ISO8859-1.)
	185	*/
	186	for (i = 0; i < 256; i++) {
	187	static const wchar_t unitab_xterm_std[32] = {
	188	0x2666, 0x2592, 0x2409, 0x240c, 0x240d, 0x240a, 0x00b0, 0x00b1,
	189	0x2424, 0x240b, 0x2518, 0x2510, 0x250c, 0x2514, 0x253c, 0x23ba,
	190	0x23bb, 0x2500, 0x23bc, 0x23bd, 0x251c, 0x2524, 0x2534, 0x252c,
	191	0x2502, 0x2264, 0x2265, 0x03c0, 0x2260, 0x00a3, 0x00b7, 0x0020
	192	};
	193	static const wchar_t unitab_xterm_poorman[32] =
	194	L"#*o~+++++-----++++\|****L. ";
	195
	196	const wchar_t *ptr;
	197
	198	if (vtmode == VT_POORMAN)
	199	ptr = unitab_xterm_poorman;
	200	else
	201	ptr = unitab_xterm_std;
	202
	203	if (i >= 0x5F && i < 0x7F)
	204	ucsdata->unitab_xterm[i] = ptr[i & 0x1F];
	205	else
	206	ucsdata->unitab_xterm[i] = ucsdata->unitab_line[i];
	207	}
	208
	209	/*
	210	* Set up unitab_scoacs. The SCO Alternate Character Set is
	211	* simply CP437.
	212	*/
	213	for (i = 0; i < 256; i++) {
	214	char c[1];
	215	const char *p;
	216	wchar_t wc[1];
	217	int len;
	218	c[0] = i;
	219	p = c;
	220	len = 1;
	221	if (1 == charset_to_unicode(&p, &len, wc, 1, CS_CP437, NULL, L"", 0))
	222	ucsdata->unitab_scoacs[i] = wc[0];
	223	else
	224	ucsdata->unitab_scoacs[i] = 0xFFFD;
	225	}
	226
	227	/*
	228	* Find the control characters in the line codepage. For
	229	* direct-to-font mode using the D800 hack, we assume 00-1F and
	230	* 7F are controls, but allow 80-9F through. (It's as good a
	231	* guess as anything; and my bet is that half the weird fonts
	232	* used in this way will be IBM or MS code pages anyway.)
	233	*/
	234	for (i = 0; i < 256; i++) {
	235	int lineval = ucsdata->unitab_line[i];
	236	if (lineval < ' ' \|\| (lineval >= 0x7F && lineval < 0xA0) \|\|
	237	(lineval >= 0xD800 && lineval < 0xD820) \|\| (lineval == 0xD87F))
	238	ucsdata->unitab_ctrl[i] = i;
	239	else
	240	ucsdata->unitab_ctrl[i] = 0xFF;
	241	}
	242
	243	return ret;
	244	}
	245
	246	const char *cp_name(int codepage)
	247	{
	248	if (codepage == CS_NONE)
	249	return "Use font encoding";
	250	return charset_to_localenc(codepage);
	251	}
	252
	253	const char *cp_enumerate(int index)
	254	{
	255	int charset;
	256	if (index == 0)
	257	return "Use font encoding";
	258	charset = charset_localenc_nth(index-1);
	259	if (charset == CS_NONE)
	260	return NULL;
	261	return charset_to_localenc(charset);
	262	}
	263
	264	int decode_codepage(char *cp_name)
	265	{
	266	if (!*cp_name)
	267	return CS_NONE; /* use font encoding */
	268	return charset_from_localenc(cp_name);
	269	}