[sgt/putty] / unix / uxucs.c

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <locale.h>
#include <limits.h>
#include <wchar.h>

#include <time.h>

#include "putty.h"
#include "terminal.h"
#include "misc.h"

/*
 * Unix Unicode-handling routines.
 */

/*
 * Report whether `byte' is a DBCS lead byte in `codepage'. This port
 * does no DBCS handling, so neither argument is consulted and the
 * answer is always 0.
 */
int is_dbcs_leadbyte(int codepage, char byte)
{
    (void)codepage;
    (void)byte;

    return 0;
}

/*
 * Convert a multibyte string into wide characters.
 *
 *  - codepage selects the conversion: DEFAULT_CODEPAGE decodes with
 *    mbrtowc() under the locale selected by setlocale(LC_CTYPE, "");
 *    CS_NONE maps every input byte to 0xD800 | byte; any other value
 *    is passed through to charset_to_unicode().
 *  - flags is not used by this implementation.
 *  - mbstr / mblen are the input bytes and their count.
 *  - wcstr / wclen are the output buffer and its capacity, counted in
 *    wchar_t units.
 *
 * Returns the number of wide characters written to wcstr. In the
 * DEFAULT_CODEPAGE and CS_NONE cases this never exceeds wclen;
 * conversion stops when the input is exhausted, the output is full,
 * or (DEFAULT_CODEPAGE only) an invalid or incomplete multibyte
 * sequence is met.
 *
 * Side effect: the DEFAULT_CODEPAGE path leaves LC_CTYPE set to "C"
 * on return.
 */
int mb_to_wc(int codepage, int flags, char *mbstr, int mblen,
             wchar_t *wcstr, int wclen)
{
    if (codepage == DEFAULT_CODEPAGE) {
        int n = 0;
        mbstate_t state = { 0 };

        setlocale(LC_CTYPE, "");

        /* Stop at wclen so we never write past the caller's buffer. */
        while (mblen > 0 && n < wclen) {
            size_t i = mbrtowc(wcstr+n, mbstr, (size_t)mblen, &state);
            if (i == (size_t)-1 || i == (size_t)-2)
                break;
            /*
             * mbrtowc() returns 0, not a byte count, when it decodes
             * a null wide character. Treat that as one input byte
             * consumed; otherwise the input would never advance and
             * the loop would spin until it overran the output.
             */
            if (i == 0)
                i = 1;
            n++;
            mbstr += i;
            mblen -= (int)i;
        }

        setlocale(LC_CTYPE, "C");

        return n;
    } else if (codepage == CS_NONE) {
        int n = 0;

        /* One output character per input byte, bounded by wclen. */
        while (mblen > 0 && n < wclen) {
            wcstr[n] = 0xD800 | (mbstr[0] & 0xFF);
            n++;
            mbstr++;
            mblen--;
        }

        return n;
    } else
        return charset_to_unicode(&mbstr, &mblen, wcstr, wclen, codepage,
                                  NULL, NULL, 0);
}

/*
 * Convert a wide-character string into a multibyte string.
 *
 *  - codepage selects the conversion: DEFAULT_CODEPAGE encodes with
 *    wcrtomb() under the locale selected by setlocale(LC_CTYPE, "");
 *    CS_NONE emits the low byte of each character in the range
 *    0xD800-0xD8FF; any other value is passed through to
 *    charset_from_unicode().
 *  - flags is not used by this implementation.
 *  - wcstr / wclen are the input characters and their count.
 *  - mbstr / mblen are the output buffer and its capacity in bytes.
 *  - defchr, if non-NULL, points to a substitute byte. In CS_NONE
 *    mode it replaces characters outside 0xD800-0xD8FF (which are
 *    silently dropped when defchr is NULL); in the
 *    charset_from_unicode() case it is passed on as a one-byte error
 *    string. The DEFAULT_CODEPAGE path does not use it.
 *  - defused, if non-NULL, is always set to 0.
 *
 * Returns the number of bytes written to mbstr. The DEFAULT_CODEPAGE
 * path stops at the first character that cannot be encoded or whose
 * encoding does not fit in the remaining output space, and leaves
 * LC_CTYPE set to "C" on return.
 */
int wc_to_mb(int codepage, int flags, wchar_t *wcstr, int wclen,
             char *mbstr, int mblen, char *defchr, int *defused)
{
    /* FIXME: we should remove the defused param completely... */
    if (defused)
        *defused = 0;

    if (codepage == DEFAULT_CODEPAGE) {
        char output[MB_LEN_MAX];
        mbstate_t state = { 0 };
        int n = 0;

        setlocale(LC_CTYPE, "");

        while (wclen > 0 && n < mblen) {
            size_t i = wcrtomb(output, wcstr[0], &state);
            /*
             * Compare against the space still free (mblen - n). The
             * loop condition keeps that difference positive, so the
             * cast to size_t is safe.
             */
            if (i == (size_t)-1 || i > (size_t)(mblen - n))
                break;
            memcpy(mbstr+n, output, i);
            n += (int)i;
            wcstr++;
            wclen--;
        }

        setlocale(LC_CTYPE, "C");

        return n;
    } else if (codepage == CS_NONE) {
        int n = 0;
        while (wclen > 0 && n < mblen) {
            if (*wcstr >= 0xD800 && *wcstr < 0xD900)
                mbstr[n++] = (*wcstr & 0xFF);
            else if (defchr)
                mbstr[n++] = *defchr;
            wcstr++;
            wclen--;
        }
        return n;
    } else {
        return charset_from_unicode(&wcstr, &wclen, mbstr, mblen, codepage,
                                    NULL, defchr, defchr ? 1 : 0);
    }
}

/*
 * Return value is TRUE if pterm is to run in direct-to-font mode.
 */
int init_ucs(char *linecharset, int font_charset)
{
    int i, ret = 0;

    /*
     * In the platform-independent parts of the code, font_codepage
     * is used only for system DBCS support - which we don't
     * support at all. So we set this to something which will never
     * be used.
     */
    font_codepage = -1;

    /*
     * line_codepage should be decoded from the specification in
     * cfg.
     */
    line_codepage = charset_from_mimeenc(linecharset);
    if (line_codepage == CS_NONE)
	line_codepage = charset_from_xenc(linecharset);

    /*
     * If line_codepage is _still_ CS_NONE, we assume we're using
     * the font's own encoding. This has been passed in to us, so
     * we use that. If it's still CS_NONE after _that_ - i.e. the
     * font we were given had an incomprehensible charset - then we
     * fall back to using the D800 page.
     */
    if (line_codepage == CS_NONE)
	line_codepage = font_charset;

    if (line_codepage == CS_NONE)
	ret = 1;

    /*
     * Set up unitab_line, by translating each individual character
     * in the line codepage into Unicode.
     */
    for (i = 0; i < 256; i++) {
	char c[1], *p;
	wchar_t wc[1];
	int len;
	c[0] = i;
	p = c;
	len = 1;
	if (line_codepage == CS_NONE)
	    unitab_line[i] = 0xD800 | i;
	else if (1 == charset_to_unicode(&p, &len, wc, 1, line_codepage,
					 NULL, L"", 0))
	    unitab_line[i] = wc[0];
	else
	    unitab_line[i] = 0xFFFD;
    }

    /*
     * Set up unitab_xterm. This is the same as unitab_line except
     * in the line-drawing regions, where it follows the Unicode
     * encoding.
     * 
     * (Note that the strange X encoding of line-drawing characters
     * in the bottom 32 glyphs of ISO8859-1 fonts is taken care of
     * by the font encoding, which will spot such a font and act as
     * if it were in a variant encoding of ISO8859-1.)
     */
    for (i = 0; i < 256; i++) {
	static const wchar_t unitab_xterm_std[32] = {
	    0x2666, 0x2592, 0x2409, 0x240c, 0x240d, 0x240a, 0x00b0, 0x00b1,
	    0x2424, 0x240b, 0x2518, 0x2510, 0x250c, 0x2514, 0x253c, 0x23ba,
	    0x23bb, 0x2500, 0x23bc, 0x23bd, 0x251c, 0x2524, 0x2534, 0x252c,
	    0x2502, 0x2264, 0x2265, 0x03c0, 0x2260, 0x00a3, 0x00b7, 0x0020
	};
	if (i >= 0x5F && i < 0x7F)
	    unitab_xterm[i] = unitab_xterm_std[i & 0x1F];
	else
	    unitab_xterm[i] = unitab_line[i];
    }

    /*
     * Set up unitab_scoacs. The SCO Alternate Character Set is
     * simply CP437.
     */
    for (i = 0; i < 256; i++) {
	char c[1], *p;
	wchar_t wc[1];
	int len;
	c[0] = i;
	p = c;
	len = 1;
	if (1 == charset_to_unicode(&p, &len, wc, 1, CS_CP437, NULL, L"", 0))
	    unitab_scoacs[i] = wc[0];
	else
	    unitab_scoacs[i] = 0xFFFD;
    }

    /*
     * Find the control characters in the line codepage. For
     * direct-to-font mode using the D800 hack, we assume 00-1F and
     * 7F are controls, but allow 80-9F through. (It's as good a
     * guess as anything; and my bet is that half the weird fonts
     * used in this way will be IBM or MS code pages anyway.)
     */
    for (i = 0; i < 256; i++) {
	int lineval = unitab_line[i];
	if (lineval < ' ' || (lineval >= 0x7F && lineval < 0xA0) ||
	    (lineval >= 0xD800 && lineval < 0xD820) || (lineval == 0xD87F))
	    unitab_ctrl[i] = i;
	else
	    unitab_ctrl[i] = 0xFF;
    }

    return ret;
}
Commit	Line	Data
1709795f	1	#include <stdio.h>
	2	#include <stdlib.h>
	3	#include <ctype.h>
2dc6356a	4	#include <locale.h>
	5	#include <limits.h>
	6	#include <wchar.h>
1709795f	7
1709795f	8	#include <time.h>
2dc6356a	9
1709795f	10	#include "putty.h"
887035a5	11	#include "terminal.h"
1709795f	12	#include "misc.h"
	13
	14	/*
	15	* Unix Unicode-handling routines.
1709795f	16	*/
1709795f	17
1709795f	18	int is_dbcs_leadbyte(int codepage, char byte)
	19	{
	20	return 0; /* we don't do DBCS */
	21	}
	22
	23	int mb_to_wc(int codepage, int flags, char *mbstr, int mblen,
	24	wchar_t *wcstr, int wclen)
	25	{
2dc6356a	26	if (codepage == DEFAULT_CODEPAGE) {
	27	int n = 0;
	28	mbstate_t state = { 0 };
	29
	30	setlocale(LC_CTYPE, "");
	31
	32	while (mblen > 0) {
	33	size_t i = mbrtowc(wcstr+n, mbstr, (size_t)mblen, &state);
	34	if (i == (size_t)-1 \|\| i == (size_t)-2)
	35	break;
	36	n++;
	37	mbstr += i;
	38	mblen -= i;
	39	}
	40
	41	setlocale(LC_CTYPE, "C");
	42
	43	return n;
facd762c	44	} else if (codepage == CS_NONE) {
	45	int n = 0;
	46
	47	while (mblen > 0) {
	48	wcstr[n] = 0xD800 \| (mbstr[0] & 0xFF);
	49	n++;
	50	mbstr++;
	51	mblen--;
	52	}
	53
	54	return n;
2dc6356a	55	} else
	56	return charset_to_unicode(&mbstr, &mblen, wcstr, wclen, codepage,
	57	NULL, NULL, 0);
e6346999	58	}
	59
	60	int wc_to_mb(int codepage, int flags, wchar_t *wcstr, int wclen,
	61	char *mbstr, int mblen, char *defchr, int *defused)
	62	{
2dc6356a	63	/* FIXME: we should remove the defused param completely... */
e6346999	64	if (defused)
e6346999	65	*defused = 0;
2dc6356a	66
	67	if (codepage == DEFAULT_CODEPAGE) {
	68	char output[MB_LEN_MAX];
	69	mbstate_t state = { 0 };
	70	int n = 0;
	71
	72	setlocale(LC_CTYPE, "");
	73
	74	while (wclen > 0) {
	75	int i = wcrtomb(output, wcstr[0], &state);
	76	if (i == (size_t)-1 \|\| i > n - mblen)
	77	break;
	78	memcpy(mbstr+n, output, i);
	79	n += i;
	80	wcstr++;
	81	wclen--;
	82	}
	83
	84	setlocale(LC_CTYPE, "C");
	85
	86	return n;
facd762c	87	} else if (codepage == CS_NONE) {
	88	int n = 0;
	89	while (wclen > 0 && n < mblen) {
	90	if (*wcstr >= 0xD800 && *wcstr < 0xD900)
	91	mbstr[n++] = (*wcstr & 0xFF);
	92	else if (defchr)
	93	mbstr[n++] = *defchr;
	94	wcstr++;
	95	wclen--;
	96	}
	97	return n;
	98	} else {
2dc6356a	99	return charset_from_unicode(&wcstr, &wclen, mbstr, mblen, codepage,
0f993689	100	NULL, defchr?defchr:NULL, defchr?1:0);
facd762c	101	}
1709795f	102	}
1709795f	103
085f4a68	104	/*
	105	* Return value is TRUE if pterm is to run in direct-to-font mode.
	106	*/
8772ac69	107	int init_ucs(char *linecharset, int font_charset)
1709795f	108	{
085f4a68	109	int i, ret = 0;
2dc6356a	110
	111	/*
	112	* In the platform-independent parts of the code, font_codepage
	113	* is used only for system DBCS support - which we don't
	114	* support at all. So we set this to something which will never
	115	* be used.
	116	*/
	117	font_codepage = -1;
	118
	119	/*
	120	* line_codepage should be decoded from the specification in
	121	* cfg.
	122	*/
8772ac69	123	line_codepage = charset_from_mimeenc(linecharset);
2dc6356a	124	if (line_codepage == CS_NONE)
8772ac69	125	line_codepage = charset_from_xenc(linecharset);
2dc6356a	126
facd762c	127	/*
	128	* If line_codepage is _still_ CS_NONE, we assume we're using
	129	* the font's own encoding. This has been passed in to us, so
	130	* we use that. If it's still CS_NONE after _that_ - i.e. the
	131	* font we were given had an incomprehensible charset - then we
	132	* fall back to using the D800 page.
	133	*/
2dc6356a	134	if (line_codepage == CS_NONE)
facd762c	135	line_codepage = font_charset;
2dc6356a	136
085f4a68	137	if (line_codepage == CS_NONE)
	138	ret = 1;
	139
2dc6356a	140	/*
	141	* Set up unitab_line, by translating each individual character
	142	* in the line codepage into Unicode.
	143	*/
	144	for (i = 0; i < 256; i++) {
	145	char c[1], *p;
	146	wchar_t wc[1];
	147	int len;
	148	c[0] = i;
	149	p = c;
	150	len = 1;
facd762c	151	if (line_codepage == CS_NONE)
	152	unitab_line[i] = 0xD800 \| i;
	153	else if (1 == charset_to_unicode(&p, &len, wc, 1, line_codepage,
	154	NULL, L"", 0))
2dc6356a	155	unitab_line[i] = wc[0];
1709795f	156	else
2dc6356a	157	unitab_line[i] = 0xFFFD;
2dc6356a	158	}
1709795f	159
2dc6356a	160	/*
	161	* Set up unitab_xterm. This is the same as unitab_line except
	162	* in the line-drawing regions, where it follows the Unicode
	163	* encoding.
	164	*
	165	* (Note that the strange X encoding of line-drawing characters
	166	* in the bottom 32 glyphs of ISO8859-1 fonts is taken care of
	167	* by the font encoding, which will spot such a font and act as
	168	* if it were in a variant encoding of ISO8859-1.)
	169	*/
1709795f	170	for (i = 0; i < 256; i++) {
2dc6356a	171	static const wchar_t unitab_xterm_std[32] = {
	172	0x2666, 0x2592, 0x2409, 0x240c, 0x240d, 0x240a, 0x00b0, 0x00b1,
	173	0x2424, 0x240b, 0x2518, 0x2510, 0x250c, 0x2514, 0x253c, 0x23ba,
	174	0x23bb, 0x2500, 0x23bc, 0x23bd, 0x251c, 0x2524, 0x2534, 0x252c,
	175	0x2502, 0x2264, 0x2265, 0x03c0, 0x2260, 0x00a3, 0x00b7, 0x0020
	176	};
	177	if (i >= 0x5F && i < 0x7F)
	178	unitab_xterm[i] = unitab_xterm_std[i & 0x1F];
	179	else
	180	unitab_xterm[i] = unitab_line[i];
1709795f	181	}
2dc6356a	182
	183	/*
	184	* Set up unitab_scoacs. The SCO Alternate Character Set is
	185	* simply CP437.
	186	*/
	187	for (i = 0; i < 256; i++) {
	188	char c[1], *p;
	189	wchar_t wc[1];
	190	int len;
	191	c[0] = i;
	192	p = c;
	193	len = 1;
facd762c	194	if (1 == charset_to_unicode(&p, &len, wc, 1, CS_CP437, NULL, L"", 0))
2dc6356a	195	unitab_scoacs[i] = wc[0];
	196	else
	197	unitab_scoacs[i] = 0xFFFD;
	198	}
	199
facd762c	200	/*
	201	* Find the control characters in the line codepage. For
	202	* direct-to-font mode using the D800 hack, we assume 00-1F and
	203	* 7F are controls, but allow 80-9F through. (It's as good a
	204	* guess as anything; and my bet is that half the weird fonts
	205	* used in this way will be IBM or MS code pages anyway.)
	206	*/
	207	for (i = 0; i < 256; i++) {
	208	int lineval = unitab_line[i];
	209	if (lineval < ' ' \|\| (lineval >= 0x7F && lineval < 0xA0) \|\|
	210	(lineval >= 0xD800 && lineval < 0xD820) \|\| (lineval == 0xD87F))
2dc6356a	211	unitab_ctrl[i] = i;
	212	else
	213	unitab_ctrl[i] = 0xFF;
facd762c	214	}
085f4a68	215
085f4a68	216	return ret;
126ce234	217	}