[u/mdw/putty] / unix / uxucs.c

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <locale.h>
#include <limits.h>
#include <wchar.h>

#include <time.h>

#include "putty.h"
#include "terminal.h"
#include "misc.h"

/*
 * Unix Unicode-handling routines.
 */

/*
 * DBCS lead-byte query. DBCS is not handled by this Unix port, so the
 * answer is 0 for every codepage and every byte value.
 */
int is_dbcs_leadbyte(int codepage, char byte)
{
    /* Both arguments are deliberately ignored. */
    (void)codepage;
    (void)byte;

    return 0;
}

/*
 * Convert a multibyte string into wide characters.
 *
 *   codepage     DEFAULT_CODEPAGE converts with the C library under the
 *                locale selected by the environment; any other value is
 *                passed straight to charset_to_unicode().
 *   flags        not used by this implementation.
 *   mbstr/mblen  input bytes and how many of them there are.
 *   wcstr/wclen  output buffer and its capacity in wide characters.
 *
 * Returns the number of wide characters stored in wcstr. In the
 * DEFAULT_CODEPAGE case, conversion stops early at the first invalid or
 * incomplete multibyte sequence, or when wcstr is full.
 */
int mb_to_wc(int codepage, int flags, char *mbstr, int mblen,
             wchar_t *wcstr, int wclen)
{
    if (codepage == DEFAULT_CODEPAGE) {
        int n = 0;
        mbstate_t state = { 0 };

        /* Switch LC_CTYPE to the environment's locale for the conversion. */
        setlocale(LC_CTYPE, "");

        /* Never write more than wclen wide characters to the output. */
        while (mblen > 0 && n < wclen) {
            size_t i = mbrtowc(wcstr+n, mbstr, (size_t)mblen, &state);
            if (i == (size_t)-1 || i == (size_t)-2)
                break;
            /*
             * mbrtowc() returns 0 (not a byte count) when it has decoded
             * a null wide character. Without special handling the input
             * would never advance and the loop would not terminate.
             * NOTE(review): this assumes the null was encoded as a single
             * byte, which holds for the usual stateless encodings.
             */
            if (i == 0)
                i = 1;
            n++;
            mbstr += i;
            mblen -= (int)i;
        }

        /* Put LC_CTYPE back to "C", as the rest of the program expects. */
        setlocale(LC_CTYPE, "C");

        return n;
    } else
        return charset_to_unicode(&mbstr, &mblen, wcstr, wclen, codepage,
                                  NULL, NULL, 0);
}

/*
 * Convert a wide-character string into a multibyte string.
 *
 *   codepage     DEFAULT_CODEPAGE converts with the C library under the
 *                locale selected by the environment; any other value is
 *                passed straight to charset_from_unicode().
 *   flags        not used by this implementation.
 *   wcstr/wclen  input wide characters and how many of them there are.
 *   mbstr/mblen  output buffer and its capacity in bytes.
 *   defchr       not used by this implementation.
 *   defused      if non-NULL, *defused is always set to 0.
 *
 * Returns the number of bytes stored in mbstr. In the DEFAULT_CODEPAGE
 * case, conversion stops early at the first unconvertible character or
 * when the next character would not fit in the remaining space.
 */
int wc_to_mb(int codepage, int flags, wchar_t *wcstr, int wclen,
             char *mbstr, int mblen, char *defchr, int *defused)
{
    /* FIXME: we should remove the defused param completely... */
    if (defused)
        *defused = 0;

    if (codepage == DEFAULT_CODEPAGE) {
        char output[MB_LEN_MAX];
        mbstate_t state = { 0 };
        int n = 0;

        /* Switch LC_CTYPE to the environment's locale for the conversion. */
        setlocale(LC_CTYPE, "");

        while (wclen > 0) {
            /* Keep the result as size_t so the (size_t)-1 test is exact. */
            size_t i = wcrtomb(output, wcstr[0], &state);
            if (i == (size_t)-1)
                break;
            /*
             * Stop if this character does not fit in what is left of the
             * output buffer. The remaining space is mblen - n; the
             * comparison is done in int so that a non-positive remainder
             * also stops the loop. (i is at most MB_LEN_MAX here.)
             */
            if ((int)i > mblen - n)
                break;
            memcpy(mbstr+n, output, i);
            n += (int)i;
            wcstr++;
            wclen--;
        }

        /* Put LC_CTYPE back to "C", as the rest of the program expects. */
        setlocale(LC_CTYPE, "C");

        return n;
    } else
        return charset_from_unicode(&wcstr, &wclen, mbstr, mblen, codepage,
                                    NULL, NULL, 0);
}

/*
 * Push the single byte `value' through charset_to_unicode() for the
 * given charset. Returns the wide character produced, or U+FFFD when
 * the conversion does not yield exactly one output character.
 */
static wchar_t ucs_translate_byte(int charset, int value)
{
    char in[1], *inptr = in;
    wchar_t out[1];
    int inlen = 1;

    in[0] = value;
    if (charset_to_unicode(&inptr, &inlen, out, 1, charset, NULL, L"", 0) != 1)
        return 0xFFFD;
    return out[0];
}

/*
 * Fill in the global translation state: font_codepage, line_codepage,
 * and the 256-entry tables unitab_line, unitab_xterm, unitab_scoacs and
 * unitab_ctrl.
 */
void init_ucs(void)
{
    /*
     * Unicode values substituted into unitab_xterm for bytes 0x5F..0x7E,
     * looked up by the low five bits of the byte.
     */
    static const wchar_t unitab_xterm_std[32] = {
        0x2666, 0x2592, 0x2409, 0x240c, 0x240d, 0x240a, 0x00b0, 0x00b1,
        0x2424, 0x240b, 0x2518, 0x2510, 0x250c, 0x2514, 0x253c, 0x23ba,
        0x23bb, 0x2500, 0x23bc, 0x23bd, 0x251c, 0x2524, 0x2534, 0x252c,
        0x2502, 0x2264, 0x2265, 0x03c0, 0x2260, 0x00a3, 0x00b7, 0x0020
    };
    int ch;

    /*
     * font_codepage only matters to the platform-independent code for
     * system DBCS support, which is not provided here, so it is given
     * a value that will never be used.
     */
    font_codepage = -1;

    /*
     * Decode line_codepage from the configuration: try the name as a
     * MIME encoding first, then as an X encoding.
     */
    line_codepage = charset_from_mimeenc(cfg.line_codepage);
    if (line_codepage == CS_NONE)
        line_codepage = charset_from_xenc(cfg.line_codepage);

    /*
     * FIXME: hack. CS_NONE ought to mean direct-to-font, but for now an
     * encoding that cannot be understood is treated as ISO 8859-1. Ugly,
     * but enough to stop things crashing; something better is wanted.
     */
    if (line_codepage == CS_NONE)
        line_codepage = CS_ISO8859_1;

    /* unitab_line: each byte of the line codepage mapped to Unicode. */
    for (ch = 0; ch < 256; ch++)
        unitab_line[ch] = ucs_translate_byte(line_codepage, ch);

    /*
     * unitab_xterm: a copy of unitab_line except in the line-drawing
     * region, where the Unicode values from the table above are used.
     *
     * (The odd X convention of putting line-drawing glyphs in the
     * bottom 32 positions of ISO8859-1 fonts is dealt with by the font
     * encoding, which detects such a font and treats it as a variant
     * of ISO8859-1.)
     */
    for (ch = 0; ch < 256; ch++) {
        if (ch < 0x5F || ch >= 0x7F)
            unitab_xterm[ch] = unitab_line[ch];
        else
            unitab_xterm[ch] = unitab_xterm_std[ch & 0x1F];
    }

    /* unitab_scoacs: the SCO Alternate Character Set is simply CP437. */
    for (ch = 0; ch < 256; ch++)
        unitab_scoacs[ch] = ucs_translate_byte(CS_CP437, ch);

    /*
     * unitab_ctrl: a byte whose line-codepage translation is below
     * space, or in the range 0x7F..0x9F, maps to itself; every other
     * byte maps to 0xFF.
     */
    for (ch = 0; ch < 256; ch++) {
        int is_ctrl = (unitab_line[ch] < ' '
                       || (unitab_line[ch] >= 0x7F && unitab_line[ch] < 0xA0));

        if (is_ctrl)
            unitab_ctrl[ch] = ch;
        else
            unitab_ctrl[ch] = 0xFF;
    }
}
Commit	Line	Data
1709795f	1	#include <stdio.h>
	2	#include <stdlib.h>
	3	#include <ctype.h>
2dc6356a	4	#include <locale.h>
	5	#include <limits.h>
	6	#include <wchar.h>
1709795f	7
1709795f	8	#include <time.h>
2dc6356a	9
1709795f	10	#include "putty.h"
887035a5	11	#include "terminal.h"
1709795f	12	#include "misc.h"
	13
	14	/*
	15	* Unix Unicode-handling routines.
1709795f	16	*/
1709795f	17
1709795f	18	int is_dbcs_leadbyte(int codepage, char byte)
	19	{
	20	return 0; /* we don't do DBCS */
	21	}
	22
	23	int mb_to_wc(int codepage, int flags, char *mbstr, int mblen,
	24	wchar_t *wcstr, int wclen)
	25	{
2dc6356a	26	if (codepage == DEFAULT_CODEPAGE) {
	27	int n = 0;
	28	mbstate_t state = { 0 };
	29
	30	setlocale(LC_CTYPE, "");
	31
	32	while (mblen > 0) {
	33	size_t i = mbrtowc(wcstr+n, mbstr, (size_t)mblen, &state);
	34	if (i == (size_t)-1 || i == (size_t)-2)
	35	break;
	36	n++;
	37	mbstr += i;
	38	mblen -= i;
	39	}
	40
	41	setlocale(LC_CTYPE, "C");
	42
	43	return n;
	44	} else
	45	return charset_to_unicode(&mbstr, &mblen, wcstr, wclen, codepage,
	46	NULL, NULL, 0);
e6346999	47	}
	48
	49	int wc_to_mb(int codepage, int flags, wchar_t *wcstr, int wclen,
	50	char *mbstr, int mblen, char *defchr, int *defused)
	51	{
2dc6356a	52	/* FIXME: we should remove the defused param completely... */
e6346999	53	if (defused)
e6346999	54	*defused = 0;
2dc6356a	55
	56	if (codepage == DEFAULT_CODEPAGE) {
	57	char output[MB_LEN_MAX];
	58	mbstate_t state = { 0 };
	59	int n = 0;
	60
	61	setlocale(LC_CTYPE, "");
	62
	63	while (wclen > 0) {
	64	int i = wcrtomb(output, wcstr[0], &state);
	65	if (i == (size_t)-1 || i > n - mblen)
	66	break;
	67	memcpy(mbstr+n, output, i);
	68	n += i;
	69	wcstr++;
	70	wclen--;
	71	}
	72
	73	setlocale(LC_CTYPE, "C");
	74
	75	return n;
	76	} else
	77	return charset_from_unicode(&wcstr, &wclen, mbstr, mblen, codepage,
	78	NULL, NULL, 0);
1709795f	79	}
	80
	81	void init_ucs(void)
	82	{
	83	int i;
2dc6356a	84
	85	/*
	86	* In the platform-independent parts of the code, font_codepage
	87	* is used only for system DBCS support - which we don't
	88	* support at all. So we set this to something which will never
	89	* be used.
	90	*/
	91	font_codepage = -1;
	92
	93	/*
	94	* line_codepage should be decoded from the specification in
	95	* cfg.
	96	*/
	97	line_codepage = charset_from_mimeenc(cfg.line_codepage);
	98	if (line_codepage == CS_NONE)
	99	line_codepage = charset_from_xenc(cfg.line_codepage);
	100	/* If it's still CS_NONE, we should assume direct-to-font. */
	101
	102	/* FIXME: this is a hack. Currently fonts with incomprehensible
	103	* encodings are dealt with by pretending they're 8859-1. It's
	104	* ugly, but it's good enough to stop things crashing. Should do
	105	* something better here. */
	106	if (line_codepage == CS_NONE)
	107	line_codepage = CS_ISO8859_1;
	108
	109	/*
	110	* Set up unitab_line, by translating each individual character
	111	* in the line codepage into Unicode.
	112	*/
	113	for (i = 0; i < 256; i++) {
	114	char c[1], *p;
	115	wchar_t wc[1];
	116	int len;
	117	c[0] = i;
	118	p = c;
	119	len = 1;
	120	if (1 == charset_to_unicode(&p,&len,wc,1,line_codepage,NULL,L"",0))
	121	unitab_line[i] = wc[0];
1709795f	122	else
2dc6356a	123	unitab_line[i] = 0xFFFD;
2dc6356a	124	}
1709795f	125
2dc6356a	126	/*
	127	* Set up unitab_xterm. This is the same as unitab_line except
	128	* in the line-drawing regions, where it follows the Unicode
	129	* encoding.
	130	*
	131	* (Note that the strange X encoding of line-drawing characters
	132	* in the bottom 32 glyphs of ISO8859-1 fonts is taken care of
	133	* by the font encoding, which will spot such a font and act as
	134	* if it were in a variant encoding of ISO8859-1.)
	135	*/
1709795f	136	for (i = 0; i < 256; i++) {
2dc6356a	137	static const wchar_t unitab_xterm_std[32] = {
	138	0x2666, 0x2592, 0x2409, 0x240c, 0x240d, 0x240a, 0x00b0, 0x00b1,
	139	0x2424, 0x240b, 0x2518, 0x2510, 0x250c, 0x2514, 0x253c, 0x23ba,
	140	0x23bb, 0x2500, 0x23bc, 0x23bd, 0x251c, 0x2524, 0x2534, 0x252c,
	141	0x2502, 0x2264, 0x2265, 0x03c0, 0x2260, 0x00a3, 0x00b7, 0x0020
	142	};
	143	if (i >= 0x5F && i < 0x7F)
	144	unitab_xterm[i] = unitab_xterm_std[i & 0x1F];
	145	else
	146	unitab_xterm[i] = unitab_line[i];
1709795f	147	}
2dc6356a	148
	149	/*
	150	* Set up unitab_scoacs. The SCO Alternate Character Set is
	151	* simply CP437.
	152	*/
	153	for (i = 0; i < 256; i++) {
	154	char c[1], *p;
	155	wchar_t wc[1];
	156	int len;
	157	c[0] = i;
	158	p = c;
	159	len = 1;
	160	if (1 == charset_to_unicode(&p,&len,wc,1,CS_CP437,NULL,L"",0))
	161	unitab_scoacs[i] = wc[0];
	162	else
	163	unitab_scoacs[i] = 0xFFFD;
	164	}
	165
	166	/* Find the line control characters. */
	167	for (i = 0; i < 256; i++)
	168	if (unitab_line[i] < ' '
	169	|| (unitab_line[i] >= 0x7F && unitab_line[i] < 0xA0))
	170	unitab_ctrl[i] = i;
	171	else
	172	unitab_ctrl[i] = 0xFF;
126ce234	173	}