15 * Unix Unicode-handling routines.
/*
 * Report whether `byte` is the lead byte of a double-byte (DBCS)
 * character in `codepage`.
 *
 * DBCS is not handled here, so the answer is always 0 and neither
 * argument is examined.
 */
int is_dbcs_leadbyte(int codepage, char byte)
{
    (void)codepage;             /* deliberately unused */
    (void)byte;                 /* deliberately unused */
    return 0;                   /* we don't do DBCS */
}
/*
 * mb_to_wc: convert the multibyte string `mbstr` (`mblen` bytes) into
 * wide characters written to `wcstr`.
 *
 * Two paths are visible:
 *  - codepage == DEFAULT_CODEPAGE: LC_CTYPE is switched with
 *    setlocale(LC_CTYPE, ""), input is decoded with mbrtowc() into
 *    wcstr+n, and LC_CTYPE is then set to "C".
 *  - any other codepage: the result of charset_to_unicode() is returned.
 *
 * `flags` is not referenced in any of the lines shown here.
 *
 * NOTE(review): this listing is incomplete. The declaration of `n`, the
 * loop around mbrtowc(), the action taken on a decode error, the return
 * value of the DEFAULT_CODEPAGE path and the trailing arguments of the
 * charset_to_unicode() call are not visible; confirm against the full
 * file before relying on this description.
 */
23 int mb_to_wc(int codepage
, int flags
, char *mbstr
, int mblen
,
24 wchar_t *wcstr
, int wclen
)
26 if (codepage
== DEFAULT_CODEPAGE
) {
/* Zero-initialised conversion state for mbrtowc(). */
28 mbstate_t state
= { 0 };
/* Changes the process-wide LC_CTYPE setting for the conversion. */
30 setlocale(LC_CTYPE
, "");
/*
 * NOTE(review): `wclen` does not appear in any visible line of this
 * branch, so nothing shown here bounds the write to wcstr+n; verify that
 * the lines not shown limit n to wclen.
 */
33 size_t i
= mbrtowc(wcstr
+n
, mbstr
, (size_t)mblen
, &state
);
/*
 * (size_t)-1 is mbrtowc()'s encoding-error result and (size_t)-2 its
 * incomplete-sequence result. NOTE(review): mbrtowc() returns 0 when it
 * decodes a null wide character; how the lines not shown advance past
 * that case should be checked.
 */
34 if (i
== (size_t)-1 || i
== (size_t)-2)
/*
 * LC_CTYPE is set to "C" here; the value in force before this function
 * was called is not saved or restored in the lines shown.
 */
41 setlocale(LC_CTYPE
, "C");
/* Non-default codepage: delegate to charset_to_unicode(). */
45 return charset_to_unicode(&mbstr
, &mblen
, wcstr
, wclen
, codepage
,
/*
 * wc_to_mb: convert the wide string `wcstr` (`wclen` characters) into
 * multibyte text written to `mbstr`, whose capacity is `mblen` bytes.
 *
 * Two paths are visible:
 *  - codepage == DEFAULT_CODEPAGE: LC_CTYPE is switched with
 *    setlocale(LC_CTYPE, ""), wcstr[0] is encoded with wcrtomb() into a
 *    local MB_LEN_MAX-byte buffer, the bytes are copied to mbstr+n, and
 *    LC_CTYPE is then set to "C".
 *  - any other codepage: the result of charset_from_unicode() is
 *    returned.
 *
 * `flags`, `defchr` and `defused` are not referenced in any of the
 * statements shown here.
 *
 * NOTE(review): this listing is incomplete. The declaration of `n`, the
 * loop around wcrtomb(), the statements following the FIXME below, the
 * return value of the DEFAULT_CODEPAGE path and the trailing arguments
 * of the charset_from_unicode() call are not visible; confirm against
 * the full file.
 */
49 int wc_to_mb(int codepage
, int flags
, wchar_t *wcstr
, int wclen
,
50 char *mbstr
, int mblen
, char *defchr
, int *defused
)
52 /* FIXME: we should remove the defused param completely... */
56 if (codepage
== DEFAULT_CODEPAGE
) {
/* Scratch buffer large enough for one encoded character. */
57 char output
[MB_LEN_MAX
];
/* Zero-initialised conversion state for wcrtomb(). */
58 mbstate_t state
= { 0 };
/* Changes the process-wide LC_CTYPE setting for the conversion. */
61 setlocale(LC_CTYPE
, "");
/*
 * NOTE(review): wcrtomb() returns size_t but the result is stored in an
 * int, so the (size_t)-1 test below relies on converting that int back
 * to size_t.
 */
64 int i
= wcrtomb(output
, wcstr
[0], &state
);
/*
 * NOTE(review): `i > n - mblen` looks inverted. If n counts bytes
 * already written (its declaration is not visible), the space left in
 * mbstr would be mblen - n; as written the right-hand side is
 * non-positive whenever n <= mblen. Confirm the intended bound.
 */
65 if (i
== (size_t)-1 || i
> n
- mblen
)
/* Append the encoded bytes at offset n of the caller's buffer. */
67 memcpy(mbstr
+n
, output
, i
);
/*
 * LC_CTYPE is set to "C" here; the value in force before this function
 * was called is not saved or restored in the lines shown.
 */
73 setlocale(LC_CTYPE
, "C");
/* Non-default codepage: delegate to charset_from_unicode(). */
77 return charset_from_unicode(&wcstr
, &wclen
, mbstr
, mblen
, codepage
,
86 * In the platform-independent parts of the code, font_codepage
87 * is used only for system DBCS support - which we don't
88 * support at all. So we set this to something which will never
94 * line_codepage should be decoded from the specification in
97 line_codepage
= charset_from_mimeenc(cfg
.line_codepage
);
98 if (line_codepage
== CS_NONE
)
99 line_codepage
= charset_from_xenc(cfg
.line_codepage
);
100 /* If it's still CS_NONE, we should assume direct-to-font. */
102 /* FIXME: this is a hack. Currently fonts with incomprehensible
103 * encodings are dealt with by pretending they're 8859-1. It's
104 * ugly, but it's good enough to stop things crashing. Should do
105 * something better here. */
106 if (line_codepage
== CS_NONE
)
107 line_codepage
= CS_ISO8859_1
;
110 * Set up unitab_line, by translating each individual character
111 * in the line codepage into Unicode.
113 for (i
= 0; i
< 256; i
++) {
120 if (1 == charset_to_unicode(&p
,&len
,wc
,1,line_codepage
,NULL
,L
"",0))
121 unitab_line
[i
] = wc
[0];
123 unitab_line
[i
] = 0xFFFD;
127 * Set up unitab_xterm. This is the same as unitab_line except
128 * in the line-drawing regions, where it follows the Unicode
131 * (Note that the strange X encoding of line-drawing characters
132 * in the bottom 32 glyphs of ISO8859-1 fonts is taken care of
133 * by the font encoding, which will spot such a font and act as
134 * if it were in a variant encoding of ISO8859-1.)
136 for (i
= 0; i
< 256; i
++) {
137 static const wchar_t unitab_xterm_std
[32] = {
138 0x2666, 0x2592, 0x2409, 0x240c, 0x240d, 0x240a, 0x00b0, 0x00b1,
139 0x2424, 0x240b, 0x2518, 0x2510, 0x250c, 0x2514, 0x253c, 0x23ba,
140 0x23bb, 0x2500, 0x23bc, 0x23bd, 0x251c, 0x2524, 0x2534, 0x252c,
141 0x2502, 0x2264, 0x2265, 0x03c0, 0x2260, 0x00a3, 0x00b7, 0x0020
143 if (i
>= 0x5F && i
< 0x7F)
144 unitab_xterm
[i
] = unitab_xterm_std
[i
& 0x1F];
146 unitab_xterm
[i
] = unitab_line
[i
];
150 * Set up unitab_scoacs. The SCO Alternate Character Set is
153 for (i
= 0; i
< 256; i
++) {
160 if (1 == charset_to_unicode(&p
,&len
,wc
,1,CS_CP437
,NULL
,L
"",0))
161 unitab_scoacs
[i
] = wc
[0];
163 unitab_scoacs
[i
] = 0xFFFD;
166 /* Find the line control characters. */
167 for (i
= 0; i
< 256; i
++)
168 if (unitab_line
[i
] < ' '
169 || (unitab_line
[i
] >= 0x7F && unitab_line
[i
] < 0xA0))
172 unitab_ctrl
[i
] = 0xFF;