mdw@git.distorted.org.uk Git - sgt/putty/blob - unix/uxucs.c

   1 #include <stdio.h>
   2 #include <stdlib.h>
   3 #include <ctype.h>
   4 #include <locale.h>
   5 #include <limits.h>
   6 #include <wchar.h>
   7
   8 #include <time.h>
   9
  10 #include "putty.h"
  11 #include "terminal.h"
  12 #include "misc.h"
  13
  14 /*
  15  * Unix Unicode-handling routines.
  16  */
  17
  18 int is_dbcs_leadbyte(int codepage, char byte)
  19 {
  20     return 0;                          /* we don't do DBCS */
  21 }
  22
  23 int mb_to_wc(int codepage, int flags, char *mbstr, int mblen,
  24              wchar_t *wcstr, int wclen)
  25 {
  26     if (codepage == DEFAULT_CODEPAGE) {
  27         int n = 0;
  28         mbstate_t state = { 0 };
  29
  30         setlocale(LC_CTYPE, "");
  31
  32         while (mblen > 0) {
  33             size_t i = mbrtowc(wcstr+n, mbstr, (size_t)mblen, &state);
  34             if (i == (size_t)-1 || i == (size_t)-2)
  35                 break;
  36             n++;
  37             mbstr += i;
  38             mblen -= i;
  39         }
  40
  41         setlocale(LC_CTYPE, "C");
  42
  43         return n;
  44     } else
  45         return charset_to_unicode(&mbstr, &mblen, wcstr, wclen, codepage,
  46                                   NULL, NULL, 0);
  47 }
  48
  49 int wc_to_mb(int codepage, int flags, wchar_t *wcstr, int wclen,
  50              char *mbstr, int mblen, char *defchr, int *defused)
  51 {
  52     /* FIXME: we should remove the defused param completely... */
  53     if (defused)
  54         *defused = 0;
  55
  56     if (codepage == DEFAULT_CODEPAGE) {
  57         char output[MB_LEN_MAX];
  58         mbstate_t state = { 0 };
  59         int n = 0;
  60
  61         setlocale(LC_CTYPE, "");
  62
  63         while (wclen > 0) {
  64             int i = wcrtomb(output, wcstr[0], &state);
  65             if (i == (size_t)-1 || i > n - mblen)
  66                 break;
  67             memcpy(mbstr+n, output, i);
  68             n += i;
  69             wcstr++;
  70             wclen--;
  71         }
  72
  73         setlocale(LC_CTYPE, "C");
  74
  75         return n;
  76     } else
  77         return charset_from_unicode(&wcstr, &wclen, mbstr, mblen, codepage,
  78                                     NULL, NULL, 0);
  79 }
  80
  81 void init_ucs(void)
  82 {
  83     int i;
  84
  85     /*
  86      * In the platform-independent parts of the code, font_codepage
  87      * is used only for system DBCS support - which we don't
  88      * support at all. So we set this to something which will never
  89      * be used.
  90      */
  91     font_codepage = -1;
  92
  93     /*
  94      * line_codepage should be decoded from the specification in
  95      * cfg.
  96      */
  97     line_codepage = charset_from_mimeenc(cfg.line_codepage);
  98     if (line_codepage == CS_NONE)
  99         line_codepage = charset_from_xenc(cfg.line_codepage);
 100     /* If it's still CS_NONE, we should assume direct-to-font. */
 101
 102     /* FIXME: this is a hack. Currently fonts with incomprehensible
 103      * encodings are dealt with by pretending they're 8859-1. It's
 104      * ugly, but it's good enough to stop things crashing. Should do
 105      * something better here. */
 106     if (line_codepage == CS_NONE)
 107         line_codepage = CS_ISO8859_1;
 108
 109     /*
 110      * Set up unitab_line, by translating each individual character
 111      * in the line codepage into Unicode.
 112      */
 113     for (i = 0; i < 256; i++) {
 114         char c[1], *p;
 115         wchar_t wc[1];
 116         int len;
 117         c[0] = i;
 118         p = c;
 119         len = 1;
 120         if (1 == charset_to_unicode(&p,&len,wc,1,line_codepage,NULL,L"",0))
 121             unitab_line[i] = wc[0];
 122         else
 123             unitab_line[i] = 0xFFFD;
 124     }
 125
 126     /*
 127      * Set up unitab_xterm. This is the same as unitab_line except
 128      * in the line-drawing regions, where it follows the Unicode
 129      * encoding.
 130      *
 131      * (Note that the strange X encoding of line-drawing characters
 132      * in the bottom 32 glyphs of ISO8859-1 fonts is taken care of
 133      * by the font encoding, which will spot such a font and act as
 134      * if it were in a variant encoding of ISO8859-1.)
 135      */
 136     for (i = 0; i < 256; i++) {
 137         static const wchar_t unitab_xterm_std[32] = {
 138             0x2666, 0x2592, 0x2409, 0x240c, 0x240d, 0x240a, 0x00b0, 0x00b1,
 139             0x2424, 0x240b, 0x2518, 0x2510, 0x250c, 0x2514, 0x253c, 0x23ba,
 140             0x23bb, 0x2500, 0x23bc, 0x23bd, 0x251c, 0x2524, 0x2534, 0x252c,
 141             0x2502, 0x2264, 0x2265, 0x03c0, 0x2260, 0x00a3, 0x00b7, 0x0020
 142         };
 143         if (i >= 0x5F && i < 0x7F)
 144             unitab_xterm[i] = unitab_xterm_std[i & 0x1F];
 145         else
 146             unitab_xterm[i] = unitab_line[i];
 147     }
 148
 149     /*
 150      * Set up unitab_scoacs. The SCO Alternate Character Set is
 151      * simply CP437.
 152      */
 153     for (i = 0; i < 256; i++) {
 154         char c[1], *p;
 155         wchar_t wc[1];
 156         int len;
 157         c[0] = i;
 158         p = c;
 159         len = 1;
 160         if (1 == charset_to_unicode(&p,&len,wc,1,CS_CP437,NULL,L"",0))
 161             unitab_scoacs[i] = wc[0];
 162         else
 163             unitab_scoacs[i] = 0xFFFD;
 164     }
 165
 166     /* Find the line control characters. */
 167     for (i = 0; i < 256; i++)
 168         if (unitab_line[i] < ' '
 169             || (unitab_line[i] >= 0x7F && unitab_line[i] < 0xA0))
 170             unitab_ctrl[i] = i;
 171         else
 172             unitab_ctrl[i] = 0xFF;
 173 }