Proper support for using the font's own character encoding. If we ...

author simon <simon@cda61777-01e9-0310-a592-d414129be87e>

Wed, 1 Jan 2003 22:25:25 +0000 (22:25 +0000)

committer simon <simon@cda61777-01e9-0310-a592-d414129be87e>

Wed, 1 Jan 2003 22:25:25 +0000 (22:25 +0000)
author simon <simon@cda61777-01e9-0310-a592-d414129be87e>
Wed, 1 Jan 2003 22:25:25 +0000 (22:25 +0000)
committer simon <simon@cda61777-01e9-0310-a592-d414129be87e>
Wed, 1 Jan 2003 22:25:25 +0000 (22:25 +0000)
diff --git a/putty.h b/putty.h

index 113d355..c956d50 100644 (file)
--- a/putty.h
+++ b/putty.h
@@ -589,7 +589,7 @@ extern char ver[];
  #ifndef CP_UTF8
  #define CP_UTF8 65001
  #endif
-void init_ucs(void);
+/* void init_ucs(void); -- this is now in platform-specific headers */
  int is_dbcs_leadbyte(int codepage, char byte);
  int mb_to_wc(int codepage, int flags, char *mbstr, int mblen,
              wchar_t *wcstr, int wclen);
diff --git a/unix/pterm.1 b/unix/pterm.1

index e08d033..bbdb081 100644 (file)
--- a/unix/pterm.1
+++ b/unix/pterm.1
@@ -110,6 +110,10 @@ supported by \fIpterm\fP) should be valid here (examples are
  encoding which is valid in an X logical font description should be
  valid ("ibm-cp437", for example).
  
+\fIpterm\fP's default behaviour is to use the same character
+encoding as its primary font. If you supply a Unicode (iso10646-1)
+font, it will default to the UTF-8 character set.
+
  Character set names are case-insensitive.
  .IP "\fB\-nethack\fP"
  Tells \fIpterm\fP to enable NetHack keypad mode, in which the
diff --git a/unix/pterm.c b/unix/pterm.c

index fa9fe1d..b7f93f9 100644 (file)
--- a/unix/pterm.c
+++ b/unix/pterm.c
@@ -836,18 +836,27 @@ gint key_event(GtkWidget *widget, GdkEventKey *event, gpointer data)
         printf("\n");
  #endif
  
-       /*
-        * The stuff we've just generated is assumed to be
-        * ISO-8859-1! This sounds insane, but `man XLookupString'
-        * agrees: strings of this type returned from the X server
-        * are hardcoded to 8859-1. Strictly speaking we should be
-        * doing this using some sort of GtkIMContext, which (if
-        * we're lucky) would give us our data directly in Unicode;
-        * but that's not supported in GTK 1.2 as far as I can
-        * tell, and it's poorly documented even in 2.0, so it'll
-        * have to wait.
-        */
-       lpage_send(inst->ldisc, CS_ISO8859_1, output+start, end-start, 1);
+       if (inst->fontinfo[0].charset != CS_NONE) {
+           /*
+            * The stuff we've just generated is assumed to be
+            * ISO-8859-1! This sounds insane, but `man
+            * XLookupString' agrees: strings of this type returned
+            * from the X server are hardcoded to 8859-1. Strictly
+            * speaking we should be doing this using some sort of
+            * GtkIMContext, which (if we're lucky) would give us
+            * our data directly in Unicode; but that's not
+            * supported in GTK 1.2 as far as I can tell, and it's
+            * poorly documented even in 2.0, so it'll have to
+            * wait.
+            */
+           lpage_send(inst->ldisc, CS_ISO8859_1, output+start, end-start, 1);
+       } else {
+           /*
+            * In direct-to-font mode, we just send the string
+            * exactly as we received it.
+            */
+           ldisc_send(inst->ldisc, output+start, end-start, 1);
+       }
  
         show_mouseptr(inst, 0);
         term_seen_key_event(inst->term);
@@ -1218,17 +1227,25 @@ void write_clip(void *frontend, wchar_t * data, int len, int must_deselect)
      if (inst->pasteout_data_utf8)
         sfree(inst->pasteout_data_utf8);
  
-    inst->pasteout_data_utf8 = smalloc(len*6);
-    inst->pasteout_data_utf8_len = len*6;
-    {
+    /*
+     * Set up UTF-8 paste data. This only happens if we aren't in
+     * direct-to-font mode using the D800 hack.
+     */
+    if (inst->fontinfo[0].charset != CS_NONE) {
         wchar_t *tmp = data;
         int tmplen = len;
+
+       inst->pasteout_data_utf8 = smalloc(len*6);
+       inst->pasteout_data_utf8_len = len*6;
         inst->pasteout_data_utf8_len =
             charset_from_unicode(&tmp, &tmplen, inst->pasteout_data_utf8,
                                  inst->pasteout_data_utf8_len,
                                  CS_UTF8, NULL, NULL, 0);
         inst->pasteout_data_utf8 =
             srealloc(inst->pasteout_data_utf8, inst->pasteout_data_utf8_len);
+    } else {
+       inst->pasteout_data_utf8 = NULL;
+       inst->pasteout_data_utf8_len = 0;
      }
  
      inst->pasteout_data = smalloc(len);
@@ -1243,8 +1260,9 @@ void write_clip(void *frontend, wchar_t * data, int len, int must_deselect)
                                  GDK_SELECTION_TYPE_STRING, 1);
         gtk_selection_add_target(inst->area, GDK_SELECTION_PRIMARY,
                                  inst->compound_text_atom, 1);
-       gtk_selection_add_target(inst->area, GDK_SELECTION_PRIMARY,
-                                inst->utf8_string_atom, 1);
+       if (inst->pasteout_data_utf8)
+           gtk_selection_add_target(inst->area, GDK_SELECTION_PRIMARY,
+                                    inst->utf8_string_atom, 1);
      }
  }
  
@@ -1286,15 +1304,24 @@ void request_paste(void *frontend)
       * comes back _then_ we can call term_do_paste().
       */
  
-    /*
-     * First we attempt to retrieve the selection as a UTF-8 string
-     * (which we will convert to the correct code page before
-     * sending to the session, of course). If that fails,
-     * selection_received() will be informed and will fall back to
-     * an ordinary string.
-     */
-    gtk_selection_convert(inst->area, GDK_SELECTION_PRIMARY,
-                         inst->utf8_string_atom, GDK_CURRENT_TIME);
+    if (inst->fontinfo[0].charset != CS_NONE) {
+       /*
+        * First we attempt to retrieve the selection as a UTF-8
+        * string (which we will convert to the correct code page
+        * before sending to the session, of course). If that
+        * fails, selection_received() will be informed and will
+        * fall back to an ordinary string.
+        */
+       gtk_selection_convert(inst->area, GDK_SELECTION_PRIMARY,
+                             inst->utf8_string_atom, GDK_CURRENT_TIME);
+    } else {
+       /*
+        * If we're in direct-to-font mode, we disable UTF-8
+        * pasting, and go straight to ordinary string data.
+        */
+       gtk_selection_convert(inst->area, GDK_SELECTION_PRIMARY,
+                             GDK_SELECTION_TYPE_STRING, GDK_CURRENT_TIME);
+    }
  }
  
  gint idle_paste_func(gpointer data);   /* forward ref */
@@ -1562,12 +1589,9 @@ void do_text_internal(Context ctx, int x, int y, char *text, int len,
                              gwcs, len*2);
             sfree(gwcs);
         } else {
-           wchar_t *wcstmp = wcs;
-           int lentmp = len;
             gcs = smalloc(sizeof(GdkWChar) * (len+1));
-           charset_from_unicode(&wcstmp, &lentmp, gcs, len,
-                                inst->fontinfo[fontid].charset,
-                                NULL, ".", 1);
+           wc_to_mb(inst->fontinfo[fontid].charset, 0,
+                    wcs, len, gcs, len, ".", NULL);
             gdk_draw_text(inst->pixmap, inst->fonts[fontid], gc,
                           x*inst->font_width+cfg.window_border,
                           y*inst->font_height+cfg.window_border+inst->fonts[0]->ascent,
@@ -2101,13 +2125,20 @@ static void block_signal(int sig, int block_it) {
    }
  }
  
-static void set_font_info(struct gui_data *inst, int fontid)
+/*
+ * Record a font's character set in inst->fontinfo[fontid]. Returns
+ * the charset without the X11 hack (CS_UTF8 for iso10646-1), in
+ * case the user asks to use the font's own encoding.
+ */
+static int set_font_info(struct gui_data *inst, int fontid)
  {
      GdkFont *font = inst->fonts[fontid];
      XFontStruct *xfs = GDK_FONT_XFONT(font);
      Display *disp = GDK_FONT_XDISPLAY(font);
      Atom charset_registry, charset_encoding;
      unsigned long registry_ret, encoding_ret;
+    int retval = CS_NONE;
+
      charset_registry = XInternAtom(disp, "CHARSET_REGISTRY", False);
      charset_encoding = XInternAtom(disp, "CHARSET_ENCODING", False);
      inst->fontinfo[fontid].charset = CS_NONE;
@@ -2119,10 +2150,13 @@ static void set_font_info(struct gui_data *inst, int fontid)
         enc = XGetAtomName(disp, (Atom)encoding_ret);
         if (reg && enc) {
             char *encoding = dupcat(reg, "-", enc, NULL);
-           inst->fontinfo[fontid].charset = charset_from_xenc(encoding);
+           retval = inst->fontinfo[fontid].charset =
+               charset_from_xenc(encoding);
             /* FIXME: when libcharset supports wide encodings fix this. */
-           if (!strcasecmp(encoding, "iso10646-1"))
+           if (!strcasecmp(encoding, "iso10646-1")) {
                 inst->fontinfo[fontid].is_wide = 1;
+               retval = CS_UTF8;
+           }
  
             /*
              * Hack for X line-drawing characters: if the primary
@@ -2148,19 +2182,11 @@ static void set_font_info(struct gui_data *inst, int fontid)
                     inst->fontinfo[fontid].charset = CS_ISO8859_1_X11;
             }
  
-           /*
-            * FIXME: this is a hack. Currently fonts with
-            * incomprehensible encodings are dealt with by
-            * pretending they're 8859-1. It's ugly, but it's good
-            * enough to stop things crashing. Should do something
-            * better here.
-            */
-           if (inst->fontinfo[fontid].charset == CS_NONE)
-               inst->fontinfo[fontid].charset = CS_ISO8859_1;
-
             sfree(encoding);
         }
      }
+
+    return retval;
  }
  
  int main(int argc, char **argv)
@@ -2168,6 +2194,7 @@ int main(int argc, char **argv)
      extern int pty_master_fd;         /* declared in pty.c */
      extern void pty_pre_init(void);    /* declared in pty.c */
      struct gui_data *inst;
+    int font_charset;
  
      /* defer any child exit handling until we're ready to deal with
       * it */
@@ -2195,7 +2222,7 @@ int main(int argc, char **argv)
         fprintf(stderr, "pterm: unable to load font \"%s\"\n", cfg.font);
         exit(1);
      }
-    set_font_info(inst, 0);
+    font_charset = set_font_info(inst, 0);
      if (cfg.boldfont[0]) {
         inst->fonts[1] = gdk_font_load(cfg.boldfont);
         if (!inst->fonts[1]) {
@@ -2233,7 +2260,7 @@ int main(int argc, char **argv)
      inst->compound_text_atom = gdk_atom_intern("COMPOUND_TEXT", FALSE);
      inst->utf8_string_atom = gdk_atom_intern("UTF8_STRING", FALSE);
  
-    init_ucs();
+    init_ucs(font_charset);
  
      inst->window = gtk_window_new(GTK_WINDOW_TOPLEVEL);
  
diff --git a/unix/unix.h b/unix/unix.h

index 9aa044d..ab5dc88 100644 (file)
--- a/unix/unix.h
+++ b/unix/unix.h
@@ -66,4 +66,9 @@ int next_socket(int *state, int *rwx);
  /* BSD-semantics version of signal() */
  void (*putty_signal(int sig, void (*func)(int)))(int);
  
+/*
+ * Exports from uxucs.c.
+ */
+void init_ucs(int font_charset);
+
  #endif
diff --git a/unix/uxucs.c b/unix/uxucs.c

index 2bf65a8..928acae 100644 (file)
--- a/unix/uxucs.c
+++ b/unix/uxucs.c
@@ -41,6 +41,17 @@ int mb_to_wc(int codepage, int flags, char *mbstr, int mblen,
         setlocale(LC_CTYPE, "C");
  
         return n;
+    } else if (codepage == CS_NONE) {
+       int n = 0;
+
+       while (mblen > 0) {
+           wcstr[n] = 0xD800 | (mbstr[0] & 0xFF);
+           n++;
+           mbstr++;
+           mblen--;
+       }
+
+       return n;
      } else
         return charset_to_unicode(&mbstr, &mblen, wcstr, wclen, codepage,
                                   NULL, NULL, 0);
@@ -73,12 +84,24 @@ int wc_to_mb(int codepage, int flags, wchar_t *wcstr, int wclen,
         setlocale(LC_CTYPE, "C");
  
         return n;
-    } else
+    } else if (codepage == CS_NONE) {
+       int n = 0;
+       while (wclen > 0 && n < mblen) {
+           if (*wcstr >= 0xD800 && *wcstr < 0xD900)
+               mbstr[n++] = (*wcstr & 0xFF);
+           else if (defchr)
+               mbstr[n++] = *defchr;
+           wcstr++;
+           wclen--;
+       }
+       return n;
+    } else {
         return charset_from_unicode(&wcstr, &wclen, mbstr, mblen, codepage,
                                     NULL, NULL, 0);
+    }
  }
  
-void init_ucs(void)
+void init_ucs(int font_charset)
  {
      int i;
  
@@ -97,14 +120,16 @@ void init_ucs(void)
      line_codepage = charset_from_mimeenc(cfg.line_codepage);
      if (line_codepage == CS_NONE)
         line_codepage = charset_from_xenc(cfg.line_codepage);
-    /* If it's still CS_NONE, we should assume direct-to-font. */
  
-    /* FIXME: this is a hack. Currently fonts with incomprehensible
-     * encodings are dealt with by pretending they're 8859-1. It's
-     * ugly, but it's good enough to stop things crashing. Should do
-     * something better here. */
+    /*
+     * If line_codepage is _still_ CS_NONE, we assume we're using
+     * the font's own encoding. This has been passed in to us, so
+     * we use that. If it's still CS_NONE after _that_ - i.e. the
+     * font we were given had an incomprehensible charset - then we
+     * fall back to using the D800 page.
+     */
      if (line_codepage == CS_NONE)
-       line_codepage = CS_ISO8859_1;
+       line_codepage = font_charset;
  
      /*
       * Set up unitab_line, by translating each individual character
@@ -117,7 +142,10 @@ void init_ucs(void)
         c[0] = i;
         p = c;
         len = 1;
-       if (1 == charset_to_unicode(&p,&len,wc,1,line_codepage,NULL,L"",0))
+       if (line_codepage == CS_NONE)
+           unitab_line[i] = 0xD800 | i;
+       else if (1 == charset_to_unicode(&p, &len, wc, 1, line_codepage,
+                                        NULL, L"", 0))
             unitab_line[i] = wc[0];
         else
             unitab_line[i] = 0xFFFD;
@@ -157,17 +185,25 @@ void init_ucs(void)
         c[0] = i;
         p = c;
         len = 1;
-       if (1 == charset_to_unicode(&p,&len,wc,1,CS_CP437,NULL,L"",0))
+       if (1 == charset_to_unicode(&p, &len, wc, 1, CS_CP437, NULL, L"", 0))
             unitab_scoacs[i] = wc[0];
         else
             unitab_scoacs[i] = 0xFFFD;
      }
  
-    /* Find the line control characters. */
-    for (i = 0; i < 256; i++)
-       if (unitab_line[i] < ' '
-           || (unitab_line[i] >= 0x7F && unitab_line[i] < 0xA0))
+    /*
+     * Find the control characters in the line codepage. For
+     * direct-to-font mode using the D800 hack, we assume 00-1F and
+     * 7F are controls, but allow 80-9F through. (It's as good a
+     * guess as anything; and my bet is that half the weird fonts
+     * used in this way will be IBM or MS code pages anyway.)
+     */
+    for (i = 0; i < 256; i++) {
+       int lineval = unitab_line[i];
+       if (lineval < ' ' || (lineval >= 0x7F && lineval < 0xA0) ||
+           (lineval >= 0xD800 && lineval < 0xD820) || (lineval == 0xD87F))
             unitab_ctrl[i] = i;
         else
             unitab_ctrl[i] = 0xFF;
+    }
  }
diff --git a/winstuff.h b/winstuff.h

index 06fb944..e2d700e 100644 (file)
--- a/winstuff.h
+++ b/winstuff.h
@@ -189,4 +189,9 @@ void force_normal(HWND hwnd);
  void UpdateSizeTip(HWND src, int cx, int cy);
  void EnableSizeTip(int bEnable);
  
+/*
+ * Exports from unicode.c.
+ */
+void init_ucs(void);
+
  #endif
author	simon <simon@cda61777-01e9-0310-a592-d414129be87e>
	Wed, 1 Jan 2003 22:25:25 +0000 (22:25 +0000)
committer	simon <simon@cda61777-01e9-0310-a592-d414129be87e>
	Wed, 1 Jan 2003 22:25:25 +0000 (22:25 +0000)
putty.h		patch \| blob \| blame \| history
unix/pterm.1		patch \| blob \| blame \| history
unix/pterm.c		patch \| blob \| blame \| history
unix/unix.h		patch \| blob \| blame \| history
unix/uxucs.c		patch \| blob \| blame \| history
winstuff.h		patch \| blob \| blame \| history