| 1 | /* $Id$ */ |
| 2 | /* |
| 3 | * Copyright (c) 2003 Ben Harris |
| 4 | * All rights reserved. |
| 5 | * |
| 6 | * Permission is hereby granted, free of charge, to any person |
| 7 | * obtaining a copy of this software and associated documentation |
| 8 | * files (the "Software"), to deal in the Software without |
| 9 | * restriction, including without limitation the rights to use, |
| 10 | * copy, modify, merge, publish, distribute, sublicense, and/or |
| 11 | * sell copies of the Software, and to permit persons to whom the |
| 12 | * Software is furnished to do so, subject to the following |
| 13 | * conditions: |
| 14 | * |
| 15 | * The above copyright notice and this permission notice shall be |
| 16 | * included in all copies or substantial portions of the Software. |
| 17 | * |
| 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 19 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| 20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| 21 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR |
| 22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF |
| 23 | * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| 24 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 25 | * SOFTWARE. |
| 26 | */ |
| 27 | /* |
| 28 | * macenc.c -- Convert a Mac OS script/region/font combination to our |
| 29 | * internal charset code. |
| 30 | */ |
| 31 | |
| 32 | #include <string.h> |
| 33 | |
| 34 | #include "charset.h" |
| 35 | #include "internal.h" |
| 36 | |
/*
 * Script and region codes.  These are defined by Mac OS's
 * <Script.h>, but we'd like to be independent of that, so the
 * values we need are repeated here.
 */

/* Script codes (sm*) */
#define smRoman 0
#define smJapanese 1
#define smTradChinese 2
#define smKorean 3
#define smArabic 4
#define smHebrew 5
#define smCyrillic 7
#define smDevanagari 9
/* Earlier misspelling of smDevanagari, kept so existing uses still build. */
#define smDevenagari smDevanagari
#define smGurmukhi 10
#define smGujurati 11
#define smThai 21
#define smSimpChinese 25
#define smTibetan 26
#define smEthiopic 28
#define smCentralEuroRoman 29

/* Region codes (ver*) */
#define verGreece 20
#define verIceland 21
#define verTurkey 24
#define verYugoCroatian 25
#define verRomania 39
#define verFaroeIsl 47
#define verIran 48
#define verRussia 49
/* Referenced by the (currently disabled) Celtic table entries. */
#define verIreland 50
#define verSlovenian 66
#define verCroatia 68
#define verBulgaria 72
#define verScottishGaelic 75
#define verManxGaelic 76
#define verBreton 77
#define verNunavut 78
#define verWelsh 79
#define verIrishGaelicScript 81
| 74 | #define verIrishGaelicScript 81 |
| 75 | |
| 76 | static const struct { |
| 77 | int script; |
| 78 | int region; |
| 79 | int sysvermin; |
| 80 | char const *fontname; |
| 81 | int charset; |
| 82 | } macencs[] = { |
| 83 | { smRoman, -1, 0x850, "VT100", CS_MAC_VT100 }, |
| 84 | { smRoman, -1, 0, "VT100", CS_MAC_VT100_OLD }, |
| 85 | /* |
| 86 | * From here on, this table is largely derived from |
| 87 | * <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/README.TXT>, |
| 88 | * with _OLD version added based on the comments in individual |
| 89 | * mapping files. |
| 90 | */ |
| 91 | { smRoman, -1, 0, "Symbol", CS_MAC_SYMBOL }, |
| 92 | { smRoman, -1, 0, "Zapf Dingbats", CS_MAC_DINGBATS }, |
| 93 | { smRoman, verTurkey, 0, NULL, CS_MAC_TURKISH }, |
| 94 | { smRoman, verYugoCroatian, 0x850, NULL, CS_MAC_CROATIAN }, |
| 95 | { smRoman, verYugoCroatian, 0, NULL, CS_MAC_CROATIAN_OLD }, |
| 96 | { smRoman, verSlovenian, 0x850, NULL, CS_MAC_CROATIAN }, |
| 97 | { smRoman, verSlovenian, 0, NULL, CS_MAC_CROATIAN_OLD }, |
| 98 | { smRoman, verCroatia, 0x850, NULL, CS_MAC_CROATIAN }, |
| 99 | { smRoman, verCroatia, 0, NULL, CS_MAC_CROATIAN_OLD }, |
| 100 | { smRoman, verIceland, 0x850, NULL, CS_MAC_ICELAND }, |
| 101 | { smRoman, verIceland, 0, NULL, CS_MAC_ICELAND_OLD }, |
| 102 | { smRoman, verFaroeIsl, 0x850, NULL, CS_MAC_ICELAND }, |
| 103 | { smRoman, verFaroeIsl, 0, NULL, CS_MAC_ICELAND_OLD }, |
| 104 | { smRoman, verRomania, 0x850, NULL, CS_MAC_ROMANIAN }, |
| 105 | { smRoman, verRomania, 0, NULL, CS_MAC_ROMANIAN_OLD }, |
| 106 | #if 0 /* No mapping table on ftp.unicode.org */ |
| 107 | { smRoman, verIreland, 0x850, NULL, CS_MAC_CELTIC }, |
| 108 | { smRoman, verIreland, 0, NULL, CS_MAC_CELTIC_OLD }, |
| 109 | { smRoman, verScottishGaelic, 0x850, NULL, CS_MAC_CELTIC }, |
| 110 | { smRoman, verScottishGaelic, 0, NULL, CS_MAC_CELTIC_OLD }, |
| 111 | { smRoman, verManxGaelic, 0x850, NULL, CS_MAC_CELTIC }, |
| 112 | { smRoman, verManxGaelic, 0, NULL, CS_MAC_CELTIC_OLD }, |
| 113 | { smRoman, verBreton, 0x850, NULL, CS_MAC_CELTIC }, |
| 114 | { smRoman, verBreton, 0, NULL, CS_MAC_CELTIC_OLD }, |
| 115 | { smRoman, verWelsh, 0x850, NULL, CS_MAC_CELTIC }, |
| 116 | { smRoman, verWelsh, 0, NULL, CS_MAC_CELTIC_OLD }, |
| 117 | { smRoman, verIrishGaelicScript, 0x850, NULL, CS_MAC_GAELIC }, |
| 118 | { smRoman, verIrishGaelicScript, 0, NULL, CS_MAC_GAELIC_OLD }, |
| 119 | #endif |
| 120 | { smRoman, verGreece, 0x922, NULL, CS_MAC_GREEK }, |
| 121 | { smRoman, verGreece, 0, NULL, CS_MAC_GREEK_OLD }, |
| 122 | { smRoman, -1, 0x850, NULL, CS_MAC_ROMAN }, |
| 123 | { smRoman, -1, 0, NULL, CS_MAC_ROMAN_OLD }, |
| 124 | #if 0 /* Multi-byte encodings, not yet supported */ |
| 125 | { smJapanese, -1, 0, NULL, CS_MAC_JAPANESE }, |
| 126 | { smTradChinese, -1, 0, NULL, CS_MAC_CHINTRAD }, |
| 127 | { smKorean, -1, 0, NULL, CS_MAC_KOREAN }, |
| 128 | #endif |
| 129 | #if 0 /* Bidirectional encodings, not yet supported */ |
| 130 | { smArabic, verIran, 0, NULL, CS_MAC_FARSI }, |
| 131 | { smArabic, -1, 0, NULL, CS_MAC_ARABIC }, |
| 132 | { smHebrew, -1, 0, NULL, CS_MAC_HEBREW }, |
| 133 | #endif |
| 134 | { smCyrillic, -1, 0x900, NULL, CS_MAC_CYRILLIC }, |
| 135 | { smCyrillic, verRussia, 0, NULL, CS_MAC_CYRILLIC_OLD }, |
| 136 | { smCyrillic, verBulgaria, 0, NULL, CS_MAC_CYRILLIC_OLD }, |
| 137 | { smCyrillic, -1, 0, NULL, CS_MAC_UKRAINE }, |
| 138 | #if 0 /* Complex Indic scripts, not yet supported */ |
| 139 | { smDevanagari, -1, 0, NULL, CS_MAC_DEVENAGA }, |
| 140 | { smGurmukhi, -1, 0, NULL, CS_MAC_GURMUKHI }, |
| 141 | { smGujurati, -1, 0, NULL, CS_MAC_GUJURATI }, |
| 142 | #endif |
| 143 | { smThai, -1, 0, NULL, CS_MAC_THAI }, |
| 144 | #if 0 /* Multi-byte encoding, not yet supported */ |
| 145 | { smSimpChinese, -1, 0, NULL, CS_MAC_CHINSIMP }, |
| 146 | #endif |
| 147 | #if 0 /* No mapping table on ftp.unicode.org */ |
| 148 | { smTibetan, -1, 0, NULL, CS_MAC_TIBETAN }, |
| 149 | { smEthiopic, -1, 0, NULL, CS_MAC_ETHIOPIC }, |
| 150 | { smEthiopic, verNanavut, 0, NULL, CS_MAC_INUIT }, |
| 151 | #endif |
| 152 | { smCentralEuroRoman, -1, 0, NULL, CS_MAC_CENTEURO }, |
| 153 | }; |
| 154 | |
| 155 | int charset_from_macenc(int script, int region, int sysvers, |
| 156 | char const *fontname) |
| 157 | { |
| 158 | int i; |
| 159 | |
| 160 | for (i = 0; i < (int)lenof(macencs); i++) |
| 161 | if ((macencs[i].script == script) && |
| 162 | (macencs[i].region < 0 || macencs[i].region == region) && |
| 163 | (macencs[i].sysvermin <= sysvers) && |
| 164 | (macencs[i].fontname == NULL || |
| 165 | (fontname != NULL && strcmp(macencs[i].fontname, fontname) == 0))) |
| 166 | return macencs[i].charset; |
| 167 | |
| 168 | return CS_NONE; |
| 169 | } |