| 1 | /************************************************************************ |
| 2 | * $Id$ |
| 3 | * |
| 4 | * ------------ |
| 5 | * Description: |
| 6 | * ------------ |
| 7 | * This is an implementation of Unicode's Bidirectional Algorithm |
| 8 | * (known as UAX #9). |
| 9 | * |
| 10 | * http://www.unicode.org/reports/tr9/ |
| 11 | * |
| 12 | * Author: Ahmad Khalifa |
| 13 | * |
| 14 | * ----------------- |
| 15 | * Revision Details: (Updated by Revision Control System) |
| 16 | * ----------------- |
| 17 | * $Date$ |
| 18 | * $Author$ |
| 19 | * $Revision$ |
| 20 | * |
| 21 | * (www.arabeyes.org - under MIT license) |
| 22 | * |
| 23 | ************************************************************************/ |
| 24 | |
| 25 | /* |
| 26 | * TODO: |
| 27 | * ===== |
| 28 | * - Explicit marks need to be handled (they are not 100% now) |
| 29 | * - Ligatures |
| 30 | */ |
| 31 | |
| 32 | #include <stdlib.h> /* definition of wchar_t*/ |
| 33 | |
| 34 | #include "misc.h" |
| 35 | |
/*
 * Layout of a level byte: the low six bits carry the embedding level
 * and the top two bits carry the directional override state.
 */
#define LMASK 0x3F              /* embedding level bits */
#define OMASK 0xC0              /* both override bits */
#define OISL  0x80              /* override is L */
#define OISR  0x40              /* override is R */

/*
 * For standalone compilation in a testing mode.
 * Still depends on the PuTTY headers for snewn and sfree, but can avoid
 * _linking_ with any other PuTTY code.
 */
#if defined(TEST_GETTYPE)
#define safemalloc malloc
#define safefree free
#endif

/*
 * Shaping helpers.
 *
 * STYPE(c) yields the shaping type of c, or SU when c lies outside
 * the range covered by shapetypes[].
 * SISOLATED(c) reads the form_b entry for c; it performs no range
 * check, so c must lie within SHAPE_FIRST..SHAPE_LAST.
 * SFINAL/SINITIAL/SMEDIAL take the value produced by SISOLATED() and
 * add a fixed offset of 1, 2 or 3 respectively.
 */
#define STYPE(c) ((((c) >= SHAPE_FIRST) && ((c) <= SHAPE_LAST)) ? \
                  shapetypes[(c)-SHAPE_FIRST].type : SU)
#define SISOLATED(c) (shapetypes[(c)-SHAPE_FIRST].form_b)
#define SFINAL(c) ((c)+1)
#define SINITIAL(c) ((c)+2)
#define SMEDIAL(c) ((c)+3)

/* Smallest odd / even value strictly greater than x. */
#define leastGreaterOdd(x) ( ((x)+1) | 1 )
#define leastGreaterEven(x) ( ((x)+2) &~ 1 )
| 59 | |
/*
 * One character cell as handled by the bidi and shaping routines.
 */
typedef struct bidi_char {
    wchar_t origwc;             /* presumably the character as supplied
                                 * by the caller -- TODO confirm */
    wchar_t wc;                 /* working character; do_shape() stores
                                 * the shaped glyph here */
    unsigned short index;       /* presumably the cell's original
                                 * position in the line -- TODO confirm */
} bidi_char;
| 64 | |
| 65 | /* function declarations */ |
| 66 | void flipThisRun(bidi_char *from, unsigned char* level, int max, int count); |
| 67 | int findIndexOfRun(unsigned char* level , int start, int count, int tlevel); |
| 68 | unsigned char getType(int ch); |
| 69 | unsigned char setOverrideBits(unsigned char level, unsigned char override); |
| 70 | int getPreviousLevel(unsigned char* level, int from); |
| 71 | int do_shape(bidi_char *line, bidi_char *to, int count); |
| 72 | int do_bidi(bidi_char *line, int count); |
| 73 | void doMirror(wchar_t* ch); |
| 74 | |
/*
 * Bidirectional character types (the classes defined by UAX #9).
 *
 * The numeric values matter: is_rtl() builds a bit mask by shifting 1
 * left by these constants, so the order must not change and the values
 * must stay below the width of an int.
 */
enum {
    L,                          /* Left-to-Right */
    LRE,                        /* Left-to-Right Embedding */
    LRO,                        /* Left-to-Right Override */
    R,                          /* Right-to-Left */
    AL,                         /* Right-to-Left Arabic */
    RLE,                        /* Right-to-Left Embedding */
    RLO,                        /* Right-to-Left Override */
    PDF,                        /* Pop Directional Format */
    EN,                         /* European Number */
    ES,                         /* European Number Separator */
    ET,                         /* European Number Terminator */
    AN,                         /* Arabic Number */
    CS,                         /* Common Number Separator */
    NSM,                        /* Non-Spacing Mark */
    BN,                         /* Boundary Neutral */
    B,                          /* Paragraph Separator */
    S,                          /* Segment Separator */
    WS,                         /* Whitespace */
    ON                          /* Other Neutrals */
};
| 97 | |
/*
 * Shaping (joining) types, describing which presentation forms a
 * character possesses.
 */
enum {
    SL,     /* Left-Joining; does not exist in U+0600 - U+06FF */
    SR,     /* Right-Joining: has Isolated and Final forms */
    SD,     /* Dual-Joining: has Isolated, Final, Initial and Medial forms */
    SU,     /* Non-Joining */
    SC      /* Join-Causing, like U+0640 (TATWEEL) */
};
| 106 | |
/*
 * One row of the shaping table: a character's shaping type together
 * with the code point of its isolated presentation form.
 */
typedef struct {
    char type;                  /* one of SL, SR, SD, SU, SC */
    wchar_t form_b;             /* isolated presentation form; the table
                                 * stores 0 where there is none */
} shape_node;
| 111 | |
| 112 | /* Kept near the actual table, for verification. */ |
| 113 | #define SHAPE_FIRST 0x621 |
| 114 | #define SHAPE_LAST 0x64A |
| 115 | |
| 116 | const shape_node shapetypes[] = { |
| 117 | /* index, Typ, Iso, Ligature Index*/ |
| 118 | /* 621 */ {SU, 0xFE80}, |
| 119 | /* 622 */ {SR, 0xFE81}, |
| 120 | /* 623 */ {SR, 0xFE83}, |
| 121 | /* 624 */ {SR, 0xFE85}, |
| 122 | /* 625 */ {SR, 0xFE87}, |
| 123 | /* 626 */ {SD, 0xFE89}, |
| 124 | /* 627 */ {SR, 0xFE8D}, |
| 125 | /* 628 */ {SD, 0xFE8F}, |
| 126 | /* 629 */ {SR, 0xFE93}, |
| 127 | /* 62A */ {SD, 0xFE95}, |
| 128 | /* 62B */ {SD, 0xFE99}, |
| 129 | /* 62C */ {SD, 0xFE9D}, |
| 130 | /* 62D */ {SD, 0xFEA1}, |
| 131 | /* 62E */ {SD, 0xFEA5}, |
| 132 | /* 62F */ {SR, 0xFEA9}, |
| 133 | /* 630 */ {SR, 0xFEAB}, |
| 134 | /* 631 */ {SR, 0xFEAD}, |
| 135 | /* 632 */ {SR, 0xFEAF}, |
| 136 | /* 633 */ {SD, 0xFEB1}, |
| 137 | /* 634 */ {SD, 0xFEB5}, |
| 138 | /* 635 */ {SD, 0xFEB9}, |
| 139 | /* 636 */ {SD, 0xFEBD}, |
| 140 | /* 637 */ {SD, 0xFEC1}, |
| 141 | /* 638 */ {SD, 0xFEC5}, |
| 142 | /* 639 */ {SD, 0xFEC9}, |
| 143 | /* 63A */ {SD, 0xFECD}, |
| 144 | /* 63B */ {SU, 0x0}, |
| 145 | /* 63C */ {SU, 0x0}, |
| 146 | /* 63D */ {SU, 0x0}, |
| 147 | /* 63E */ {SU, 0x0}, |
| 148 | /* 63F */ {SU, 0x0}, |
| 149 | /* 640 */ {SC, 0x0}, |
| 150 | /* 641 */ {SD, 0xFED1}, |
| 151 | /* 642 */ {SD, 0xFED5}, |
| 152 | /* 643 */ {SD, 0xFED9}, |
| 153 | /* 644 */ {SD, 0xFEDD}, |
| 154 | /* 645 */ {SD, 0xFEE1}, |
| 155 | /* 646 */ {SD, 0xFEE5}, |
| 156 | /* 647 */ {SD, 0xFEE9}, |
| 157 | /* 648 */ {SR, 0xFEED}, |
| 158 | /* 649 */ {SR, 0xFEEF}, /* SD */ |
| 159 | /* 64A */ {SD, 0xFEF1} |
| 160 | }; |
| 161 | |
| 162 | /* |
| 163 | * Flips the text buffer, according to max level, and |
| 164 | * all higher levels |
| 165 | * |
| 166 | * Input: |
| 167 | * from: text buffer, on which to apply flipping |
| 168 | * level: resolved levels buffer |
| 169 | * max: the maximum level found in this line (should be unsigned char) |
| 170 | * count: line size in bidi_char |
| 171 | */ |
| 172 | void flipThisRun(bidi_char *from, unsigned char *level, int max, int count) |
| 173 | { |
| 174 | int i, j, k, tlevel; |
| 175 | bidi_char temp; |
| 176 | |
| 177 | j = i = 0; |
| 178 | while (i<count && j<count) { |
| 179 | |
| 180 | /* find the start of the run of level=max */ |
| 181 | tlevel = max; |
| 182 | i = j = findIndexOfRun(level, i, count, max); |
| 183 | /* find the end of the run */ |
| 184 | while (i<count && tlevel <= level[i]) { |
| 185 | i++; |
| 186 | } |
| 187 | for (k = i - 1; k > j; k--, j++) { |
| 188 | temp = from[k]; |
| 189 | from[k] = from[j]; |
| 190 | from[j] = temp; |
| 191 | } |
| 192 | } |
| 193 | } |
| 194 | |
/*
 * Returns the first index in [start, count) at which `level' holds
 * exactly `tlevel', or `count' when no such index exists (including
 * when `start' is already at or beyond `count').
 */
int findIndexOfRun(unsigned char *level, int start, int count, int tlevel)
{
    int pos = start;

    while (pos < count && level[pos] != tlevel)
        pos++;

    /* A start beyond count must still report count, not start. */
    return pos < count ? pos : count;
}
| 208 | |
| 209 | /* |
| 210 | * Returns the bidi character type of ch. |
| 211 | * |
| 212 | * The data table in this function is constructed from the Unicode |
| 213 | * Character Database, downloadable from unicode.org at the URL |
| 214 | * |
| 215 | * http://www.unicode.org/Public/UNIDATA/UnicodeData.txt |
| 216 | * |
| 217 | * by the following fragment of Perl: |
| 218 | |
| 219 | perl -ne 'split ";"; $num = hex $_[0]; $type = $_[4];' \ |
| 220 | -e '$fl = ($_[1] =~ /First/ ? 1 : $_[1] =~ /Last/ ? 2 : 0);' \ |
| 221 | -e 'if ($type eq $runtype and ($runend == $num-1 or ' \ |
| 222 | -e ' ($fl==2 and $pfl==1))) {$runend = $num;} else { &reset; }' \ |
| 223 | -e '$pfl=$fl; END { &reset }; sub reset {' \ |
| 224 | -e 'printf" {0x%04x, 0x%04x, %s},\n",$runstart,$runend,$runtype' \ |
| 225 | -e ' if defined $runstart and $runtype ne "ON";' \ |
| 226 | -e '$runstart=$runend=$num; $runtype=$type;}' \ |
| 227 | UnicodeData.txt |
| 228 | |
| 229 | */ |
| 230 | unsigned char getType(int ch) |
| 231 | { |
| 232 | static const struct { |
| 233 | int first, last, type; |
| 234 | } lookup[] = { |
| 235 | {0x0000, 0x0008, BN}, |
| 236 | {0x0009, 0x0009, S}, |
| 237 | {0x000a, 0x000a, B}, |
| 238 | {0x000b, 0x000b, S}, |
| 239 | {0x000c, 0x000c, WS}, |
| 240 | {0x000d, 0x000d, B}, |
| 241 | {0x000e, 0x001b, BN}, |
| 242 | {0x001c, 0x001e, B}, |
| 243 | {0x001f, 0x001f, S}, |
| 244 | {0x0020, 0x0020, WS}, |
| 245 | {0x0023, 0x0025, ET}, |
| 246 | {0x002b, 0x002b, ES}, |
| 247 | {0x002c, 0x002c, CS}, |
| 248 | {0x002d, 0x002d, ES}, |
| 249 | {0x002e, 0x002f, CS}, |
| 250 | {0x0030, 0x0039, EN}, |
| 251 | {0x003a, 0x003a, CS}, |
| 252 | {0x0041, 0x005a, L}, |
| 253 | {0x0061, 0x007a, L}, |
| 254 | {0x007f, 0x0084, BN}, |
| 255 | {0x0085, 0x0085, B}, |
| 256 | {0x0086, 0x009f, BN}, |
| 257 | {0x00a0, 0x00a0, CS}, |
| 258 | {0x00a2, 0x00a5, ET}, |
| 259 | {0x00aa, 0x00aa, L}, |
| 260 | {0x00ad, 0x00ad, BN}, |
| 261 | {0x00b0, 0x00b1, ET}, |
| 262 | {0x00b2, 0x00b3, EN}, |
| 263 | {0x00b5, 0x00b5, L}, |
| 264 | {0x00b9, 0x00b9, EN}, |
| 265 | {0x00ba, 0x00ba, L}, |
| 266 | {0x00c0, 0x00d6, L}, |
| 267 | {0x00d8, 0x00f6, L}, |
| 268 | {0x00f8, 0x0236, L}, |
| 269 | {0x0250, 0x02b8, L}, |
| 270 | {0x02bb, 0x02c1, L}, |
| 271 | {0x02d0, 0x02d1, L}, |
| 272 | {0x02e0, 0x02e4, L}, |
| 273 | {0x02ee, 0x02ee, L}, |
| 274 | {0x0300, 0x0357, NSM}, |
| 275 | {0x035d, 0x036f, NSM}, |
| 276 | {0x037a, 0x037a, L}, |
| 277 | {0x0386, 0x0386, L}, |
| 278 | {0x0388, 0x038a, L}, |
| 279 | {0x038c, 0x038c, L}, |
| 280 | {0x038e, 0x03a1, L}, |
| 281 | {0x03a3, 0x03ce, L}, |
| 282 | {0x03d0, 0x03f5, L}, |
| 283 | {0x03f7, 0x03fb, L}, |
| 284 | {0x0400, 0x0482, L}, |
| 285 | {0x0483, 0x0486, NSM}, |
| 286 | {0x0488, 0x0489, NSM}, |
| 287 | {0x048a, 0x04ce, L}, |
| 288 | {0x04d0, 0x04f5, L}, |
| 289 | {0x04f8, 0x04f9, L}, |
| 290 | {0x0500, 0x050f, L}, |
| 291 | {0x0531, 0x0556, L}, |
| 292 | {0x0559, 0x055f, L}, |
| 293 | {0x0561, 0x0587, L}, |
| 294 | {0x0589, 0x0589, L}, |
| 295 | {0x0591, 0x05a1, NSM}, |
| 296 | {0x05a3, 0x05b9, NSM}, |
| 297 | {0x05bb, 0x05bd, NSM}, |
| 298 | {0x05be, 0x05be, R}, |
| 299 | {0x05bf, 0x05bf, NSM}, |
| 300 | {0x05c0, 0x05c0, R}, |
| 301 | {0x05c1, 0x05c2, NSM}, |
| 302 | {0x05c3, 0x05c3, R}, |
| 303 | {0x05c4, 0x05c4, NSM}, |
| 304 | {0x05d0, 0x05ea, R}, |
| 305 | {0x05f0, 0x05f4, R}, |
| 306 | {0x0600, 0x0603, AL}, |
| 307 | {0x060c, 0x060c, CS}, |
| 308 | {0x060d, 0x060d, AL}, |
| 309 | {0x0610, 0x0615, NSM}, |
| 310 | {0x061b, 0x061b, AL}, |
| 311 | {0x061f, 0x061f, AL}, |
| 312 | {0x0621, 0x063a, AL}, |
| 313 | {0x0640, 0x064a, AL}, |
| 314 | {0x064b, 0x0658, NSM}, |
| 315 | {0x0660, 0x0669, AN}, |
| 316 | {0x066a, 0x066a, ET}, |
| 317 | {0x066b, 0x066c, AN}, |
| 318 | {0x066d, 0x066f, AL}, |
| 319 | {0x0670, 0x0670, NSM}, |
| 320 | {0x0671, 0x06d5, AL}, |
| 321 | {0x06d6, 0x06dc, NSM}, |
| 322 | {0x06dd, 0x06dd, AL}, |
| 323 | {0x06de, 0x06e4, NSM}, |
| 324 | {0x06e5, 0x06e6, AL}, |
| 325 | {0x06e7, 0x06e8, NSM}, |
| 326 | {0x06ea, 0x06ed, NSM}, |
| 327 | {0x06ee, 0x06ef, AL}, |
| 328 | {0x06f0, 0x06f9, EN}, |
| 329 | {0x06fa, 0x070d, AL}, |
| 330 | {0x070f, 0x070f, BN}, |
| 331 | {0x0710, 0x0710, AL}, |
| 332 | {0x0711, 0x0711, NSM}, |
| 333 | {0x0712, 0x072f, AL}, |
| 334 | {0x0730, 0x074a, NSM}, |
| 335 | {0x074d, 0x074f, AL}, |
| 336 | {0x0780, 0x07a5, AL}, |
| 337 | {0x07a6, 0x07b0, NSM}, |
| 338 | {0x07b1, 0x07b1, AL}, |
| 339 | {0x0901, 0x0902, NSM}, |
| 340 | {0x0903, 0x0939, L}, |
| 341 | {0x093c, 0x093c, NSM}, |
| 342 | {0x093d, 0x0940, L}, |
| 343 | {0x0941, 0x0948, NSM}, |
| 344 | {0x0949, 0x094c, L}, |
| 345 | {0x094d, 0x094d, NSM}, |
| 346 | {0x0950, 0x0950, L}, |
| 347 | {0x0951, 0x0954, NSM}, |
| 348 | {0x0958, 0x0961, L}, |
| 349 | {0x0962, 0x0963, NSM}, |
| 350 | {0x0964, 0x0970, L}, |
| 351 | {0x0981, 0x0981, NSM}, |
| 352 | {0x0982, 0x0983, L}, |
| 353 | {0x0985, 0x098c, L}, |
| 354 | {0x098f, 0x0990, L}, |
| 355 | {0x0993, 0x09a8, L}, |
| 356 | {0x09aa, 0x09b0, L}, |
| 357 | {0x09b2, 0x09b2, L}, |
| 358 | {0x09b6, 0x09b9, L}, |
| 359 | {0x09bc, 0x09bc, NSM}, |
| 360 | {0x09bd, 0x09c0, L}, |
| 361 | {0x09c1, 0x09c4, NSM}, |
| 362 | {0x09c7, 0x09c8, L}, |
| 363 | {0x09cb, 0x09cc, L}, |
| 364 | {0x09cd, 0x09cd, NSM}, |
| 365 | {0x09d7, 0x09d7, L}, |
| 366 | {0x09dc, 0x09dd, L}, |
| 367 | {0x09df, 0x09e1, L}, |
| 368 | {0x09e2, 0x09e3, NSM}, |
| 369 | {0x09e6, 0x09f1, L}, |
| 370 | {0x09f2, 0x09f3, ET}, |
| 371 | {0x09f4, 0x09fa, L}, |
| 372 | {0x0a01, 0x0a02, NSM}, |
| 373 | {0x0a03, 0x0a03, L}, |
| 374 | {0x0a05, 0x0a0a, L}, |
| 375 | {0x0a0f, 0x0a10, L}, |
| 376 | {0x0a13, 0x0a28, L}, |
| 377 | {0x0a2a, 0x0a30, L}, |
| 378 | {0x0a32, 0x0a33, L}, |
| 379 | {0x0a35, 0x0a36, L}, |
| 380 | {0x0a38, 0x0a39, L}, |
| 381 | {0x0a3c, 0x0a3c, NSM}, |
| 382 | {0x0a3e, 0x0a40, L}, |
| 383 | {0x0a41, 0x0a42, NSM}, |
| 384 | {0x0a47, 0x0a48, NSM}, |
| 385 | {0x0a4b, 0x0a4d, NSM}, |
| 386 | {0x0a59, 0x0a5c, L}, |
| 387 | {0x0a5e, 0x0a5e, L}, |
| 388 | {0x0a66, 0x0a6f, L}, |
| 389 | {0x0a70, 0x0a71, NSM}, |
| 390 | {0x0a72, 0x0a74, L}, |
| 391 | {0x0a81, 0x0a82, NSM}, |
| 392 | {0x0a83, 0x0a83, L}, |
| 393 | {0x0a85, 0x0a8d, L}, |
| 394 | {0x0a8f, 0x0a91, L}, |
| 395 | {0x0a93, 0x0aa8, L}, |
| 396 | {0x0aaa, 0x0ab0, L}, |
| 397 | {0x0ab2, 0x0ab3, L}, |
| 398 | {0x0ab5, 0x0ab9, L}, |
| 399 | {0x0abc, 0x0abc, NSM}, |
| 400 | {0x0abd, 0x0ac0, L}, |
| 401 | {0x0ac1, 0x0ac5, NSM}, |
| 402 | {0x0ac7, 0x0ac8, NSM}, |
| 403 | {0x0ac9, 0x0ac9, L}, |
| 404 | {0x0acb, 0x0acc, L}, |
| 405 | {0x0acd, 0x0acd, NSM}, |
| 406 | {0x0ad0, 0x0ad0, L}, |
| 407 | {0x0ae0, 0x0ae1, L}, |
| 408 | {0x0ae2, 0x0ae3, NSM}, |
| 409 | {0x0ae6, 0x0aef, L}, |
| 410 | {0x0af1, 0x0af1, ET}, |
| 411 | {0x0b01, 0x0b01, NSM}, |
| 412 | {0x0b02, 0x0b03, L}, |
| 413 | {0x0b05, 0x0b0c, L}, |
| 414 | {0x0b0f, 0x0b10, L}, |
| 415 | {0x0b13, 0x0b28, L}, |
| 416 | {0x0b2a, 0x0b30, L}, |
| 417 | {0x0b32, 0x0b33, L}, |
| 418 | {0x0b35, 0x0b39, L}, |
| 419 | {0x0b3c, 0x0b3c, NSM}, |
| 420 | {0x0b3d, 0x0b3e, L}, |
| 421 | {0x0b3f, 0x0b3f, NSM}, |
| 422 | {0x0b40, 0x0b40, L}, |
| 423 | {0x0b41, 0x0b43, NSM}, |
| 424 | {0x0b47, 0x0b48, L}, |
| 425 | {0x0b4b, 0x0b4c, L}, |
| 426 | {0x0b4d, 0x0b4d, NSM}, |
| 427 | {0x0b56, 0x0b56, NSM}, |
| 428 | {0x0b57, 0x0b57, L}, |
| 429 | {0x0b5c, 0x0b5d, L}, |
| 430 | {0x0b5f, 0x0b61, L}, |
| 431 | {0x0b66, 0x0b71, L}, |
| 432 | {0x0b82, 0x0b82, NSM}, |
| 433 | {0x0b83, 0x0b83, L}, |
| 434 | {0x0b85, 0x0b8a, L}, |
| 435 | {0x0b8e, 0x0b90, L}, |
| 436 | {0x0b92, 0x0b95, L}, |
| 437 | {0x0b99, 0x0b9a, L}, |
| 438 | {0x0b9c, 0x0b9c, L}, |
| 439 | {0x0b9e, 0x0b9f, L}, |
| 440 | {0x0ba3, 0x0ba4, L}, |
| 441 | {0x0ba8, 0x0baa, L}, |
| 442 | {0x0bae, 0x0bb5, L}, |
| 443 | {0x0bb7, 0x0bb9, L}, |
| 444 | {0x0bbe, 0x0bbf, L}, |
| 445 | {0x0bc0, 0x0bc0, NSM}, |
| 446 | {0x0bc1, 0x0bc2, L}, |
| 447 | {0x0bc6, 0x0bc8, L}, |
| 448 | {0x0bca, 0x0bcc, L}, |
| 449 | {0x0bcd, 0x0bcd, NSM}, |
| 450 | {0x0bd7, 0x0bd7, L}, |
| 451 | {0x0be7, 0x0bf2, L}, |
| 452 | {0x0bf9, 0x0bf9, ET}, |
| 453 | {0x0c01, 0x0c03, L}, |
| 454 | {0x0c05, 0x0c0c, L}, |
| 455 | {0x0c0e, 0x0c10, L}, |
| 456 | {0x0c12, 0x0c28, L}, |
| 457 | {0x0c2a, 0x0c33, L}, |
| 458 | {0x0c35, 0x0c39, L}, |
| 459 | {0x0c3e, 0x0c40, NSM}, |
| 460 | {0x0c41, 0x0c44, L}, |
| 461 | {0x0c46, 0x0c48, NSM}, |
| 462 | {0x0c4a, 0x0c4d, NSM}, |
| 463 | {0x0c55, 0x0c56, NSM}, |
| 464 | {0x0c60, 0x0c61, L}, |
| 465 | {0x0c66, 0x0c6f, L}, |
| 466 | {0x0c82, 0x0c83, L}, |
| 467 | {0x0c85, 0x0c8c, L}, |
| 468 | {0x0c8e, 0x0c90, L}, |
| 469 | {0x0c92, 0x0ca8, L}, |
| 470 | {0x0caa, 0x0cb3, L}, |
| 471 | {0x0cb5, 0x0cb9, L}, |
| 472 | {0x0cbc, 0x0cbc, NSM}, |
| 473 | {0x0cbd, 0x0cc4, L}, |
| 474 | {0x0cc6, 0x0cc8, L}, |
| 475 | {0x0cca, 0x0ccb, L}, |
| 476 | {0x0ccc, 0x0ccd, NSM}, |
| 477 | {0x0cd5, 0x0cd6, L}, |
| 478 | {0x0cde, 0x0cde, L}, |
| 479 | {0x0ce0, 0x0ce1, L}, |
| 480 | {0x0ce6, 0x0cef, L}, |
| 481 | {0x0d02, 0x0d03, L}, |
| 482 | {0x0d05, 0x0d0c, L}, |
| 483 | {0x0d0e, 0x0d10, L}, |
| 484 | {0x0d12, 0x0d28, L}, |
| 485 | {0x0d2a, 0x0d39, L}, |
| 486 | {0x0d3e, 0x0d40, L}, |
| 487 | {0x0d41, 0x0d43, NSM}, |
| 488 | {0x0d46, 0x0d48, L}, |
| 489 | {0x0d4a, 0x0d4c, L}, |
| 490 | {0x0d4d, 0x0d4d, NSM}, |
| 491 | {0x0d57, 0x0d57, L}, |
| 492 | {0x0d60, 0x0d61, L}, |
| 493 | {0x0d66, 0x0d6f, L}, |
| 494 | {0x0d82, 0x0d83, L}, |
| 495 | {0x0d85, 0x0d96, L}, |
| 496 | {0x0d9a, 0x0db1, L}, |
| 497 | {0x0db3, 0x0dbb, L}, |
| 498 | {0x0dbd, 0x0dbd, L}, |
| 499 | {0x0dc0, 0x0dc6, L}, |
| 500 | {0x0dca, 0x0dca, NSM}, |
| 501 | {0x0dcf, 0x0dd1, L}, |
| 502 | {0x0dd2, 0x0dd4, NSM}, |
| 503 | {0x0dd6, 0x0dd6, NSM}, |
| 504 | {0x0dd8, 0x0ddf, L}, |
| 505 | {0x0df2, 0x0df4, L}, |
| 506 | {0x0e01, 0x0e30, L}, |
| 507 | {0x0e31, 0x0e31, NSM}, |
| 508 | {0x0e32, 0x0e33, L}, |
| 509 | {0x0e34, 0x0e3a, NSM}, |
| 510 | {0x0e3f, 0x0e3f, ET}, |
| 511 | {0x0e40, 0x0e46, L}, |
| 512 | {0x0e47, 0x0e4e, NSM}, |
| 513 | {0x0e4f, 0x0e5b, L}, |
| 514 | {0x0e81, 0x0e82, L}, |
| 515 | {0x0e84, 0x0e84, L}, |
| 516 | {0x0e87, 0x0e88, L}, |
| 517 | {0x0e8a, 0x0e8a, L}, |
| 518 | {0x0e8d, 0x0e8d, L}, |
| 519 | {0x0e94, 0x0e97, L}, |
| 520 | {0x0e99, 0x0e9f, L}, |
| 521 | {0x0ea1, 0x0ea3, L}, |
| 522 | {0x0ea5, 0x0ea5, L}, |
| 523 | {0x0ea7, 0x0ea7, L}, |
| 524 | {0x0eaa, 0x0eab, L}, |
| 525 | {0x0ead, 0x0eb0, L}, |
| 526 | {0x0eb1, 0x0eb1, NSM}, |
| 527 | {0x0eb2, 0x0eb3, L}, |
| 528 | {0x0eb4, 0x0eb9, NSM}, |
| 529 | {0x0ebb, 0x0ebc, NSM}, |
| 530 | {0x0ebd, 0x0ebd, L}, |
| 531 | {0x0ec0, 0x0ec4, L}, |
| 532 | {0x0ec6, 0x0ec6, L}, |
| 533 | {0x0ec8, 0x0ecd, NSM}, |
| 534 | {0x0ed0, 0x0ed9, L}, |
| 535 | {0x0edc, 0x0edd, L}, |
| 536 | {0x0f00, 0x0f17, L}, |
| 537 | {0x0f18, 0x0f19, NSM}, |
| 538 | {0x0f1a, 0x0f34, L}, |
| 539 | {0x0f35, 0x0f35, NSM}, |
| 540 | {0x0f36, 0x0f36, L}, |
| 541 | {0x0f37, 0x0f37, NSM}, |
| 542 | {0x0f38, 0x0f38, L}, |
| 543 | {0x0f39, 0x0f39, NSM}, |
| 544 | {0x0f3e, 0x0f47, L}, |
| 545 | {0x0f49, 0x0f6a, L}, |
| 546 | {0x0f71, 0x0f7e, NSM}, |
| 547 | {0x0f7f, 0x0f7f, L}, |
| 548 | {0x0f80, 0x0f84, NSM}, |
| 549 | {0x0f85, 0x0f85, L}, |
| 550 | {0x0f86, 0x0f87, NSM}, |
| 551 | {0x0f88, 0x0f8b, L}, |
| 552 | {0x0f90, 0x0f97, NSM}, |
| 553 | {0x0f99, 0x0fbc, NSM}, |
| 554 | {0x0fbe, 0x0fc5, L}, |
| 555 | {0x0fc6, 0x0fc6, NSM}, |
| 556 | {0x0fc7, 0x0fcc, L}, |
| 557 | {0x0fcf, 0x0fcf, L}, |
| 558 | {0x1000, 0x1021, L}, |
| 559 | {0x1023, 0x1027, L}, |
| 560 | {0x1029, 0x102a, L}, |
| 561 | {0x102c, 0x102c, L}, |
| 562 | {0x102d, 0x1030, NSM}, |
| 563 | {0x1031, 0x1031, L}, |
| 564 | {0x1032, 0x1032, NSM}, |
| 565 | {0x1036, 0x1037, NSM}, |
| 566 | {0x1038, 0x1038, L}, |
| 567 | {0x1039, 0x1039, NSM}, |
| 568 | {0x1040, 0x1057, L}, |
| 569 | {0x1058, 0x1059, NSM}, |
| 570 | {0x10a0, 0x10c5, L}, |
| 571 | {0x10d0, 0x10f8, L}, |
| 572 | {0x10fb, 0x10fb, L}, |
| 573 | {0x1100, 0x1159, L}, |
| 574 | {0x115f, 0x11a2, L}, |
| 575 | {0x11a8, 0x11f9, L}, |
| 576 | {0x1200, 0x1206, L}, |
| 577 | {0x1208, 0x1246, L}, |
| 578 | {0x1248, 0x1248, L}, |
| 579 | {0x124a, 0x124d, L}, |
| 580 | {0x1250, 0x1256, L}, |
| 581 | {0x1258, 0x1258, L}, |
| 582 | {0x125a, 0x125d, L}, |
| 583 | {0x1260, 0x1286, L}, |
| 584 | {0x1288, 0x1288, L}, |
| 585 | {0x128a, 0x128d, L}, |
| 586 | {0x1290, 0x12ae, L}, |
| 587 | {0x12b0, 0x12b0, L}, |
| 588 | {0x12b2, 0x12b5, L}, |
| 589 | {0x12b8, 0x12be, L}, |
| 590 | {0x12c0, 0x12c0, L}, |
| 591 | {0x12c2, 0x12c5, L}, |
| 592 | {0x12c8, 0x12ce, L}, |
| 593 | {0x12d0, 0x12d6, L}, |
| 594 | {0x12d8, 0x12ee, L}, |
| 595 | {0x12f0, 0x130e, L}, |
| 596 | {0x1310, 0x1310, L}, |
| 597 | {0x1312, 0x1315, L}, |
| 598 | {0x1318, 0x131e, L}, |
| 599 | {0x1320, 0x1346, L}, |
| 600 | {0x1348, 0x135a, L}, |
| 601 | {0x1361, 0x137c, L}, |
| 602 | {0x13a0, 0x13f4, L}, |
| 603 | {0x1401, 0x1676, L}, |
| 604 | {0x1680, 0x1680, WS}, |
| 605 | {0x1681, 0x169a, L}, |
| 606 | {0x16a0, 0x16f0, L}, |
| 607 | {0x1700, 0x170c, L}, |
| 608 | {0x170e, 0x1711, L}, |
| 609 | {0x1712, 0x1714, NSM}, |
| 610 | {0x1720, 0x1731, L}, |
| 611 | {0x1732, 0x1734, NSM}, |
| 612 | {0x1735, 0x1736, L}, |
| 613 | {0x1740, 0x1751, L}, |
| 614 | {0x1752, 0x1753, NSM}, |
| 615 | {0x1760, 0x176c, L}, |
| 616 | {0x176e, 0x1770, L}, |
| 617 | {0x1772, 0x1773, NSM}, |
| 618 | {0x1780, 0x17b6, L}, |
| 619 | {0x17b7, 0x17bd, NSM}, |
| 620 | {0x17be, 0x17c5, L}, |
| 621 | {0x17c6, 0x17c6, NSM}, |
| 622 | {0x17c7, 0x17c8, L}, |
| 623 | {0x17c9, 0x17d3, NSM}, |
| 624 | {0x17d4, 0x17da, L}, |
| 625 | {0x17db, 0x17db, ET}, |
| 626 | {0x17dc, 0x17dc, L}, |
| 627 | {0x17dd, 0x17dd, NSM}, |
| 628 | {0x17e0, 0x17e9, L}, |
| 629 | {0x180b, 0x180d, NSM}, |
| 630 | {0x180e, 0x180e, WS}, |
| 631 | {0x1810, 0x1819, L}, |
| 632 | {0x1820, 0x1877, L}, |
| 633 | {0x1880, 0x18a8, L}, |
| 634 | {0x18a9, 0x18a9, NSM}, |
| 635 | {0x1900, 0x191c, L}, |
| 636 | {0x1920, 0x1922, NSM}, |
| 637 | {0x1923, 0x1926, L}, |
| 638 | {0x1927, 0x192b, NSM}, |
| 639 | {0x1930, 0x1931, L}, |
| 640 | {0x1932, 0x1932, NSM}, |
| 641 | {0x1933, 0x1938, L}, |
| 642 | {0x1939, 0x193b, NSM}, |
| 643 | {0x1946, 0x196d, L}, |
| 644 | {0x1970, 0x1974, L}, |
| 645 | {0x1d00, 0x1d6b, L}, |
| 646 | {0x1e00, 0x1e9b, L}, |
| 647 | {0x1ea0, 0x1ef9, L}, |
| 648 | {0x1f00, 0x1f15, L}, |
| 649 | {0x1f18, 0x1f1d, L}, |
| 650 | {0x1f20, 0x1f45, L}, |
| 651 | {0x1f48, 0x1f4d, L}, |
| 652 | {0x1f50, 0x1f57, L}, |
| 653 | {0x1f59, 0x1f59, L}, |
| 654 | {0x1f5b, 0x1f5b, L}, |
| 655 | {0x1f5d, 0x1f5d, L}, |
| 656 | {0x1f5f, 0x1f7d, L}, |
| 657 | {0x1f80, 0x1fb4, L}, |
| 658 | {0x1fb6, 0x1fbc, L}, |
| 659 | {0x1fbe, 0x1fbe, L}, |
| 660 | {0x1fc2, 0x1fc4, L}, |
| 661 | {0x1fc6, 0x1fcc, L}, |
| 662 | {0x1fd0, 0x1fd3, L}, |
| 663 | {0x1fd6, 0x1fdb, L}, |
| 664 | {0x1fe0, 0x1fec, L}, |
| 665 | {0x1ff2, 0x1ff4, L}, |
| 666 | {0x1ff6, 0x1ffc, L}, |
| 667 | {0x2000, 0x200a, WS}, |
| 668 | {0x200b, 0x200d, BN}, |
| 669 | {0x200e, 0x200e, L}, |
| 670 | {0x200f, 0x200f, R}, |
| 671 | {0x2028, 0x2028, WS}, |
| 672 | {0x2029, 0x2029, B}, |
| 673 | {0x202a, 0x202a, LRE}, |
| 674 | {0x202b, 0x202b, RLE}, |
| 675 | {0x202c, 0x202c, PDF}, |
| 676 | {0x202d, 0x202d, LRO}, |
| 677 | {0x202e, 0x202e, RLO}, |
| 678 | {0x202f, 0x202f, WS}, |
| 679 | {0x2030, 0x2034, ET}, |
| 680 | {0x2044, 0x2044, CS}, |
| 681 | {0x205f, 0x205f, WS}, |
| 682 | {0x2060, 0x2063, BN}, |
| 683 | {0x206a, 0x206f, BN}, |
| 684 | {0x2070, 0x2070, EN}, |
| 685 | {0x2071, 0x2071, L}, |
| 686 | {0x2074, 0x2079, EN}, |
| 687 | {0x207a, 0x207b, ET}, |
| 688 | {0x207f, 0x207f, L}, |
| 689 | {0x2080, 0x2089, EN}, |
| 690 | {0x208a, 0x208b, ET}, |
| 691 | {0x20a0, 0x20b1, ET}, |
| 692 | {0x20d0, 0x20ea, NSM}, |
| 693 | {0x2102, 0x2102, L}, |
| 694 | {0x2107, 0x2107, L}, |
| 695 | {0x210a, 0x2113, L}, |
| 696 | {0x2115, 0x2115, L}, |
| 697 | {0x2119, 0x211d, L}, |
| 698 | {0x2124, 0x2124, L}, |
| 699 | {0x2126, 0x2126, L}, |
| 700 | {0x2128, 0x2128, L}, |
| 701 | {0x212a, 0x212d, L}, |
| 702 | {0x212e, 0x212e, ET}, |
| 703 | {0x212f, 0x2131, L}, |
| 704 | {0x2133, 0x2139, L}, |
| 705 | {0x213d, 0x213f, L}, |
| 706 | {0x2145, 0x2149, L}, |
| 707 | {0x2160, 0x2183, L}, |
| 708 | {0x2212, 0x2213, ET}, |
| 709 | {0x2336, 0x237a, L}, |
| 710 | {0x2395, 0x2395, L}, |
| 711 | {0x2488, 0x249b, EN}, |
| 712 | {0x249c, 0x24e9, L}, |
| 713 | {0x2800, 0x28ff, L}, |
| 714 | {0x3000, 0x3000, WS}, |
| 715 | {0x3005, 0x3007, L}, |
| 716 | {0x3021, 0x3029, L}, |
| 717 | {0x302a, 0x302f, NSM}, |
| 718 | {0x3031, 0x3035, L}, |
| 719 | {0x3038, 0x303c, L}, |
| 720 | {0x3041, 0x3096, L}, |
| 721 | {0x3099, 0x309a, NSM}, |
| 722 | {0x309d, 0x309f, L}, |
| 723 | {0x30a1, 0x30fa, L}, |
| 724 | {0x30fc, 0x30ff, L}, |
| 725 | {0x3105, 0x312c, L}, |
| 726 | {0x3131, 0x318e, L}, |
| 727 | {0x3190, 0x31b7, L}, |
| 728 | {0x31f0, 0x321c, L}, |
| 729 | {0x3220, 0x3243, L}, |
| 730 | {0x3260, 0x327b, L}, |
| 731 | {0x327f, 0x32b0, L}, |
| 732 | {0x32c0, 0x32cb, L}, |
| 733 | {0x32d0, 0x32fe, L}, |
| 734 | {0x3300, 0x3376, L}, |
| 735 | {0x337b, 0x33dd, L}, |
| 736 | {0x33e0, 0x33fe, L}, |
| 737 | {0x3400, 0x4db5, L}, |
| 738 | {0x4e00, 0x9fa5, L}, |
| 739 | {0xa000, 0xa48c, L}, |
| 740 | {0xac00, 0xd7a3, L}, |
| 741 | {0xd800, 0xfa2d, L}, |
| 742 | {0xfa30, 0xfa6a, L}, |
| 743 | {0xfb00, 0xfb06, L}, |
| 744 | {0xfb13, 0xfb17, L}, |
| 745 | {0xfb1d, 0xfb1d, R}, |
| 746 | {0xfb1e, 0xfb1e, NSM}, |
| 747 | {0xfb1f, 0xfb28, R}, |
| 748 | {0xfb29, 0xfb29, ET}, |
| 749 | {0xfb2a, 0xfb36, R}, |
| 750 | {0xfb38, 0xfb3c, R}, |
| 751 | {0xfb3e, 0xfb3e, R}, |
| 752 | {0xfb40, 0xfb41, R}, |
| 753 | {0xfb43, 0xfb44, R}, |
| 754 | {0xfb46, 0xfb4f, R}, |
| 755 | {0xfb50, 0xfbb1, AL}, |
| 756 | {0xfbd3, 0xfd3d, AL}, |
| 757 | {0xfd50, 0xfd8f, AL}, |
| 758 | {0xfd92, 0xfdc7, AL}, |
| 759 | {0xfdf0, 0xfdfc, AL}, |
| 760 | {0xfe00, 0xfe0f, NSM}, |
| 761 | {0xfe20, 0xfe23, NSM}, |
| 762 | {0xfe50, 0xfe50, CS}, |
| 763 | {0xfe52, 0xfe52, CS}, |
| 764 | {0xfe55, 0xfe55, CS}, |
| 765 | {0xfe5f, 0xfe5f, ET}, |
| 766 | {0xfe62, 0xfe63, ET}, |
| 767 | {0xfe69, 0xfe6a, ET}, |
| 768 | {0xfe70, 0xfe74, AL}, |
| 769 | {0xfe76, 0xfefc, AL}, |
| 770 | {0xfeff, 0xfeff, BN}, |
| 771 | {0xff03, 0xff05, ET}, |
| 772 | {0xff0b, 0xff0b, ET}, |
| 773 | {0xff0c, 0xff0c, CS}, |
| 774 | {0xff0d, 0xff0d, ET}, |
| 775 | {0xff0e, 0xff0e, CS}, |
| 776 | {0xff0f, 0xff0f, ES}, |
| 777 | {0xff10, 0xff19, EN}, |
| 778 | {0xff1a, 0xff1a, CS}, |
| 779 | {0xff21, 0xff3a, L}, |
| 780 | {0xff41, 0xff5a, L}, |
| 781 | {0xff66, 0xffbe, L}, |
| 782 | {0xffc2, 0xffc7, L}, |
| 783 | {0xffca, 0xffcf, L}, |
| 784 | {0xffd2, 0xffd7, L}, |
| 785 | {0xffda, 0xffdc, L}, |
| 786 | {0xffe0, 0xffe1, ET}, |
| 787 | {0xffe5, 0xffe6, ET}, |
| 788 | {0x10000, 0x1000b, L}, |
| 789 | {0x1000d, 0x10026, L}, |
| 790 | {0x10028, 0x1003a, L}, |
| 791 | {0x1003c, 0x1003d, L}, |
| 792 | {0x1003f, 0x1004d, L}, |
| 793 | {0x10050, 0x1005d, L}, |
| 794 | {0x10080, 0x100fa, L}, |
| 795 | {0x10100, 0x10100, L}, |
| 796 | {0x10102, 0x10102, L}, |
| 797 | {0x10107, 0x10133, L}, |
| 798 | {0x10137, 0x1013f, L}, |
| 799 | {0x10300, 0x1031e, L}, |
| 800 | {0x10320, 0x10323, L}, |
| 801 | {0x10330, 0x1034a, L}, |
| 802 | {0x10380, 0x1039d, L}, |
| 803 | {0x1039f, 0x1039f, L}, |
| 804 | {0x10400, 0x1049d, L}, |
| 805 | {0x104a0, 0x104a9, L}, |
| 806 | {0x10800, 0x10805, R}, |
| 807 | {0x10808, 0x10808, R}, |
| 808 | {0x1080a, 0x10835, R}, |
| 809 | {0x10837, 0x10838, R}, |
| 810 | {0x1083c, 0x1083c, R}, |
| 811 | {0x1083f, 0x1083f, R}, |
| 812 | {0x1d000, 0x1d0f5, L}, |
| 813 | {0x1d100, 0x1d126, L}, |
| 814 | {0x1d12a, 0x1d166, L}, |
| 815 | {0x1d167, 0x1d169, NSM}, |
| 816 | {0x1d16a, 0x1d172, L}, |
| 817 | {0x1d173, 0x1d17a, BN}, |
| 818 | {0x1d17b, 0x1d182, NSM}, |
| 819 | {0x1d183, 0x1d184, L}, |
| 820 | {0x1d185, 0x1d18b, NSM}, |
| 821 | {0x1d18c, 0x1d1a9, L}, |
| 822 | {0x1d1aa, 0x1d1ad, NSM}, |
| 823 | {0x1d1ae, 0x1d1dd, L}, |
| 824 | {0x1d400, 0x1d454, L}, |
| 825 | {0x1d456, 0x1d49c, L}, |
| 826 | {0x1d49e, 0x1d49f, L}, |
| 827 | {0x1d4a2, 0x1d4a2, L}, |
| 828 | {0x1d4a5, 0x1d4a6, L}, |
| 829 | {0x1d4a9, 0x1d4ac, L}, |
| 830 | {0x1d4ae, 0x1d4b9, L}, |
| 831 | {0x1d4bb, 0x1d4bb, L}, |
| 832 | {0x1d4bd, 0x1d4c3, L}, |
| 833 | {0x1d4c5, 0x1d505, L}, |
| 834 | {0x1d507, 0x1d50a, L}, |
| 835 | {0x1d50d, 0x1d514, L}, |
| 836 | {0x1d516, 0x1d51c, L}, |
| 837 | {0x1d51e, 0x1d539, L}, |
| 838 | {0x1d53b, 0x1d53e, L}, |
| 839 | {0x1d540, 0x1d544, L}, |
| 840 | {0x1d546, 0x1d546, L}, |
| 841 | {0x1d54a, 0x1d550, L}, |
| 842 | {0x1d552, 0x1d6a3, L}, |
| 843 | {0x1d6a8, 0x1d7c9, L}, |
| 844 | {0x1d7ce, 0x1d7ff, EN}, |
| 845 | {0x20000, 0x2a6d6, L}, |
| 846 | {0x2f800, 0x2fa1d, L}, |
| 847 | {0xe0001, 0xe0001, BN}, |
| 848 | {0xe0020, 0xe007f, BN}, |
| 849 | {0xe0100, 0xe01ef, NSM}, |
| 850 | {0xf0000, 0xffffd, L}, |
| 851 | {0x100000, 0x10fffd, L} |
| 852 | }; |
| 853 | |
| 854 | int i, j, k; |
| 855 | |
| 856 | i = -1; |
| 857 | j = lenof(lookup); |
| 858 | |
| 859 | while (j - i > 1) { |
| 860 | k = (i + j) / 2; |
| 861 | if (ch < lookup[k].first) |
| 862 | j = k; |
| 863 | else if (ch > lookup[k].last) |
| 864 | i = k; |
| 865 | else |
| 866 | return lookup[k].type; |
| 867 | } |
| 868 | |
| 869 | /* |
| 870 | * If we reach here, the character was not in any of the |
| 871 | * intervals listed in the lookup table. This means we return |
| 872 | * ON (`Other Neutrals'). This is the appropriate code for any |
| 873 | * character genuinely not listed in the Unicode table, and |
| 874 | * also the table above has deliberately left out any |
| 875 | * characters _explicitly_ listed as ON (to save space!). |
| 876 | */ |
| 877 | return ON; |
| 878 | } |
| 879 | |
| 880 | /* |
| 881 | * Function exported to front ends to allow them to identify |
| 882 | * bidi-active characters (in case, for example, the platform's |
| 883 | * text display function can't conveniently be prevented from doing |
| 884 | * its own bidi and so special treatment is required for characters |
| 885 | * that would cause the bidi algorithm to activate). |
| 886 | * |
| 887 | * This function is passed a single Unicode code point, and returns |
| 888 | * nonzero if the presence of this code point can possibly cause |
| 889 | * the bidi algorithm to do any reordering. Thus, any string |
| 890 | * composed entirely of characters for which is_rtl() returns zero |
| 891 | * should be safe to pass to a bidi-active platform display |
| 892 | * function without fear. |
| 893 | * |
| 894 | * (is_rtl() must therefore also return true for any character |
| 895 | * which would be affected by Arabic shaping, but this isn't |
| 896 | * important because all such characters are right-to-left so it |
| 897 | * would have flagged them anyway.) |
| 898 | */ |
| 899 | int is_rtl(int c) |
| 900 | { |
| 901 | /* |
| 902 | * After careful reading of the Unicode bidi algorithm (URL as |
| 903 | * given at the top of this file) I believe that the only |
| 904 | * character classes which can possibly cause trouble are R, |
| 905 | * AL, RLE and RLO. I think that any string containing no |
| 906 | * character in any of those classes will be displayed |
| 907 | * uniformly left-to-right by the Unicode bidi algorithm. |
| 908 | */ |
| 909 | const int mask = (1<<R) | (1<<AL) | (1<<RLE) | (1<<RLO); |
| 910 | |
| 911 | return mask & (1 << (getType(c))); |
| 912 | } |
| 913 | |
| 914 | /* |
| 915 | * The most significant 2 bits of each level are used to store |
| 916 | * Override status of each character |
| 917 | * This function sets the override bits of level according |
| 918 | * to the value in override, and reurns the new byte. |
| 919 | */ |
| 920 | unsigned char setOverrideBits(unsigned char level, unsigned char override) |
| 921 | { |
| 922 | if (override == ON) |
| 923 | return level; |
| 924 | else if (override == R) |
| 925 | return level | OISR; |
| 926 | else if (override == L) |
| 927 | return level | OISL; |
| 928 | return level; |
| 929 | } |
| 930 | |
/*
 * Find the most recent run of the same value in `level', and
 * return the value _before_ it. Used to process U+202C POP
 * DIRECTIONAL FORMATTING.
 *
 * The run examined is the one ending at index from-1. Returns -1 when
 * `from' is not positive or when that run reaches back to index 0.
 */
int getPreviousLevel(unsigned char *level, int from)
{
    unsigned char run_value;
    int pos;

    if (from <= 0)
        return -1;

    run_value = level[from - 1];

    /* Scan backwards for the first entry that differs from the run. */
    for (pos = from - 2; pos >= 0; pos--) {
        if (level[pos] != run_value)
            return level[pos];
    }

    return -1;
}
| 951 | |
| 952 | /* The Main shaping function, and the only one to be used |
| 953 | * by the outside world. |
| 954 | * |
| 955 | * line: buffer to apply shaping to. this must be passed by doBidi() first |
| 956 | * to: output buffer for the shaped data |
| 957 | * count: number of characters in line |
| 958 | */ |
| 959 | int do_shape(bidi_char *line, bidi_char *to, int count) |
| 960 | { |
| 961 | int i, tempShape, ligFlag; |
| 962 | |
| 963 | for (ligFlag=i=0; i<count; i++) { |
| 964 | to[i] = line[i]; |
| 965 | tempShape = STYPE(line[i].wc); |
| 966 | switch (tempShape) { |
| 967 | case SC: |
| 968 | break; |
| 969 | |
| 970 | case SU: |
| 971 | break; |
| 972 | |
| 973 | case SR: |
| 974 | tempShape = (i+1 < count ? STYPE(line[i+1].wc) : SU); |
| 975 | if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) |
| 976 | to[i].wc = SFINAL((SISOLATED(line[i].wc))); |
| 977 | else |
| 978 | to[i].wc = SISOLATED(line[i].wc); |
| 979 | break; |
| 980 | |
| 981 | |
| 982 | case SD: |
| 983 | /* Make Ligatures */ |
| 984 | tempShape = (i+1 < count ? STYPE(line[i+1].wc) : SU); |
| 985 | if (line[i].wc == 0x644) { |
| 986 | if (i > 0) switch (line[i-1].wc) { |
| 987 | case 0x622: |
| 988 | ligFlag = 1; |
| 989 | if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) |
| 990 | to[i].wc = 0xFEF6; |
| 991 | else |
| 992 | to[i].wc = 0xFEF5; |
| 993 | break; |
| 994 | case 0x623: |
| 995 | ligFlag = 1; |
| 996 | if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) |
| 997 | to[i].wc = 0xFEF8; |
| 998 | else |
| 999 | to[i].wc = 0xFEF7; |
| 1000 | break; |
| 1001 | case 0x625: |
| 1002 | ligFlag = 1; |
| 1003 | if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) |
| 1004 | to[i].wc = 0xFEFA; |
| 1005 | else |
| 1006 | to[i].wc = 0xFEF9; |
| 1007 | break; |
| 1008 | case 0x627: |
| 1009 | ligFlag = 1; |
| 1010 | if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) |
| 1011 | to[i].wc = 0xFEFC; |
| 1012 | else |
| 1013 | to[i].wc = 0xFEFB; |
| 1014 | break; |
| 1015 | } |
| 1016 | if (ligFlag) { |
| 1017 | to[i-1].wc = 0x20; |
| 1018 | ligFlag = 0; |
| 1019 | break; |
| 1020 | } |
| 1021 | } |
| 1022 | |
| 1023 | if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) { |
| 1024 | tempShape = (i > 0 ? STYPE(line[i-1].wc) : SU); |
| 1025 | if ((tempShape == SR) || (tempShape == SD) || (tempShape == SC)) |
| 1026 | to[i].wc = SMEDIAL((SISOLATED(line[i].wc))); |
| 1027 | else |
| 1028 | to[i].wc = SFINAL((SISOLATED(line[i].wc))); |
| 1029 | break; |
| 1030 | } |
| 1031 | |
| 1032 | tempShape = (i > 0 ? STYPE(line[i-1].wc) : SU); |
| 1033 | if ((tempShape == SR) || (tempShape == SD) || (tempShape == SC)) |
| 1034 | to[i].wc = SINITIAL((SISOLATED(line[i].wc))); |
| 1035 | else |
| 1036 | to[i].wc = SISOLATED(line[i].wc); |
| 1037 | break; |
| 1038 | |
| 1039 | |
| 1040 | } |
| 1041 | } |
| 1042 | return 1; |
| 1043 | } |
| 1044 | |
| 1045 | /* |
| 1046 | * The Main Bidi Function, and the only function that should |
| 1047 | * be used by the outside world. |
| 1048 | * |
| 1049 | * line: a buffer of size count containing text to apply |
| 1050 | * the Bidirectional algorithm to. |
| 1051 | */ |
| 1052 | |
| 1053 | int do_bidi(bidi_char *line, int count) |
| 1054 | { |
| 1055 | unsigned char* types; |
| 1056 | unsigned char* levels; |
| 1057 | unsigned char paragraphLevel; |
| 1058 | unsigned char currentEmbedding; |
| 1059 | unsigned char currentOverride; |
| 1060 | unsigned char tempType; |
| 1061 | int i, j, imax, yes, bover; |
| 1062 | |
| 1063 | /* Check the presence of R or AL types as optimization */ |
| 1064 | yes = 0; |
| 1065 | for (i=0; i<count; i++) { |
| 1066 | int type = getType(line[i].wc); |
| 1067 | if (type == R || type == AL) { |
| 1068 | yes = 1; |
| 1069 | break; |
| 1070 | } |
| 1071 | } |
| 1072 | if (yes == 0) |
| 1073 | return L; |
| 1074 | |
| 1075 | /* Initialize types, levels */ |
| 1076 | types = snewn(count, unsigned char); |
| 1077 | levels = snewn(count, unsigned char); |
| 1078 | |
| 1079 | /* Rule (P1) NOT IMPLEMENTED |
| 1080 | * P1. Split the text into separate paragraphs. A paragraph separator is |
| 1081 | * kept with the previous paragraph. Within each paragraph, apply all the |
| 1082 | * other rules of this algorithm. |
| 1083 | */ |
| 1084 | |
| 1085 | /* Rule (P2), (P3) |
| 1086 | * P2. In each paragraph, find the first character of type L, AL, or R. |
| 1087 | * P3. If a character is found in P2 and it is of type AL or R, then set |
| 1088 | * the paragraph embedding level to one; otherwise, set it to zero. |
| 1089 | */ |
| 1090 | paragraphLevel = 0; |
| 1091 | for (i=0; i<count ; i++) { |
| 1092 | int type = getType(line[i].wc); |
| 1093 | if (type == R || type == AL) { |
| 1094 | paragraphLevel = 1; |
| 1095 | break; |
| 1096 | } else if (type == L) |
| 1097 | break; |
| 1098 | } |
| 1099 | |
| 1100 | /* Rule (X1) |
| 1101 | * X1. Begin by setting the current embedding level to the paragraph |
| 1102 | * embedding level. Set the directional override status to neutral. |
| 1103 | */ |
| 1104 | currentEmbedding = paragraphLevel; |
| 1105 | currentOverride = ON; |
| 1106 | |
| 1107 | /* Rule (X2), (X3), (X4), (X5), (X6), (X7), (X8) |
| 1108 | * X2. With each RLE, compute the least greater odd embedding level. |
| 1109 | * X3. With each LRE, compute the least greater even embedding level. |
| 1110 | * X4. With each RLO, compute the least greater odd embedding level. |
| 1111 | * X5. With each LRO, compute the least greater even embedding level. |
| 1112 | * X6. For all types besides RLE, LRE, RLO, LRO, and PDF: |
| 1113 | * a. Set the level of the current character to the current |
| 1114 | * embedding level. |
| 1115 | * b. Whenever the directional override status is not neutral, |
| 1116 | * reset the current character type to the directional |
| 1117 | * override status. |
| 1118 | * X7. With each PDF, determine the matching embedding or override code. |
| 1119 | * If there was a valid matching code, restore (pop) the last |
| 1120 | * remembered (pushed) embedding level and directional override. |
| 1121 | * X8. All explicit directional embeddings and overrides are completely |
| 1122 | * terminated at the end of each paragraph. Paragraph separators are not |
| 1123 | * included in the embedding. (Useless here) NOT IMPLEMENTED |
| 1124 | */ |
| 1125 | bover = 0; |
| 1126 | for (i=0; i<count; i++) { |
| 1127 | tempType = getType(line[i].wc); |
| 1128 | switch (tempType) { |
| 1129 | case RLE: |
| 1130 | currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding); |
| 1131 | levels[i] = setOverrideBits(levels[i], currentOverride); |
| 1132 | currentOverride = ON; |
| 1133 | break; |
| 1134 | |
| 1135 | case LRE: |
| 1136 | currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding); |
| 1137 | levels[i] = setOverrideBits(levels[i], currentOverride); |
| 1138 | currentOverride = ON; |
| 1139 | break; |
| 1140 | |
| 1141 | case RLO: |
| 1142 | currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding); |
| 1143 | tempType = currentOverride = R; |
| 1144 | bover = 1; |
| 1145 | break; |
| 1146 | |
| 1147 | case LRO: |
| 1148 | currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding); |
| 1149 | tempType = currentOverride = L; |
| 1150 | bover = 1; |
| 1151 | break; |
| 1152 | |
| 1153 | case PDF: |
| 1154 | { |
| 1155 | int prevlevel = getPreviousLevel(levels, i); |
| 1156 | |
| 1157 | if (prevlevel == -1) { |
| 1158 | currentEmbedding = paragraphLevel; |
| 1159 | currentOverride = ON; |
| 1160 | } else { |
| 1161 | currentOverride = currentEmbedding & OMASK; |
| 1162 | currentEmbedding = currentEmbedding & ~OMASK; |
| 1163 | } |
| 1164 | } |
| 1165 | levels[i] = currentEmbedding; |
| 1166 | break; |
| 1167 | |
| 1168 | /* Whitespace is treated as neutral for now */ |
| 1169 | case WS: |
| 1170 | case S: |
| 1171 | levels[i] = currentEmbedding; |
| 1172 | tempType = ON; |
| 1173 | if (currentOverride != ON) |
| 1174 | tempType = currentOverride; |
| 1175 | break; |
| 1176 | |
| 1177 | default: |
| 1178 | levels[i] = currentEmbedding; |
| 1179 | if (currentOverride != ON) |
| 1180 | tempType = currentOverride; |
| 1181 | break; |
| 1182 | |
| 1183 | } |
| 1184 | types[i] = tempType; |
| 1185 | } |
| 1186 | /* this clears out all overrides, so we can use levels safely... */ |
| 1187 | /* checks bover first */ |
| 1188 | if (bover) |
| 1189 | for (i=0; i<count; i++) |
| 1190 | levels[i] = levels[i] & LMASK; |
| 1191 | |
| 1192 | /* Rule (X9) |
| 1193 | * X9. Remove all RLE, LRE, RLO, LRO, PDF, and BN codes. |
| 1194 | * Here, they're converted to BN. |
| 1195 | */ |
| 1196 | for (i=0; i<count; i++) { |
| 1197 | switch (types[i]) { |
| 1198 | case RLE: |
| 1199 | case LRE: |
| 1200 | case RLO: |
| 1201 | case LRO: |
| 1202 | case PDF: |
| 1203 | types[i] = BN; |
| 1204 | break; |
| 1205 | } |
| 1206 | } |
| 1207 | |
| 1208 | /* Rule (W1) |
| 1209 | * W1. Examine each non-spacing mark (NSM) in the level run, and change |
| 1210 | * the type of the NSM to the type of the previous character. If the NSM |
| 1211 | * is at the start of the level run, it will get the type of sor. |
| 1212 | */ |
| 1213 | if (types[0] == NSM) |
| 1214 | types[0] = paragraphLevel; |
| 1215 | |
| 1216 | for (i=1; i<count; i++) { |
| 1217 | if (types[i] == NSM) |
| 1218 | types[i] = types[i-1]; |
| 1219 | /* Is this a safe assumption? |
| 1220 | * I assumed the previous, IS a character. |
| 1221 | */ |
| 1222 | } |
| 1223 | |
| 1224 | /* Rule (W2) |
| 1225 | * W2. Search backwards from each instance of a European number until the |
| 1226 | * first strong type (R, L, AL, or sor) is found. If an AL is found, |
| 1227 | * change the type of the European number to Arabic number. |
| 1228 | */ |
| 1229 | for (i=0; i<count; i++) { |
| 1230 | if (types[i] == EN) { |
| 1231 | j=i; |
| 1232 | while (j >= 0) { |
| 1233 | if (types[j] == AL) { |
| 1234 | types[i] = AN; |
| 1235 | break; |
| 1236 | } else if (types[j] == R || types[j] == L) { |
| 1237 | break; |
| 1238 | } |
| 1239 | j--; |
| 1240 | } |
| 1241 | } |
| 1242 | } |
| 1243 | |
| 1244 | /* Rule (W3) |
| 1245 | * W3. Change all ALs to R. |
| 1246 | * |
| 1247 | * Optimization: on Rule Xn, we might set a flag on AL type |
| 1248 | * to prevent this loop in L R lines only... |
| 1249 | */ |
| 1250 | for (i=0; i<count; i++) { |
| 1251 | if (types[i] == AL) |
| 1252 | types[i] = R; |
| 1253 | } |
| 1254 | |
| 1255 | /* Rule (W4) |
| 1256 | * W4. A single European separator between two European numbers changes |
| 1257 | * to a European number. A single common separator between two numbers |
| 1258 | * of the same type changes to that type. |
| 1259 | */ |
| 1260 | for (i=1; i<(count-1); i++) { |
| 1261 | if (types[i] == ES) { |
| 1262 | if (types[i-1] == EN && types[i+1] == EN) |
| 1263 | types[i] = EN; |
| 1264 | } else if (types[i] == CS) { |
| 1265 | if (types[i-1] == EN && types[i+1] == EN) |
| 1266 | types[i] = EN; |
| 1267 | else if (types[i-1] == AN && types[i+1] == AN) |
| 1268 | types[i] = AN; |
| 1269 | } |
| 1270 | } |
| 1271 | |
| 1272 | /* Rule (W5) |
| 1273 | * W5. A sequence of European terminators adjacent to European numbers |
| 1274 | * changes to all European numbers. |
| 1275 | * |
| 1276 | * Optimization: lots here... else ifs need rearrangement |
| 1277 | */ |
| 1278 | for (i=0; i<count; i++) { |
| 1279 | if (types[i] == ET) { |
| 1280 | if (i > 0 && types[i-1] == EN) { |
| 1281 | types[i] = EN; |
| 1282 | continue; |
| 1283 | } else if (i < count-1 && types[i+1] == EN) { |
| 1284 | types[i] = EN; |
| 1285 | continue; |
| 1286 | } else if (i < count-1 && types[i+1] == ET) { |
| 1287 | j=i; |
| 1288 | while (j <count && types[j] == ET) { |
| 1289 | j++; |
| 1290 | } |
| 1291 | if (types[j] == EN) |
| 1292 | types[i] = EN; |
| 1293 | } |
| 1294 | } |
| 1295 | } |
| 1296 | |
| 1297 | /* Rule (W6) |
| 1298 | * W6. Otherwise, separators and terminators change to Other Neutral: |
| 1299 | */ |
| 1300 | for (i=0; i<count; i++) { |
| 1301 | switch (types[i]) { |
| 1302 | case ES: |
| 1303 | case ET: |
| 1304 | case CS: |
| 1305 | types[i] = ON; |
| 1306 | break; |
| 1307 | } |
| 1308 | } |
| 1309 | |
| 1310 | /* Rule (W7) |
| 1311 | * W7. Search backwards from each instance of a European number until |
| 1312 | * the first strong type (R, L, or sor) is found. If an L is found, |
| 1313 | * then change the type of the European number to L. |
| 1314 | */ |
| 1315 | for (i=0; i<count; i++) { |
| 1316 | if (types[i] == EN) { |
| 1317 | j=i; |
| 1318 | while (j >= 0) { |
| 1319 | if (types[j] == L) { |
| 1320 | types[i] = L; |
| 1321 | break; |
| 1322 | } else if (types[j] == R || types[j] == AL) { |
| 1323 | break; |
| 1324 | } |
| 1325 | j--; |
| 1326 | } |
| 1327 | } |
| 1328 | } |
| 1329 | |
| 1330 | /* Rule (N1) |
| 1331 | * N1. A sequence of neutrals takes the direction of the surrounding |
| 1332 | * strong text if the text on both sides has the same direction. European |
| 1333 | * and Arabic numbers are treated as though they were R. |
| 1334 | */ |
| 1335 | if (count >= 2 && types[0] == ON) { |
| 1336 | if ((types[1] == R) || (types[1] == EN) || (types[1] == AN)) |
| 1337 | types[0] = R; |
| 1338 | else if (types[1] == L) |
| 1339 | types[0] = L; |
| 1340 | } |
| 1341 | for (i=1; i<(count-1); i++) { |
| 1342 | if (types[i] == ON) { |
| 1343 | if (types[i-1] == L) { |
| 1344 | j=i; |
| 1345 | while (j<(count-1) && types[j] == ON) { |
| 1346 | j++; |
| 1347 | } |
| 1348 | if (types[j] == L) { |
| 1349 | while (i<j) { |
| 1350 | types[i] = L; |
| 1351 | i++; |
| 1352 | } |
| 1353 | } |
| 1354 | |
| 1355 | } else if ((types[i-1] == R) || |
| 1356 | (types[i-1] == EN) || |
| 1357 | (types[i-1] == AN)) { |
| 1358 | j=i; |
| 1359 | while (j<(count-1) && types[j] == ON) { |
| 1360 | j++; |
| 1361 | } |
| 1362 | if ((types[j] == R) || |
| 1363 | (types[j] == EN) || |
| 1364 | (types[j] == AN)) { |
| 1365 | while (i<j) { |
| 1366 | types[i] = R; |
| 1367 | i++; |
| 1368 | } |
| 1369 | } |
| 1370 | } |
| 1371 | } |
| 1372 | } |
| 1373 | if (count >= 2 && types[count-1] == ON) { |
| 1374 | if (types[count-2] == R || types[count-2] == EN || types[count-2] == AN) |
| 1375 | types[count-1] = R; |
| 1376 | else if (types[count-2] == L) |
| 1377 | types[count-1] = L; |
| 1378 | } |
| 1379 | |
| 1380 | /* Rule (N2) |
| 1381 | * N2. Any remaining neutrals take the embedding direction. |
| 1382 | */ |
| 1383 | for (i=0; i<count; i++) { |
| 1384 | if (types[i] == ON) { |
| 1385 | if ((levels[i] % 2) == 0) |
| 1386 | types[i] = L; |
| 1387 | else |
| 1388 | types[i] = R; |
| 1389 | } |
| 1390 | } |
| 1391 | |
| 1392 | /* Rule (I1) |
| 1393 | * I1. For all characters with an even (left-to-right) embedding |
| 1394 | * direction, those of type R go up one level and those of type AN or |
| 1395 | * EN go up two levels. |
| 1396 | */ |
| 1397 | for (i=0; i<count; i++) { |
| 1398 | if ((levels[i] % 2) == 0) { |
| 1399 | if (types[i] == R) |
| 1400 | levels[i] += 1; |
| 1401 | else if (types[i] == AN || types[i] == EN) |
| 1402 | levels[i] += 2; |
| 1403 | } |
| 1404 | } |
| 1405 | |
| 1406 | /* Rule (I2) |
| 1407 | * I2. For all characters with an odd (right-to-left) embedding direction, |
| 1408 | * those of type L, EN or AN go up one level. |
| 1409 | */ |
| 1410 | for (i=0; i<count; i++) { |
| 1411 | if ((levels[i] % 2) == 1) { |
| 1412 | if (types[i] == L || types[i] == EN || types[i] == AN) |
| 1413 | levels[i] += 1; |
| 1414 | } |
| 1415 | } |
| 1416 | |
| 1417 | /* Rule (L1) |
| 1418 | * L1. On each line, reset the embedding level of the following characters |
| 1419 | * to the paragraph embedding level: |
| 1420 | * (1)segment separators, (2)paragraph separators, |
| 1421 | * (3)any sequence of whitespace characters preceding |
| 1422 | * a segment separator or paragraph separator, |
| 1423 | * (4)and any sequence of white space characters |
| 1424 | * at the end of the line. |
| 1425 | * The types of characters used here are the original types, not those |
| 1426 | * modified by the previous phase. |
| 1427 | */ |
| 1428 | j=count-1; |
| 1429 | while (j>0 && (getType(line[j].wc) == WS)) { |
| 1430 | j--; |
| 1431 | } |
| 1432 | if (j < (count-1)) { |
| 1433 | for (j++; j<count; j++) |
| 1434 | levels[j] = paragraphLevel; |
| 1435 | } |
| 1436 | for (i=0; i<count; i++) { |
| 1437 | tempType = getType(line[i].wc); |
| 1438 | if (tempType == WS) { |
| 1439 | j=i; |
| 1440 | while (j<count && (getType(line[j].wc) == WS)) { |
| 1441 | j++; |
| 1442 | } |
| 1443 | if (j==count || getType(line[j].wc) == B || |
| 1444 | getType(line[j].wc) == S) { |
| 1445 | for (j--; j>=i ; j--) { |
| 1446 | levels[j] = paragraphLevel; |
| 1447 | } |
| 1448 | } |
| 1449 | } else if (tempType == B || tempType == S) { |
| 1450 | levels[i] = paragraphLevel; |
| 1451 | } |
| 1452 | } |
| 1453 | |
| 1454 | /* Rule (L4) NOT IMPLEMENTED |
| 1455 | * L4. A character that possesses the mirrored property as specified by |
| 1456 | * Section 4.7, Mirrored, must be depicted by a mirrored glyph if the |
| 1457 | * resolved directionality of that character is R. |
| 1458 | */ |
| 1459 | /* Note: this is implemented before L2 for efficiency */ |
| 1460 | for (i=0; i<count; i++) |
| 1461 | if ((levels[i] % 2) == 1) |
| 1462 | doMirror(&line[i].wc); |
| 1463 | |
| 1464 | /* Rule (L2) |
| 1465 | * L2. From the highest level found in the text to the lowest odd level on |
| 1466 | * each line, including intermediate levels not actually present in the |
| 1467 | * text, reverse any contiguous sequence of characters that are at that |
| 1468 | * level or higher |
| 1469 | */ |
| 1470 | /* we flip the character string and leave the level array */ |
| 1471 | imax = 0; |
| 1472 | i=0; |
| 1473 | tempType = levels[0]; |
| 1474 | while (i < count) { |
| 1475 | if (levels[i] > tempType) { |
| 1476 | tempType = levels[i]; |
| 1477 | imax=i; |
| 1478 | } |
| 1479 | i++; |
| 1480 | } |
| 1481 | /* maximum level in tempType, its index in imax. */ |
| 1482 | while (tempType > 0) { /* loop from highest level to the least odd, */ |
| 1483 | /* which i assume is 1 */ |
| 1484 | flipThisRun(line, levels, tempType, count); |
| 1485 | tempType--; |
| 1486 | } |
| 1487 | |
| 1488 | /* Rule (L3) NOT IMPLEMENTED |
| 1489 | * L3. Combining marks applied to a right-to-left base character will at |
| 1490 | * this point precede their base character. If the rendering engine |
| 1491 | * expects them to follow the base characters in the final display |
| 1492 | * process, then the ordering of the marks and the base character must |
| 1493 | * be reversed. |
| 1494 | */ |
| 1495 | sfree(types); |
| 1496 | sfree(levels); |
| 1497 | return R; |
| 1498 | } |
| 1499 | |
| 1500 | |
/*
 * Bad, Horrible function
 * takes a pointer to a character that is checked for
 * having a mirror glyph.
 *
 * ch: in/out.  If *ch is one of the code points listed below it is
 *     replaced by its mirrored counterpart; any other value is left
 *     untouched.
 *
 * The outer if-chain dispatches on the high byte of the low 16 bits so
 * that each switch only has to cover one 256-code-point page.  Every
 * mapping is paired with its inverse, so applying the function twice
 * yields the original character.
 */
void doMirror(wchar_t* ch)
{
    if ((*ch & 0xFF00) == 0) {
        switch (*ch) {
          case 0x0028: *ch = 0x0029; break;
          case 0x0029: *ch = 0x0028; break;
          case 0x003C: *ch = 0x003E; break;
          case 0x003E: *ch = 0x003C; break;
          case 0x005B: *ch = 0x005D; break;
          case 0x005D: *ch = 0x005B; break;
          case 0x007B: *ch = 0x007D; break;
          case 0x007D: *ch = 0x007B; break;
          case 0x00AB: *ch = 0x00BB; break;
          case 0x00BB: *ch = 0x00AB; break;
        }
    } else if ((*ch & 0xFF00) == 0x2000) {
        switch (*ch) {
          case 0x2039: *ch = 0x203A; break;
          case 0x203A: *ch = 0x2039; break;
          case 0x2045: *ch = 0x2046; break;
          case 0x2046: *ch = 0x2045; break;
          case 0x207D: *ch = 0x207E; break;
          case 0x207E: *ch = 0x207D; break;
          case 0x208D: *ch = 0x208E; break;
          case 0x208E: *ch = 0x208D; break;
        }
    } else if ((*ch & 0xFF00) == 0x2200) {
        switch (*ch) {
          case 0x2208: *ch = 0x220B; break;
          case 0x2209: *ch = 0x220C; break;
          case 0x220A: *ch = 0x220D; break;
          case 0x220B: *ch = 0x2208; break;
          case 0x220C: *ch = 0x2209; break;
          case 0x220D: *ch = 0x220A; break;
          case 0x2215: *ch = 0x29F5; break;
          case 0x223C: *ch = 0x223D; break;
          case 0x223D: *ch = 0x223C; break;
          case 0x2243: *ch = 0x22CD; break;
          case 0x2252: *ch = 0x2253; break;
          case 0x2253: *ch = 0x2252; break;
          case 0x2254: *ch = 0x2255; break;
          case 0x2255: *ch = 0x2254; break;
          case 0x2264: *ch = 0x2265; break;
          case 0x2265: *ch = 0x2264; break;
          case 0x2266: *ch = 0x2267; break;
          case 0x2267: *ch = 0x2266; break;
          case 0x2268: *ch = 0x2269; break;
          case 0x2269: *ch = 0x2268; break;
          case 0x226A: *ch = 0x226B; break;
          case 0x226B: *ch = 0x226A; break;
          case 0x226E: *ch = 0x226F; break;
          case 0x226F: *ch = 0x226E; break;
          case 0x2270: *ch = 0x2271; break;
          case 0x2271: *ch = 0x2270; break;
          case 0x2272: *ch = 0x2273; break;
          case 0x2273: *ch = 0x2272; break;
          case 0x2274: *ch = 0x2275; break;
          case 0x2275: *ch = 0x2274; break;
          case 0x2276: *ch = 0x2277; break;
          case 0x2277: *ch = 0x2276; break;
          case 0x2278: *ch = 0x2279; break;
          case 0x2279: *ch = 0x2278; break;
          case 0x227A: *ch = 0x227B; break;
          case 0x227B: *ch = 0x227A; break;
          case 0x227C: *ch = 0x227D; break;
          case 0x227D: *ch = 0x227C; break;
          case 0x227E: *ch = 0x227F; break;
          case 0x227F: *ch = 0x227E; break;
          case 0x2280: *ch = 0x2281; break;
          case 0x2281: *ch = 0x2280; break;
          case 0x2282: *ch = 0x2283; break;
          case 0x2283: *ch = 0x2282; break;
          case 0x2284: *ch = 0x2285; break;
          case 0x2285: *ch = 0x2284; break;
          case 0x2286: *ch = 0x2287; break;
          case 0x2287: *ch = 0x2286; break;
          case 0x2288: *ch = 0x2289; break;
          case 0x2289: *ch = 0x2288; break;
          case 0x228A: *ch = 0x228B; break;
          case 0x228B: *ch = 0x228A; break;
          case 0x228F: *ch = 0x2290; break;
          case 0x2290: *ch = 0x228F; break;
          case 0x2291: *ch = 0x2292; break;
          case 0x2292: *ch = 0x2291; break;
          case 0x2298: *ch = 0x29B8; break;
          case 0x22A2: *ch = 0x22A3; break;
          case 0x22A3: *ch = 0x22A2; break;
          case 0x22A6: *ch = 0x2ADE; break;
          case 0x22A8: *ch = 0x2AE4; break;
          case 0x22A9: *ch = 0x2AE3; break;
          case 0x22AB: *ch = 0x2AE5; break;
          case 0x22B0: *ch = 0x22B1; break;
          case 0x22B1: *ch = 0x22B0; break;
          case 0x22B2: *ch = 0x22B3; break;
          case 0x22B3: *ch = 0x22B2; break;
          case 0x22B4: *ch = 0x22B5; break;
          case 0x22B5: *ch = 0x22B4; break;
          case 0x22B6: *ch = 0x22B7; break;
          case 0x22B7: *ch = 0x22B6; break;
          case 0x22C9: *ch = 0x22CA; break;
          case 0x22CA: *ch = 0x22C9; break;
          case 0x22CB: *ch = 0x22CC; break;
          case 0x22CC: *ch = 0x22CB; break;
          case 0x22CD: *ch = 0x2243; break;
          case 0x22D0: *ch = 0x22D1; break;
          case 0x22D1: *ch = 0x22D0; break;
          case 0x22D6: *ch = 0x22D7; break;
          case 0x22D7: *ch = 0x22D6; break;
          case 0x22D8: *ch = 0x22D9; break;
          case 0x22D9: *ch = 0x22D8; break;
          case 0x22DA: *ch = 0x22DB; break;
          case 0x22DB: *ch = 0x22DA; break;
          case 0x22DC: *ch = 0x22DD; break;
          case 0x22DD: *ch = 0x22DC; break;
          case 0x22DE: *ch = 0x22DF; break;
          case 0x22DF: *ch = 0x22DE; break;
          case 0x22E0: *ch = 0x22E1; break;
          case 0x22E1: *ch = 0x22E0; break;
          case 0x22E2: *ch = 0x22E3; break;
          case 0x22E3: *ch = 0x22E2; break;
          case 0x22E4: *ch = 0x22E5; break;
          case 0x22E5: *ch = 0x22E4; break;
          case 0x22E6: *ch = 0x22E7; break;
          case 0x22E7: *ch = 0x22E6; break;
          case 0x22E8: *ch = 0x22E9; break;
          case 0x22E9: *ch = 0x22E8; break;
          case 0x22EA: *ch = 0x22EB; break;
          case 0x22EB: *ch = 0x22EA; break;
          case 0x22EC: *ch = 0x22ED; break;
          case 0x22ED: *ch = 0x22EC; break;
          case 0x22F0: *ch = 0x22F1; break;
          case 0x22F1: *ch = 0x22F0; break;
          case 0x22F2: *ch = 0x22FA; break;
          case 0x22F3: *ch = 0x22FB; break;
          case 0x22F4: *ch = 0x22FC; break;
          case 0x22F6: *ch = 0x22FD; break;
          case 0x22F7: *ch = 0x22FE; break;
          case 0x22FA: *ch = 0x22F2; break;
          case 0x22FB: *ch = 0x22F3; break;
          case 0x22FC: *ch = 0x22F4; break;
          case 0x22FD: *ch = 0x22F6; break;
          case 0x22FE: *ch = 0x22F7; break;
        }
    } else if ((*ch & 0xFF00) == 0x2300) {
        switch (*ch) {
          case 0x2308: *ch = 0x2309; break;
          case 0x2309: *ch = 0x2308; break;
          case 0x230A: *ch = 0x230B; break;
          case 0x230B: *ch = 0x230A; break;
          case 0x2329: *ch = 0x232A; break;
          case 0x232A: *ch = 0x2329; break;
        }
    } else if ((*ch & 0xFF00) == 0x2700) {
        switch (*ch) {
          case 0x2768: *ch = 0x2769; break;
          case 0x2769: *ch = 0x2768; break;
          case 0x276A: *ch = 0x276B; break;
          case 0x276B: *ch = 0x276A; break;
          case 0x276C: *ch = 0x276D; break;
          case 0x276D: *ch = 0x276C; break;
          case 0x276E: *ch = 0x276F; break;
          case 0x276F: *ch = 0x276E; break;
          case 0x2770: *ch = 0x2771; break;
          case 0x2771: *ch = 0x2770; break;
          case 0x2772: *ch = 0x2773; break;
          case 0x2773: *ch = 0x2772; break;
          case 0x2774: *ch = 0x2775; break;
          case 0x2775: *ch = 0x2774; break;
          case 0x27D5: *ch = 0x27D6; break;
          case 0x27D6: *ch = 0x27D5; break;
          case 0x27DD: *ch = 0x27DE; break;
          case 0x27DE: *ch = 0x27DD; break;
          case 0x27E2: *ch = 0x27E3; break;
          case 0x27E3: *ch = 0x27E2; break;
          case 0x27E4: *ch = 0x27E5; break;
          case 0x27E5: *ch = 0x27E4; break;
          case 0x27E6: *ch = 0x27E7; break;
          case 0x27E7: *ch = 0x27E6; break;
          case 0x27E8: *ch = 0x27E9; break;
          case 0x27E9: *ch = 0x27E8; break;
          case 0x27EA: *ch = 0x27EB; break;
          case 0x27EB: *ch = 0x27EA; break;
        }
    } else if ((*ch & 0xFF00) == 0x2900) {
        switch (*ch) {
          case 0x2983: *ch = 0x2984; break;
          case 0x2984: *ch = 0x2983; break;
          case 0x2985: *ch = 0x2986; break;
          case 0x2986: *ch = 0x2985; break;
          case 0x2987: *ch = 0x2988; break;
          case 0x2988: *ch = 0x2987; break;
          case 0x2989: *ch = 0x298A; break;
          case 0x298A: *ch = 0x2989; break;
          case 0x298B: *ch = 0x298C; break;
          case 0x298C: *ch = 0x298B; break;
          /* 298D..2990 are cross-paired: 298D<->2990, 298E<->298F. */
          case 0x298D: *ch = 0x2990; break;
          case 0x298E: *ch = 0x298F; break;
          case 0x298F: *ch = 0x298E; break;
          case 0x2990: *ch = 0x298D; break;
          case 0x2991: *ch = 0x2992; break;
          case 0x2992: *ch = 0x2991; break;
          case 0x2993: *ch = 0x2994; break;
          case 0x2994: *ch = 0x2993; break;
          case 0x2995: *ch = 0x2996; break;
          case 0x2996: *ch = 0x2995; break;
          case 0x2997: *ch = 0x2998; break;
          case 0x2998: *ch = 0x2997; break;
          case 0x29B8: *ch = 0x2298; break;
          case 0x29C0: *ch = 0x29C1; break;
          case 0x29C1: *ch = 0x29C0; break;
          case 0x29C4: *ch = 0x29C5; break;
          case 0x29C5: *ch = 0x29C4; break;
          case 0x29CF: *ch = 0x29D0; break;
          case 0x29D0: *ch = 0x29CF; break;
          case 0x29D1: *ch = 0x29D2; break;
          case 0x29D2: *ch = 0x29D1; break;
          case 0x29D4: *ch = 0x29D5; break;
          case 0x29D5: *ch = 0x29D4; break;
          case 0x29D8: *ch = 0x29D9; break;
          case 0x29D9: *ch = 0x29D8; break;
          case 0x29DA: *ch = 0x29DB; break;
          case 0x29DB: *ch = 0x29DA; break;
          case 0x29F5: *ch = 0x2215; break;
          case 0x29F8: *ch = 0x29F9; break;
          case 0x29F9: *ch = 0x29F8; break;
          case 0x29FC: *ch = 0x29FD; break;
          case 0x29FD: *ch = 0x29FC; break;
        }
    } else if ((*ch & 0xFF00) == 0x2A00) {
        switch (*ch) {
          case 0x2A2B: *ch = 0x2A2C; break;
          case 0x2A2C: *ch = 0x2A2B; break;
          /* 2A2D pairs with 2A2E (not 2A2C, which belongs to 2A2B). */
          case 0x2A2D: *ch = 0x2A2E; break;
          case 0x2A2E: *ch = 0x2A2D; break;
          case 0x2A34: *ch = 0x2A35; break;
          case 0x2A35: *ch = 0x2A34; break;
          case 0x2A3C: *ch = 0x2A3D; break;
          case 0x2A3D: *ch = 0x2A3C; break;
          case 0x2A64: *ch = 0x2A65; break;
          case 0x2A65: *ch = 0x2A64; break;
          case 0x2A79: *ch = 0x2A7A; break;
          case 0x2A7A: *ch = 0x2A79; break;
          case 0x2A7D: *ch = 0x2A7E; break;
          case 0x2A7E: *ch = 0x2A7D; break;
          case 0x2A7F: *ch = 0x2A80; break;
          case 0x2A80: *ch = 0x2A7F; break;
          case 0x2A81: *ch = 0x2A82; break;
          case 0x2A82: *ch = 0x2A81; break;
          case 0x2A83: *ch = 0x2A84; break;
          case 0x2A84: *ch = 0x2A83; break;
          case 0x2A8B: *ch = 0x2A8C; break;
          case 0x2A8C: *ch = 0x2A8B; break;
          case 0x2A91: *ch = 0x2A92; break;
          case 0x2A92: *ch = 0x2A91; break;
          case 0x2A93: *ch = 0x2A94; break;
          case 0x2A94: *ch = 0x2A93; break;
          case 0x2A95: *ch = 0x2A96; break;
          case 0x2A96: *ch = 0x2A95; break;
          case 0x2A97: *ch = 0x2A98; break;
          case 0x2A98: *ch = 0x2A97; break;
          case 0x2A99: *ch = 0x2A9A; break;
          case 0x2A9A: *ch = 0x2A99; break;
          case 0x2A9B: *ch = 0x2A9C; break;
          case 0x2A9C: *ch = 0x2A9B; break;
          case 0x2AA1: *ch = 0x2AA2; break;
          case 0x2AA2: *ch = 0x2AA1; break;
          case 0x2AA6: *ch = 0x2AA7; break;
          case 0x2AA7: *ch = 0x2AA6; break;
          case 0x2AA8: *ch = 0x2AA9; break;
          case 0x2AA9: *ch = 0x2AA8; break;
          case 0x2AAA: *ch = 0x2AAB; break;
          case 0x2AAB: *ch = 0x2AAA; break;
          case 0x2AAC: *ch = 0x2AAD; break;
          case 0x2AAD: *ch = 0x2AAC; break;
          case 0x2AAF: *ch = 0x2AB0; break;
          case 0x2AB0: *ch = 0x2AAF; break;
          case 0x2AB3: *ch = 0x2AB4; break;
          case 0x2AB4: *ch = 0x2AB3; break;
          case 0x2ABB: *ch = 0x2ABC; break;
          case 0x2ABC: *ch = 0x2ABB; break;
          case 0x2ABD: *ch = 0x2ABE; break;
          case 0x2ABE: *ch = 0x2ABD; break;
          case 0x2ABF: *ch = 0x2AC0; break;
          case 0x2AC0: *ch = 0x2ABF; break;
          case 0x2AC1: *ch = 0x2AC2; break;
          case 0x2AC2: *ch = 0x2AC1; break;
          case 0x2AC3: *ch = 0x2AC4; break;
          case 0x2AC4: *ch = 0x2AC3; break;
          case 0x2AC5: *ch = 0x2AC6; break;
          case 0x2AC6: *ch = 0x2AC5; break;
          case 0x2ACD: *ch = 0x2ACE; break;
          case 0x2ACE: *ch = 0x2ACD; break;
          case 0x2ACF: *ch = 0x2AD0; break;
          case 0x2AD0: *ch = 0x2ACF; break;
          case 0x2AD1: *ch = 0x2AD2; break;
          case 0x2AD2: *ch = 0x2AD1; break;
          case 0x2AD3: *ch = 0x2AD4; break;
          case 0x2AD4: *ch = 0x2AD3; break;
          case 0x2AD5: *ch = 0x2AD6; break;
          case 0x2AD6: *ch = 0x2AD5; break;
          case 0x2ADE: *ch = 0x22A6; break;
          case 0x2AE3: *ch = 0x22A9; break;
          case 0x2AE4: *ch = 0x22A8; break;
          case 0x2AE5: *ch = 0x22AB; break;
          case 0x2AEC: *ch = 0x2AED; break;
          case 0x2AED: *ch = 0x2AEC; break;
          case 0x2AF7: *ch = 0x2AF8; break;
          case 0x2AF8: *ch = 0x2AF7; break;
          case 0x2AF9: *ch = 0x2AFA; break;
          case 0x2AFA: *ch = 0x2AF9; break;
        }
    } else if ((*ch & 0xFF00) == 0x3000) {
        switch (*ch) {
          case 0x3008: *ch = 0x3009; break;
          case 0x3009: *ch = 0x3008; break;
          case 0x300A: *ch = 0x300B; break;
          case 0x300B: *ch = 0x300A; break;
          case 0x300C: *ch = 0x300D; break;
          case 0x300D: *ch = 0x300C; break;
          case 0x300E: *ch = 0x300F; break;
          case 0x300F: *ch = 0x300E; break;
          case 0x3010: *ch = 0x3011; break;
          case 0x3011: *ch = 0x3010; break;
          case 0x3014: *ch = 0x3015; break;
          case 0x3015: *ch = 0x3014; break;
          case 0x3016: *ch = 0x3017; break;
          case 0x3017: *ch = 0x3016; break;
          case 0x3018: *ch = 0x3019; break;
          case 0x3019: *ch = 0x3018; break;
          case 0x301A: *ch = 0x301B; break;
          case 0x301B: *ch = 0x301A; break;
        }
    } else if ((*ch & 0xFF00) == 0xFF00) {
        switch (*ch) {
          case 0xFF08: *ch = 0xFF09; break;
          case 0xFF09: *ch = 0xFF08; break;
          case 0xFF1C: *ch = 0xFF1E; break;
          case 0xFF1E: *ch = 0xFF1C; break;
          case 0xFF3B: *ch = 0xFF3D; break;
          case 0xFF3D: *ch = 0xFF3B; break;
          case 0xFF5B: *ch = 0xFF5D; break;
          case 0xFF5D: *ch = 0xFF5B; break;
          case 0xFF5F: *ch = 0xFF60; break;
          case 0xFF60: *ch = 0xFF5F; break;
          case 0xFF62: *ch = 0xFF63; break;
          case 0xFF63: *ch = 0xFF62; break;
        }
    }
}
| 1855 | |
| 1856 | #ifdef TEST_GETTYPE |
| 1857 | |
| 1858 | #include <stdio.h> |
| 1859 | #include <assert.h> |
| 1860 | |
| 1861 | int main(int argc, char **argv) |
| 1862 | { |
| 1863 | static const struct { int type; char *name; } typetoname[] = { |
| 1864 | #define TYPETONAME(X) { X , #X } |
| 1865 | TYPETONAME(L), |
| 1866 | TYPETONAME(LRE), |
| 1867 | TYPETONAME(LRO), |
| 1868 | TYPETONAME(R), |
| 1869 | TYPETONAME(AL), |
| 1870 | TYPETONAME(RLE), |
| 1871 | TYPETONAME(RLO), |
| 1872 | TYPETONAME(PDF), |
| 1873 | TYPETONAME(EN), |
| 1874 | TYPETONAME(ES), |
| 1875 | TYPETONAME(ET), |
| 1876 | TYPETONAME(AN), |
| 1877 | TYPETONAME(CS), |
| 1878 | TYPETONAME(NSM), |
| 1879 | TYPETONAME(BN), |
| 1880 | TYPETONAME(B), |
| 1881 | TYPETONAME(S), |
| 1882 | TYPETONAME(WS), |
| 1883 | TYPETONAME(ON), |
| 1884 | #undef TYPETONAME |
| 1885 | }; |
| 1886 | int i; |
| 1887 | |
| 1888 | for (i = 1; i < argc; i++) { |
| 1889 | unsigned long chr = strtoul(argv[i], NULL, 0); |
| 1890 | int type = getType(chr); |
| 1891 | assert(typetoname[type].type == type); |
| 1892 | printf("U+%04x: %s\n", chr, typetoname[type].name); |
| 1893 | } |
| 1894 | |
| 1895 | return 0; |
| 1896 | } |
| 1897 | |
| 1898 | #endif |