Sebastian Kuschel reports that pfd_closing can be called for a socket
[u/mdw/putty] / minibidi.c
1 /************************************************************************
2 * $Id$
3 *
4 * ------------
5 * Description:
6 * ------------
7 * This is an implementation of Unicode's Bidirectional Algorithm
8 * (known as UAX #9).
9 *
10 * http://www.unicode.org/reports/tr9/
11 *
12 * Author: Ahmad Khalifa
13 *
14 * -----------------
15 * Revision Details: (Updated by Revision Control System)
16 * -----------------
17 * $Date$
18 * $Author$
19 * $Revision$
20 *
21 * (www.arabeyes.org - under MIT license)
22 *
23 ************************************************************************/
24
25 /*
26 * TODO:
27 * =====
28 * - Explicit marks need to be handled (they are not 100% now)
29 * - Ligatures
30 */
31
32 #include <stdlib.h> /* definition of wchar_t*/
33
34 #include "misc.h"
35
/*
 * Bit layout of a level byte: the low six bits carry the embedding
 * level, the top two bits carry the directional override status.
 */
#define LMASK   0x3F    /* embedding level bits */
#define OMASK   0xC0    /* both override bits together */
#define OISL    0x80    /* override is L */
#define OISR    0x40    /* override is R */

/*
 * Standalone test build. The PuTTY headers are still required (for
 * snewn and sfree), but mapping the allocator hooks onto the C library
 * means no other PuTTY code has to be _linked_ in.
 */
#ifdef TEST_GETTYPE
#define safemalloc malloc
#define safefree free
#endif
48
/*
 * Shaping helpers.
 *
 * STYPE(xh) gives the joining class of code point xh, or SU when xh
 * lies outside the range covered by shapetypes[]. SISOLATED(xh) fetches
 * the isolated form stored in shapetypes[]; it performs no range check,
 * so xh must already be known to lie inside the table. SFINAL, SINITIAL
 * and SMEDIAL are applied to an isolated form and simply add 1, 2 and 3
 * to it.
 */
#define STYPE(xh) \
    ((((xh) >= SHAPE_FIRST) && ((xh) <= SHAPE_LAST)) ? \
     shapetypes[(xh) - SHAPE_FIRST].type : SU)
#define SISOLATED(xh)   (shapetypes[(xh) - SHAPE_FIRST].form_b)
#define SFINAL(xh)      ((xh) + 1)
#define SINITIAL(xh)    ((xh) + 2)
#define SMEDIAL(xh)     ((xh) + 3)

/* Smallest odd / smallest even value strictly greater than x. */
#define leastGreaterOdd(x)      (((x) + 1) | 1)
#define leastGreaterEven(x)     (((x) + 2) & ~1)
59
/*
 * One character cell of a line handed to the bidi and shaping code.
 * The shaping code in this file copies whole cells and rewrites only
 * `wc'. NOTE(review): origwc and index look like the untouched original
 * character and its source position - confirm against the callers.
 */
typedef struct bidi_char {
    unsigned int origwc;
    unsigned int wc;
    unsigned short index;
} bidi_char;
64
65 /* function declarations */
66 void flipThisRun(bidi_char *from, unsigned char* level, int max, int count);
67 int findIndexOfRun(unsigned char* level , int start, int count, int tlevel);
68 unsigned char getType(int ch);
69 unsigned char setOverrideBits(unsigned char level, unsigned char override);
70 int getPreviousLevel(unsigned char* level, int from);
71 int do_shape(bidi_char *line, bidi_char *to, int count);
72 int do_bidi(bidi_char *line, int count);
73 void doMirror(unsigned int *ch);
74
/*
 * Bidirectional character types, named as in UAX #9.
 *
 * The numeric values are used as bit positions by is_rtl(), so the
 * order of the constants must not be changed casually.
 */
enum {
    L,          /* Left-to-Right */
    LRE,        /* Left-to-Right Embedding */
    LRO,        /* Left-to-Right Override */
    R,          /* Right-to-Left */
    AL,         /* Right-to-Left Arabic */
    RLE,        /* Right-to-Left Embedding */
    RLO,        /* Right-to-Left Override */
    PDF,        /* Pop Directional Format */
    EN,         /* European Number */
    ES,         /* European Number Separator */
    ET,         /* European Number Terminator */
    AN,         /* Arabic Number */
    CS,         /* Common Number Separator */
    NSM,        /* Non-Spacing Mark */
    BN,         /* Boundary Neutral */
    B,          /* Paragraph Separator */
    S,          /* Segment Separator */
    WS,         /* Whitespace */
    ON          /* Other Neutrals */
};
97
/* Joining classes used by the Arabic shaping code */
enum {
    SL,         /* Left-Joining; does not occur in U+0600 - U+06FF */
    SR,         /* Right-Joining: has Isolated and Final forms */
    SD,         /* Dual-Joining: has Isolated, Final, Initial and Medial */
    SU,         /* Non-Joining */
    SC          /* Join-Causing, e.g. U+0640 (TATWEEL) */
};
106
/* One row of the shaping table: joining class plus isolated form. */
typedef struct {
    char type;          /* one of SL, SR, SD, SU, SC */
    wchar_t form_b;     /* isolated presentation form, or 0 if none */
} shape_node;
111
112 /* Kept near the actual table, for verification. */
113 #define SHAPE_FIRST 0x621
114 #define SHAPE_LAST (SHAPE_FIRST + lenof(shapetypes) - 1)
115
116 const shape_node shapetypes[] = {
117 /* index, Typ, Iso, Ligature Index*/
118 /* 621 */ {SU, 0xFE80},
119 /* 622 */ {SR, 0xFE81},
120 /* 623 */ {SR, 0xFE83},
121 /* 624 */ {SR, 0xFE85},
122 /* 625 */ {SR, 0xFE87},
123 /* 626 */ {SD, 0xFE89},
124 /* 627 */ {SR, 0xFE8D},
125 /* 628 */ {SD, 0xFE8F},
126 /* 629 */ {SR, 0xFE93},
127 /* 62A */ {SD, 0xFE95},
128 /* 62B */ {SD, 0xFE99},
129 /* 62C */ {SD, 0xFE9D},
130 /* 62D */ {SD, 0xFEA1},
131 /* 62E */ {SD, 0xFEA5},
132 /* 62F */ {SR, 0xFEA9},
133 /* 630 */ {SR, 0xFEAB},
134 /* 631 */ {SR, 0xFEAD},
135 /* 632 */ {SR, 0xFEAF},
136 /* 633 */ {SD, 0xFEB1},
137 /* 634 */ {SD, 0xFEB5},
138 /* 635 */ {SD, 0xFEB9},
139 /* 636 */ {SD, 0xFEBD},
140 /* 637 */ {SD, 0xFEC1},
141 /* 638 */ {SD, 0xFEC5},
142 /* 639 */ {SD, 0xFEC9},
143 /* 63A */ {SD, 0xFECD},
144 /* 63B */ {SU, 0x0},
145 /* 63C */ {SU, 0x0},
146 /* 63D */ {SU, 0x0},
147 /* 63E */ {SU, 0x0},
148 /* 63F */ {SU, 0x0},
149 /* 640 */ {SC, 0x0},
150 /* 641 */ {SD, 0xFED1},
151 /* 642 */ {SD, 0xFED5},
152 /* 643 */ {SD, 0xFED9},
153 /* 644 */ {SD, 0xFEDD},
154 /* 645 */ {SD, 0xFEE1},
155 /* 646 */ {SD, 0xFEE5},
156 /* 647 */ {SD, 0xFEE9},
157 /* 648 */ {SR, 0xFEED},
158 /* 649 */ {SR, 0xFEEF}, /* SD */
159 /* 64A */ {SD, 0xFEF1},
160 /* 64B */ {SU, 0x0},
161 /* 64C */ {SU, 0x0},
162 /* 64D */ {SU, 0x0},
163 /* 64E */ {SU, 0x0},
164 /* 64F */ {SU, 0x0},
165 /* 650 */ {SU, 0x0},
166 /* 651 */ {SU, 0x0},
167 /* 652 */ {SU, 0x0},
168 /* 653 */ {SU, 0x0},
169 /* 654 */ {SU, 0x0},
170 /* 655 */ {SU, 0x0},
171 /* 656 */ {SU, 0x0},
172 /* 657 */ {SU, 0x0},
173 /* 658 */ {SU, 0x0},
174 /* 659 */ {SU, 0x0},
175 /* 65A */ {SU, 0x0},
176 /* 65B */ {SU, 0x0},
177 /* 65C */ {SU, 0x0},
178 /* 65D */ {SU, 0x0},
179 /* 65E */ {SU, 0x0},
180 /* 65F */ {SU, 0x0},
181 /* 660 */ {SU, 0x0},
182 /* 661 */ {SU, 0x0},
183 /* 662 */ {SU, 0x0},
184 /* 663 */ {SU, 0x0},
185 /* 664 */ {SU, 0x0},
186 /* 665 */ {SU, 0x0},
187 /* 666 */ {SU, 0x0},
188 /* 667 */ {SU, 0x0},
189 /* 668 */ {SU, 0x0},
190 /* 669 */ {SU, 0x0},
191 /* 66A */ {SU, 0x0},
192 /* 66B */ {SU, 0x0},
193 /* 66C */ {SU, 0x0},
194 /* 66D */ {SU, 0x0},
195 /* 66E */ {SU, 0x0},
196 /* 66F */ {SU, 0x0},
197 /* 670 */ {SU, 0x0},
198 /* 671 */ {SR, 0xFB50},
199 /* 672 */ {SU, 0x0},
200 /* 673 */ {SU, 0x0},
201 /* 674 */ {SU, 0x0},
202 /* 675 */ {SU, 0x0},
203 /* 676 */ {SU, 0x0},
204 /* 677 */ {SU, 0x0},
205 /* 678 */ {SU, 0x0},
206 /* 679 */ {SD, 0xFB66},
207 /* 67A */ {SD, 0xFB5E},
208 /* 67B */ {SD, 0xFB52},
209 /* 67C */ {SU, 0x0},
210 /* 67D */ {SU, 0x0},
211 /* 67E */ {SD, 0xFB56},
212 /* 67F */ {SD, 0xFB62},
213 /* 680 */ {SD, 0xFB5A},
214 /* 681 */ {SU, 0x0},
215 /* 682 */ {SU, 0x0},
216 /* 683 */ {SD, 0xFB76},
217 /* 684 */ {SD, 0xFB72},
218 /* 685 */ {SU, 0x0},
219 /* 686 */ {SD, 0xFB7A},
220 /* 687 */ {SD, 0xFB7E},
221 /* 688 */ {SR, 0xFB88},
222 /* 689 */ {SU, 0x0},
223 /* 68A */ {SU, 0x0},
224 /* 68B */ {SU, 0x0},
225 /* 68C */ {SR, 0xFB84},
226 /* 68D */ {SR, 0xFB82},
227 /* 68E */ {SR, 0xFB86},
228 /* 68F */ {SU, 0x0},
229 /* 690 */ {SU, 0x0},
230 /* 691 */ {SR, 0xFB8C},
231 /* 692 */ {SU, 0x0},
232 /* 693 */ {SU, 0x0},
233 /* 694 */ {SU, 0x0},
234 /* 695 */ {SU, 0x0},
235 /* 696 */ {SU, 0x0},
236 /* 697 */ {SU, 0x0},
237 /* 698 */ {SR, 0xFB8A},
238 /* 699 */ {SU, 0x0},
239 /* 69A */ {SU, 0x0},
240 /* 69B */ {SU, 0x0},
241 /* 69C */ {SU, 0x0},
242 /* 69D */ {SU, 0x0},
243 /* 69E */ {SU, 0x0},
244 /* 69F */ {SU, 0x0},
245 /* 6A0 */ {SU, 0x0},
246 /* 6A1 */ {SU, 0x0},
247 /* 6A2 */ {SU, 0x0},
248 /* 6A3 */ {SU, 0x0},
249 /* 6A4 */ {SD, 0xFB6A},
250 /* 6A5 */ {SU, 0x0},
251 /* 6A6 */ {SD, 0xFB6E},
252 /* 6A7 */ {SU, 0x0},
253 /* 6A8 */ {SU, 0x0},
254 /* 6A9 */ {SD, 0xFB8E},
255 /* 6AA */ {SU, 0x0},
256 /* 6AB */ {SU, 0x0},
257 /* 6AC */ {SU, 0x0},
258 /* 6AD */ {SD, 0xFBD3},
259 /* 6AE */ {SU, 0x0},
260 /* 6AF */ {SD, 0xFB92},
261 /* 6B0 */ {SU, 0x0},
262 /* 6B1 */ {SD, 0xFB9A},
263 /* 6B2 */ {SU, 0x0},
264 /* 6B3 */ {SD, 0xFB96},
265 /* 6B4 */ {SU, 0x0},
266 /* 6B5 */ {SU, 0x0},
267 /* 6B6 */ {SU, 0x0},
268 /* 6B7 */ {SU, 0x0},
269 /* 6B8 */ {SU, 0x0},
270 /* 6B9 */ {SU, 0x0},
271 /* 6BA */ {SR, 0xFB9E},
272 /* 6BB */ {SD, 0xFBA0},
273 /* 6BC */ {SU, 0x0},
274 /* 6BD */ {SU, 0x0},
275 /* 6BE */ {SD, 0xFBAA},
276 /* 6BF */ {SU, 0x0},
277 /* 6C0 */ {SR, 0xFBA4},
278 /* 6C1 */ {SD, 0xFBA6},
279 /* 6C2 */ {SU, 0x0},
280 /* 6C3 */ {SU, 0x0},
281 /* 6C4 */ {SU, 0x0},
282 /* 6C5 */ {SR, 0xFBE0},
283 /* 6C6 */ {SR, 0xFBD9},
284 /* 6C7 */ {SR, 0xFBD7},
285 /* 6C8 */ {SR, 0xFBDB},
286 /* 6C9 */ {SR, 0xFBE2},
287 /* 6CA */ {SU, 0x0},
288 /* 6CB */ {SR, 0xFBDE},
289 /* 6CC */ {SD, 0xFBFC},
290 /* 6CD */ {SU, 0x0},
291 /* 6CE */ {SU, 0x0},
292 /* 6CF */ {SU, 0x0},
293 /* 6D0 */ {SU, 0x0},
294 /* 6D1 */ {SU, 0x0},
295 /* 6D2 */ {SR, 0xFBAE},
296 };
297
298 /*
299 * Flips the text buffer, according to max level, and
300 * all higher levels
301 *
302 * Input:
303 * from: text buffer, on which to apply flipping
304 * level: resolved levels buffer
305 * max: the maximum level found in this line (should be unsigned char)
306 * count: line size in bidi_char
307 */
308 void flipThisRun(bidi_char *from, unsigned char *level, int max, int count)
309 {
310 int i, j, k, tlevel;
311 bidi_char temp;
312
313 j = i = 0;
314 while (i<count && j<count) {
315
316 /* find the start of the run of level=max */
317 tlevel = max;
318 i = j = findIndexOfRun(level, i, count, max);
319 /* find the end of the run */
320 while (i<count && tlevel <= level[i]) {
321 i++;
322 }
323 for (k = i - 1; k > j; k--, j++) {
324 temp = from[k];
325 from[k] = from[j];
326 from[j] = temp;
327 }
328 }
329 }
330
/*
 * Returns the index of the first entry at or after `start' whose level
 * equals `tlevel', or `count' if there is no such entry.
 */
int findIndexOfRun(unsigned char* level , int start, int count, int tlevel)
{
    int pos = start;

    while (pos < count && level[pos] != tlevel)
        pos++;

    /* also covers start > count, where the scan never runs */
    return pos < count ? pos : count;
}
344
345 /*
346 * Returns the bidi character type of ch.
347 *
348 * The data table in this function is constructed from the Unicode
349 * Character Database, downloadable from unicode.org at the URL
350 *
351 * http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
352 *
353 * by the following fragment of Perl:
354
355 perl -ne 'split ";"; $num = hex $_[0]; $type = $_[4];' \
356 -e '$fl = ($_[1] =~ /First/ ? 1 : $_[1] =~ /Last/ ? 2 : 0);' \
357 -e 'if ($type eq $runtype and ($runend == $num-1 or ' \
358 -e ' ($fl==2 and $pfl==1))) {$runend = $num;} else { &reset; }' \
359 -e '$pfl=$fl; END { &reset }; sub reset {' \
360 -e 'printf" {0x%04x, 0x%04x, %s},\n",$runstart,$runend,$runtype' \
361 -e ' if defined $runstart and $runtype ne "ON";' \
362 -e '$runstart=$runend=$num; $runtype=$type;}' \
363 UnicodeData.txt
364
365 */
366 unsigned char getType(int ch)
367 {
368 static const struct {
369 int first, last, type;
370 } lookup[] = {
371 {0x0000, 0x0008, BN},
372 {0x0009, 0x0009, S},
373 {0x000a, 0x000a, B},
374 {0x000b, 0x000b, S},
375 {0x000c, 0x000c, WS},
376 {0x000d, 0x000d, B},
377 {0x000e, 0x001b, BN},
378 {0x001c, 0x001e, B},
379 {0x001f, 0x001f, S},
380 {0x0020, 0x0020, WS},
381 {0x0023, 0x0025, ET},
382 {0x002b, 0x002b, ES},
383 {0x002c, 0x002c, CS},
384 {0x002d, 0x002d, ES},
385 {0x002e, 0x002f, CS},
386 {0x0030, 0x0039, EN},
387 {0x003a, 0x003a, CS},
388 {0x0041, 0x005a, L},
389 {0x0061, 0x007a, L},
390 {0x007f, 0x0084, BN},
391 {0x0085, 0x0085, B},
392 {0x0086, 0x009f, BN},
393 {0x00a0, 0x00a0, CS},
394 {0x00a2, 0x00a5, ET},
395 {0x00aa, 0x00aa, L},
396 {0x00ad, 0x00ad, BN},
397 {0x00b0, 0x00b1, ET},
398 {0x00b2, 0x00b3, EN},
399 {0x00b5, 0x00b5, L},
400 {0x00b9, 0x00b9, EN},
401 {0x00ba, 0x00ba, L},
402 {0x00c0, 0x00d6, L},
403 {0x00d8, 0x00f6, L},
404 {0x00f8, 0x0236, L},
405 {0x0250, 0x02b8, L},
406 {0x02bb, 0x02c1, L},
407 {0x02d0, 0x02d1, L},
408 {0x02e0, 0x02e4, L},
409 {0x02ee, 0x02ee, L},
410 {0x0300, 0x0357, NSM},
411 {0x035d, 0x036f, NSM},
412 {0x037a, 0x037a, L},
413 {0x0386, 0x0386, L},
414 {0x0388, 0x038a, L},
415 {0x038c, 0x038c, L},
416 {0x038e, 0x03a1, L},
417 {0x03a3, 0x03ce, L},
418 {0x03d0, 0x03f5, L},
419 {0x03f7, 0x03fb, L},
420 {0x0400, 0x0482, L},
421 {0x0483, 0x0486, NSM},
422 {0x0488, 0x0489, NSM},
423 {0x048a, 0x04ce, L},
424 {0x04d0, 0x04f5, L},
425 {0x04f8, 0x04f9, L},
426 {0x0500, 0x050f, L},
427 {0x0531, 0x0556, L},
428 {0x0559, 0x055f, L},
429 {0x0561, 0x0587, L},
430 {0x0589, 0x0589, L},
431 {0x0591, 0x05a1, NSM},
432 {0x05a3, 0x05b9, NSM},
433 {0x05bb, 0x05bd, NSM},
434 {0x05be, 0x05be, R},
435 {0x05bf, 0x05bf, NSM},
436 {0x05c0, 0x05c0, R},
437 {0x05c1, 0x05c2, NSM},
438 {0x05c3, 0x05c3, R},
439 {0x05c4, 0x05c4, NSM},
440 {0x05d0, 0x05ea, R},
441 {0x05f0, 0x05f4, R},
442 {0x0600, 0x0603, AL},
443 {0x060c, 0x060c, CS},
444 {0x060d, 0x060d, AL},
445 {0x0610, 0x0615, NSM},
446 {0x061b, 0x061b, AL},
447 {0x061f, 0x061f, AL},
448 {0x0621, 0x063a, AL},
449 {0x0640, 0x064a, AL},
450 {0x064b, 0x0658, NSM},
451 {0x0660, 0x0669, AN},
452 {0x066a, 0x066a, ET},
453 {0x066b, 0x066c, AN},
454 {0x066d, 0x066f, AL},
455 {0x0670, 0x0670, NSM},
456 {0x0671, 0x06d5, AL},
457 {0x06d6, 0x06dc, NSM},
458 {0x06dd, 0x06dd, AL},
459 {0x06de, 0x06e4, NSM},
460 {0x06e5, 0x06e6, AL},
461 {0x06e7, 0x06e8, NSM},
462 {0x06ea, 0x06ed, NSM},
463 {0x06ee, 0x06ef, AL},
464 {0x06f0, 0x06f9, EN},
465 {0x06fa, 0x070d, AL},
466 {0x070f, 0x070f, BN},
467 {0x0710, 0x0710, AL},
468 {0x0711, 0x0711, NSM},
469 {0x0712, 0x072f, AL},
470 {0x0730, 0x074a, NSM},
471 {0x074d, 0x074f, AL},
472 {0x0780, 0x07a5, AL},
473 {0x07a6, 0x07b0, NSM},
474 {0x07b1, 0x07b1, AL},
475 {0x0901, 0x0902, NSM},
476 {0x0903, 0x0939, L},
477 {0x093c, 0x093c, NSM},
478 {0x093d, 0x0940, L},
479 {0x0941, 0x0948, NSM},
480 {0x0949, 0x094c, L},
481 {0x094d, 0x094d, NSM},
482 {0x0950, 0x0950, L},
483 {0x0951, 0x0954, NSM},
484 {0x0958, 0x0961, L},
485 {0x0962, 0x0963, NSM},
486 {0x0964, 0x0970, L},
487 {0x0981, 0x0981, NSM},
488 {0x0982, 0x0983, L},
489 {0x0985, 0x098c, L},
490 {0x098f, 0x0990, L},
491 {0x0993, 0x09a8, L},
492 {0x09aa, 0x09b0, L},
493 {0x09b2, 0x09b2, L},
494 {0x09b6, 0x09b9, L},
495 {0x09bc, 0x09bc, NSM},
496 {0x09bd, 0x09c0, L},
497 {0x09c1, 0x09c4, NSM},
498 {0x09c7, 0x09c8, L},
499 {0x09cb, 0x09cc, L},
500 {0x09cd, 0x09cd, NSM},
501 {0x09d7, 0x09d7, L},
502 {0x09dc, 0x09dd, L},
503 {0x09df, 0x09e1, L},
504 {0x09e2, 0x09e3, NSM},
505 {0x09e6, 0x09f1, L},
506 {0x09f2, 0x09f3, ET},
507 {0x09f4, 0x09fa, L},
508 {0x0a01, 0x0a02, NSM},
509 {0x0a03, 0x0a03, L},
510 {0x0a05, 0x0a0a, L},
511 {0x0a0f, 0x0a10, L},
512 {0x0a13, 0x0a28, L},
513 {0x0a2a, 0x0a30, L},
514 {0x0a32, 0x0a33, L},
515 {0x0a35, 0x0a36, L},
516 {0x0a38, 0x0a39, L},
517 {0x0a3c, 0x0a3c, NSM},
518 {0x0a3e, 0x0a40, L},
519 {0x0a41, 0x0a42, NSM},
520 {0x0a47, 0x0a48, NSM},
521 {0x0a4b, 0x0a4d, NSM},
522 {0x0a59, 0x0a5c, L},
523 {0x0a5e, 0x0a5e, L},
524 {0x0a66, 0x0a6f, L},
525 {0x0a70, 0x0a71, NSM},
526 {0x0a72, 0x0a74, L},
527 {0x0a81, 0x0a82, NSM},
528 {0x0a83, 0x0a83, L},
529 {0x0a85, 0x0a8d, L},
530 {0x0a8f, 0x0a91, L},
531 {0x0a93, 0x0aa8, L},
532 {0x0aaa, 0x0ab0, L},
533 {0x0ab2, 0x0ab3, L},
534 {0x0ab5, 0x0ab9, L},
535 {0x0abc, 0x0abc, NSM},
536 {0x0abd, 0x0ac0, L},
537 {0x0ac1, 0x0ac5, NSM},
538 {0x0ac7, 0x0ac8, NSM},
539 {0x0ac9, 0x0ac9, L},
540 {0x0acb, 0x0acc, L},
541 {0x0acd, 0x0acd, NSM},
542 {0x0ad0, 0x0ad0, L},
543 {0x0ae0, 0x0ae1, L},
544 {0x0ae2, 0x0ae3, NSM},
545 {0x0ae6, 0x0aef, L},
546 {0x0af1, 0x0af1, ET},
547 {0x0b01, 0x0b01, NSM},
548 {0x0b02, 0x0b03, L},
549 {0x0b05, 0x0b0c, L},
550 {0x0b0f, 0x0b10, L},
551 {0x0b13, 0x0b28, L},
552 {0x0b2a, 0x0b30, L},
553 {0x0b32, 0x0b33, L},
554 {0x0b35, 0x0b39, L},
555 {0x0b3c, 0x0b3c, NSM},
556 {0x0b3d, 0x0b3e, L},
557 {0x0b3f, 0x0b3f, NSM},
558 {0x0b40, 0x0b40, L},
559 {0x0b41, 0x0b43, NSM},
560 {0x0b47, 0x0b48, L},
561 {0x0b4b, 0x0b4c, L},
562 {0x0b4d, 0x0b4d, NSM},
563 {0x0b56, 0x0b56, NSM},
564 {0x0b57, 0x0b57, L},
565 {0x0b5c, 0x0b5d, L},
566 {0x0b5f, 0x0b61, L},
567 {0x0b66, 0x0b71, L},
568 {0x0b82, 0x0b82, NSM},
569 {0x0b83, 0x0b83, L},
570 {0x0b85, 0x0b8a, L},
571 {0x0b8e, 0x0b90, L},
572 {0x0b92, 0x0b95, L},
573 {0x0b99, 0x0b9a, L},
574 {0x0b9c, 0x0b9c, L},
575 {0x0b9e, 0x0b9f, L},
576 {0x0ba3, 0x0ba4, L},
577 {0x0ba8, 0x0baa, L},
578 {0x0bae, 0x0bb5, L},
579 {0x0bb7, 0x0bb9, L},
580 {0x0bbe, 0x0bbf, L},
581 {0x0bc0, 0x0bc0, NSM},
582 {0x0bc1, 0x0bc2, L},
583 {0x0bc6, 0x0bc8, L},
584 {0x0bca, 0x0bcc, L},
585 {0x0bcd, 0x0bcd, NSM},
586 {0x0bd7, 0x0bd7, L},
587 {0x0be7, 0x0bf2, L},
588 {0x0bf9, 0x0bf9, ET},
589 {0x0c01, 0x0c03, L},
590 {0x0c05, 0x0c0c, L},
591 {0x0c0e, 0x0c10, L},
592 {0x0c12, 0x0c28, L},
593 {0x0c2a, 0x0c33, L},
594 {0x0c35, 0x0c39, L},
595 {0x0c3e, 0x0c40, NSM},
596 {0x0c41, 0x0c44, L},
597 {0x0c46, 0x0c48, NSM},
598 {0x0c4a, 0x0c4d, NSM},
599 {0x0c55, 0x0c56, NSM},
600 {0x0c60, 0x0c61, L},
601 {0x0c66, 0x0c6f, L},
602 {0x0c82, 0x0c83, L},
603 {0x0c85, 0x0c8c, L},
604 {0x0c8e, 0x0c90, L},
605 {0x0c92, 0x0ca8, L},
606 {0x0caa, 0x0cb3, L},
607 {0x0cb5, 0x0cb9, L},
608 {0x0cbc, 0x0cbc, NSM},
609 {0x0cbd, 0x0cc4, L},
610 {0x0cc6, 0x0cc8, L},
611 {0x0cca, 0x0ccb, L},
612 {0x0ccc, 0x0ccd, NSM},
613 {0x0cd5, 0x0cd6, L},
614 {0x0cde, 0x0cde, L},
615 {0x0ce0, 0x0ce1, L},
616 {0x0ce6, 0x0cef, L},
617 {0x0d02, 0x0d03, L},
618 {0x0d05, 0x0d0c, L},
619 {0x0d0e, 0x0d10, L},
620 {0x0d12, 0x0d28, L},
621 {0x0d2a, 0x0d39, L},
622 {0x0d3e, 0x0d40, L},
623 {0x0d41, 0x0d43, NSM},
624 {0x0d46, 0x0d48, L},
625 {0x0d4a, 0x0d4c, L},
626 {0x0d4d, 0x0d4d, NSM},
627 {0x0d57, 0x0d57, L},
628 {0x0d60, 0x0d61, L},
629 {0x0d66, 0x0d6f, L},
630 {0x0d82, 0x0d83, L},
631 {0x0d85, 0x0d96, L},
632 {0x0d9a, 0x0db1, L},
633 {0x0db3, 0x0dbb, L},
634 {0x0dbd, 0x0dbd, L},
635 {0x0dc0, 0x0dc6, L},
636 {0x0dca, 0x0dca, NSM},
637 {0x0dcf, 0x0dd1, L},
638 {0x0dd2, 0x0dd4, NSM},
639 {0x0dd6, 0x0dd6, NSM},
640 {0x0dd8, 0x0ddf, L},
641 {0x0df2, 0x0df4, L},
642 {0x0e01, 0x0e30, L},
643 {0x0e31, 0x0e31, NSM},
644 {0x0e32, 0x0e33, L},
645 {0x0e34, 0x0e3a, NSM},
646 {0x0e3f, 0x0e3f, ET},
647 {0x0e40, 0x0e46, L},
648 {0x0e47, 0x0e4e, NSM},
649 {0x0e4f, 0x0e5b, L},
650 {0x0e81, 0x0e82, L},
651 {0x0e84, 0x0e84, L},
652 {0x0e87, 0x0e88, L},
653 {0x0e8a, 0x0e8a, L},
654 {0x0e8d, 0x0e8d, L},
655 {0x0e94, 0x0e97, L},
656 {0x0e99, 0x0e9f, L},
657 {0x0ea1, 0x0ea3, L},
658 {0x0ea5, 0x0ea5, L},
659 {0x0ea7, 0x0ea7, L},
660 {0x0eaa, 0x0eab, L},
661 {0x0ead, 0x0eb0, L},
662 {0x0eb1, 0x0eb1, NSM},
663 {0x0eb2, 0x0eb3, L},
664 {0x0eb4, 0x0eb9, NSM},
665 {0x0ebb, 0x0ebc, NSM},
666 {0x0ebd, 0x0ebd, L},
667 {0x0ec0, 0x0ec4, L},
668 {0x0ec6, 0x0ec6, L},
669 {0x0ec8, 0x0ecd, NSM},
670 {0x0ed0, 0x0ed9, L},
671 {0x0edc, 0x0edd, L},
672 {0x0f00, 0x0f17, L},
673 {0x0f18, 0x0f19, NSM},
674 {0x0f1a, 0x0f34, L},
675 {0x0f35, 0x0f35, NSM},
676 {0x0f36, 0x0f36, L},
677 {0x0f37, 0x0f37, NSM},
678 {0x0f38, 0x0f38, L},
679 {0x0f39, 0x0f39, NSM},
680 {0x0f3e, 0x0f47, L},
681 {0x0f49, 0x0f6a, L},
682 {0x0f71, 0x0f7e, NSM},
683 {0x0f7f, 0x0f7f, L},
684 {0x0f80, 0x0f84, NSM},
685 {0x0f85, 0x0f85, L},
686 {0x0f86, 0x0f87, NSM},
687 {0x0f88, 0x0f8b, L},
688 {0x0f90, 0x0f97, NSM},
689 {0x0f99, 0x0fbc, NSM},
690 {0x0fbe, 0x0fc5, L},
691 {0x0fc6, 0x0fc6, NSM},
692 {0x0fc7, 0x0fcc, L},
693 {0x0fcf, 0x0fcf, L},
694 {0x1000, 0x1021, L},
695 {0x1023, 0x1027, L},
696 {0x1029, 0x102a, L},
697 {0x102c, 0x102c, L},
698 {0x102d, 0x1030, NSM},
699 {0x1031, 0x1031, L},
700 {0x1032, 0x1032, NSM},
701 {0x1036, 0x1037, NSM},
702 {0x1038, 0x1038, L},
703 {0x1039, 0x1039, NSM},
704 {0x1040, 0x1057, L},
705 {0x1058, 0x1059, NSM},
706 {0x10a0, 0x10c5, L},
707 {0x10d0, 0x10f8, L},
708 {0x10fb, 0x10fb, L},
709 {0x1100, 0x1159, L},
710 {0x115f, 0x11a2, L},
711 {0x11a8, 0x11f9, L},
712 {0x1200, 0x1206, L},
713 {0x1208, 0x1246, L},
714 {0x1248, 0x1248, L},
715 {0x124a, 0x124d, L},
716 {0x1250, 0x1256, L},
717 {0x1258, 0x1258, L},
718 {0x125a, 0x125d, L},
719 {0x1260, 0x1286, L},
720 {0x1288, 0x1288, L},
721 {0x128a, 0x128d, L},
722 {0x1290, 0x12ae, L},
723 {0x12b0, 0x12b0, L},
724 {0x12b2, 0x12b5, L},
725 {0x12b8, 0x12be, L},
726 {0x12c0, 0x12c0, L},
727 {0x12c2, 0x12c5, L},
728 {0x12c8, 0x12ce, L},
729 {0x12d0, 0x12d6, L},
730 {0x12d8, 0x12ee, L},
731 {0x12f0, 0x130e, L},
732 {0x1310, 0x1310, L},
733 {0x1312, 0x1315, L},
734 {0x1318, 0x131e, L},
735 {0x1320, 0x1346, L},
736 {0x1348, 0x135a, L},
737 {0x1361, 0x137c, L},
738 {0x13a0, 0x13f4, L},
739 {0x1401, 0x1676, L},
740 {0x1680, 0x1680, WS},
741 {0x1681, 0x169a, L},
742 {0x16a0, 0x16f0, L},
743 {0x1700, 0x170c, L},
744 {0x170e, 0x1711, L},
745 {0x1712, 0x1714, NSM},
746 {0x1720, 0x1731, L},
747 {0x1732, 0x1734, NSM},
748 {0x1735, 0x1736, L},
749 {0x1740, 0x1751, L},
750 {0x1752, 0x1753, NSM},
751 {0x1760, 0x176c, L},
752 {0x176e, 0x1770, L},
753 {0x1772, 0x1773, NSM},
754 {0x1780, 0x17b6, L},
755 {0x17b7, 0x17bd, NSM},
756 {0x17be, 0x17c5, L},
757 {0x17c6, 0x17c6, NSM},
758 {0x17c7, 0x17c8, L},
759 {0x17c9, 0x17d3, NSM},
760 {0x17d4, 0x17da, L},
761 {0x17db, 0x17db, ET},
762 {0x17dc, 0x17dc, L},
763 {0x17dd, 0x17dd, NSM},
764 {0x17e0, 0x17e9, L},
765 {0x180b, 0x180d, NSM},
766 {0x180e, 0x180e, WS},
767 {0x1810, 0x1819, L},
768 {0x1820, 0x1877, L},
769 {0x1880, 0x18a8, L},
770 {0x18a9, 0x18a9, NSM},
771 {0x1900, 0x191c, L},
772 {0x1920, 0x1922, NSM},
773 {0x1923, 0x1926, L},
774 {0x1927, 0x192b, NSM},
775 {0x1930, 0x1931, L},
776 {0x1932, 0x1932, NSM},
777 {0x1933, 0x1938, L},
778 {0x1939, 0x193b, NSM},
779 {0x1946, 0x196d, L},
780 {0x1970, 0x1974, L},
781 {0x1d00, 0x1d6b, L},
782 {0x1e00, 0x1e9b, L},
783 {0x1ea0, 0x1ef9, L},
784 {0x1f00, 0x1f15, L},
785 {0x1f18, 0x1f1d, L},
786 {0x1f20, 0x1f45, L},
787 {0x1f48, 0x1f4d, L},
788 {0x1f50, 0x1f57, L},
789 {0x1f59, 0x1f59, L},
790 {0x1f5b, 0x1f5b, L},
791 {0x1f5d, 0x1f5d, L},
792 {0x1f5f, 0x1f7d, L},
793 {0x1f80, 0x1fb4, L},
794 {0x1fb6, 0x1fbc, L},
795 {0x1fbe, 0x1fbe, L},
796 {0x1fc2, 0x1fc4, L},
797 {0x1fc6, 0x1fcc, L},
798 {0x1fd0, 0x1fd3, L},
799 {0x1fd6, 0x1fdb, L},
800 {0x1fe0, 0x1fec, L},
801 {0x1ff2, 0x1ff4, L},
802 {0x1ff6, 0x1ffc, L},
803 {0x2000, 0x200a, WS},
804 {0x200b, 0x200d, BN},
805 {0x200e, 0x200e, L},
806 {0x200f, 0x200f, R},
807 {0x2028, 0x2028, WS},
808 {0x2029, 0x2029, B},
809 {0x202a, 0x202a, LRE},
810 {0x202b, 0x202b, RLE},
811 {0x202c, 0x202c, PDF},
812 {0x202d, 0x202d, LRO},
813 {0x202e, 0x202e, RLO},
814 {0x202f, 0x202f, WS},
815 {0x2030, 0x2034, ET},
816 {0x2044, 0x2044, CS},
817 {0x205f, 0x205f, WS},
818 {0x2060, 0x2063, BN},
819 {0x206a, 0x206f, BN},
820 {0x2070, 0x2070, EN},
821 {0x2071, 0x2071, L},
822 {0x2074, 0x2079, EN},
823 {0x207a, 0x207b, ET},
824 {0x207f, 0x207f, L},
825 {0x2080, 0x2089, EN},
826 {0x208a, 0x208b, ET},
827 {0x20a0, 0x20b1, ET},
828 {0x20d0, 0x20ea, NSM},
829 {0x2102, 0x2102, L},
830 {0x2107, 0x2107, L},
831 {0x210a, 0x2113, L},
832 {0x2115, 0x2115, L},
833 {0x2119, 0x211d, L},
834 {0x2124, 0x2124, L},
835 {0x2126, 0x2126, L},
836 {0x2128, 0x2128, L},
837 {0x212a, 0x212d, L},
838 {0x212e, 0x212e, ET},
839 {0x212f, 0x2131, L},
840 {0x2133, 0x2139, L},
841 {0x213d, 0x213f, L},
842 {0x2145, 0x2149, L},
843 {0x2160, 0x2183, L},
844 {0x2212, 0x2213, ET},
845 {0x2336, 0x237a, L},
846 {0x2395, 0x2395, L},
847 {0x2488, 0x249b, EN},
848 {0x249c, 0x24e9, L},
849 {0x2800, 0x28ff, L},
850 {0x3000, 0x3000, WS},
851 {0x3005, 0x3007, L},
852 {0x3021, 0x3029, L},
853 {0x302a, 0x302f, NSM},
854 {0x3031, 0x3035, L},
855 {0x3038, 0x303c, L},
856 {0x3041, 0x3096, L},
857 {0x3099, 0x309a, NSM},
858 {0x309d, 0x309f, L},
859 {0x30a1, 0x30fa, L},
860 {0x30fc, 0x30ff, L},
861 {0x3105, 0x312c, L},
862 {0x3131, 0x318e, L},
863 {0x3190, 0x31b7, L},
864 {0x31f0, 0x321c, L},
865 {0x3220, 0x3243, L},
866 {0x3260, 0x327b, L},
867 {0x327f, 0x32b0, L},
868 {0x32c0, 0x32cb, L},
869 {0x32d0, 0x32fe, L},
870 {0x3300, 0x3376, L},
871 {0x337b, 0x33dd, L},
872 {0x33e0, 0x33fe, L},
873 {0x3400, 0x4db5, L},
874 {0x4e00, 0x9fa5, L},
875 {0xa000, 0xa48c, L},
876 {0xac00, 0xd7a3, L},
877 {0xd800, 0xfa2d, L},
878 {0xfa30, 0xfa6a, L},
879 {0xfb00, 0xfb06, L},
880 {0xfb13, 0xfb17, L},
881 {0xfb1d, 0xfb1d, R},
882 {0xfb1e, 0xfb1e, NSM},
883 {0xfb1f, 0xfb28, R},
884 {0xfb29, 0xfb29, ET},
885 {0xfb2a, 0xfb36, R},
886 {0xfb38, 0xfb3c, R},
887 {0xfb3e, 0xfb3e, R},
888 {0xfb40, 0xfb41, R},
889 {0xfb43, 0xfb44, R},
890 {0xfb46, 0xfb4f, R},
891 {0xfb50, 0xfbb1, AL},
892 {0xfbd3, 0xfd3d, AL},
893 {0xfd50, 0xfd8f, AL},
894 {0xfd92, 0xfdc7, AL},
895 {0xfdf0, 0xfdfc, AL},
896 {0xfe00, 0xfe0f, NSM},
897 {0xfe20, 0xfe23, NSM},
898 {0xfe50, 0xfe50, CS},
899 {0xfe52, 0xfe52, CS},
900 {0xfe55, 0xfe55, CS},
901 {0xfe5f, 0xfe5f, ET},
902 {0xfe62, 0xfe63, ET},
903 {0xfe69, 0xfe6a, ET},
904 {0xfe70, 0xfe74, AL},
905 {0xfe76, 0xfefc, AL},
906 {0xfeff, 0xfeff, BN},
907 {0xff03, 0xff05, ET},
908 {0xff0b, 0xff0b, ET},
909 {0xff0c, 0xff0c, CS},
910 {0xff0d, 0xff0d, ET},
911 {0xff0e, 0xff0e, CS},
912 {0xff0f, 0xff0f, ES},
913 {0xff10, 0xff19, EN},
914 {0xff1a, 0xff1a, CS},
915 {0xff21, 0xff3a, L},
916 {0xff41, 0xff5a, L},
917 {0xff66, 0xffbe, L},
918 {0xffc2, 0xffc7, L},
919 {0xffca, 0xffcf, L},
920 {0xffd2, 0xffd7, L},
921 {0xffda, 0xffdc, L},
922 {0xffe0, 0xffe1, ET},
923 {0xffe5, 0xffe6, ET},
924 {0x10000, 0x1000b, L},
925 {0x1000d, 0x10026, L},
926 {0x10028, 0x1003a, L},
927 {0x1003c, 0x1003d, L},
928 {0x1003f, 0x1004d, L},
929 {0x10050, 0x1005d, L},
930 {0x10080, 0x100fa, L},
931 {0x10100, 0x10100, L},
932 {0x10102, 0x10102, L},
933 {0x10107, 0x10133, L},
934 {0x10137, 0x1013f, L},
935 {0x10300, 0x1031e, L},
936 {0x10320, 0x10323, L},
937 {0x10330, 0x1034a, L},
938 {0x10380, 0x1039d, L},
939 {0x1039f, 0x1039f, L},
940 {0x10400, 0x1049d, L},
941 {0x104a0, 0x104a9, L},
942 {0x10800, 0x10805, R},
943 {0x10808, 0x10808, R},
944 {0x1080a, 0x10835, R},
945 {0x10837, 0x10838, R},
946 {0x1083c, 0x1083c, R},
947 {0x1083f, 0x1083f, R},
948 {0x1d000, 0x1d0f5, L},
949 {0x1d100, 0x1d126, L},
950 {0x1d12a, 0x1d166, L},
951 {0x1d167, 0x1d169, NSM},
952 {0x1d16a, 0x1d172, L},
953 {0x1d173, 0x1d17a, BN},
954 {0x1d17b, 0x1d182, NSM},
955 {0x1d183, 0x1d184, L},
956 {0x1d185, 0x1d18b, NSM},
957 {0x1d18c, 0x1d1a9, L},
958 {0x1d1aa, 0x1d1ad, NSM},
959 {0x1d1ae, 0x1d1dd, L},
960 {0x1d400, 0x1d454, L},
961 {0x1d456, 0x1d49c, L},
962 {0x1d49e, 0x1d49f, L},
963 {0x1d4a2, 0x1d4a2, L},
964 {0x1d4a5, 0x1d4a6, L},
965 {0x1d4a9, 0x1d4ac, L},
966 {0x1d4ae, 0x1d4b9, L},
967 {0x1d4bb, 0x1d4bb, L},
968 {0x1d4bd, 0x1d4c3, L},
969 {0x1d4c5, 0x1d505, L},
970 {0x1d507, 0x1d50a, L},
971 {0x1d50d, 0x1d514, L},
972 {0x1d516, 0x1d51c, L},
973 {0x1d51e, 0x1d539, L},
974 {0x1d53b, 0x1d53e, L},
975 {0x1d540, 0x1d544, L},
976 {0x1d546, 0x1d546, L},
977 {0x1d54a, 0x1d550, L},
978 {0x1d552, 0x1d6a3, L},
979 {0x1d6a8, 0x1d7c9, L},
980 {0x1d7ce, 0x1d7ff, EN},
981 {0x20000, 0x2a6d6, L},
982 {0x2f800, 0x2fa1d, L},
983 {0xe0001, 0xe0001, BN},
984 {0xe0020, 0xe007f, BN},
985 {0xe0100, 0xe01ef, NSM},
986 {0xf0000, 0xffffd, L},
987 {0x100000, 0x10fffd, L}
988 };
989
990 int i, j, k;
991
992 i = -1;
993 j = lenof(lookup);
994
995 while (j - i > 1) {
996 k = (i + j) / 2;
997 if (ch < lookup[k].first)
998 j = k;
999 else if (ch > lookup[k].last)
1000 i = k;
1001 else
1002 return lookup[k].type;
1003 }
1004
1005 /*
1006 * If we reach here, the character was not in any of the
1007 * intervals listed in the lookup table. This means we return
1008 * ON (`Other Neutrals'). This is the appropriate code for any
1009 * character genuinely not listed in the Unicode table, and
1010 * also the table above has deliberately left out any
1011 * characters _explicitly_ listed as ON (to save space!).
1012 */
1013 return ON;
1014 }
1015
1016 /*
1017 * Function exported to front ends to allow them to identify
1018 * bidi-active characters (in case, for example, the platform's
1019 * text display function can't conveniently be prevented from doing
1020 * its own bidi and so special treatment is required for characters
1021 * that would cause the bidi algorithm to activate).
1022 *
1023 * This function is passed a single Unicode code point, and returns
1024 * nonzero if the presence of this code point can possibly cause
1025 * the bidi algorithm to do any reordering. Thus, any string
1026 * composed entirely of characters for which is_rtl() returns zero
1027 * should be safe to pass to a bidi-active platform display
1028 * function without fear.
1029 *
1030 * (is_rtl() must therefore also return true for any character
1031 * which would be affected by Arabic shaping, but this isn't
1032 * important because all such characters are right-to-left so it
1033 * would have flagged them anyway.)
1034 */
1035 int is_rtl(int c)
1036 {
1037 /*
1038 * After careful reading of the Unicode bidi algorithm (URL as
1039 * given at the top of this file) I believe that the only
1040 * character classes which can possibly cause trouble are R,
1041 * AL, RLE and RLO. I think that any string containing no
1042 * character in any of those classes will be displayed
1043 * uniformly left-to-right by the Unicode bidi algorithm.
1044 */
1045 const int mask = (1<<R) | (1<<AL) | (1<<RLE) | (1<<RLO);
1046
1047 return mask & (1 << (getType(c)));
1048 }
1049
1050 /*
1051 * The most significant 2 bits of each level are used to store
1052 * Override status of each character
1053 * This function sets the override bits of level according
1054 * to the value in override, and reurns the new byte.
1055 */
1056 unsigned char setOverrideBits(unsigned char level, unsigned char override)
1057 {
1058 if (override == ON)
1059 return level;
1060 else if (override == R)
1061 return level | OISR;
1062 else if (override == L)
1063 return level | OISL;
1064 return level;
1065 }
1066
/*
 * Looks at the run of equal values in `level' that ends just before
 * index `from', and returns the value found immediately _before_ that
 * run. Returns -1 if `from' is not positive or if the run reaches back
 * to the start of the array. Used to process U+202C POP DIRECTIONAL
 * FORMATTING.
 */
int getPreviousLevel(unsigned char* level, int from)
{
    int pos;
    unsigned char run_value;

    if (from <= 0)
        return -1;

    run_value = level[from - 1];

    /* walk backwards until a value different from the run turns up */
    for (pos = from - 2; pos >= 0; pos--) {
        if (level[pos] != run_value)
            return level[pos];
    }

    return -1;
}
1087
1088 /* The Main shaping function, and the only one to be used
1089 * by the outside world.
1090 *
1091 * line: buffer to apply shaping to. this must be passed by doBidi() first
1092 * to: output buffer for the shaped data
1093 * count: number of characters in line
1094 */
1095 int do_shape(bidi_char *line, bidi_char *to, int count)
1096 {
1097 int i, tempShape, ligFlag;
1098
1099 for (ligFlag=i=0; i<count; i++) {
1100 to[i] = line[i];
1101 tempShape = STYPE(line[i].wc);
1102 switch (tempShape) {
1103 case SC:
1104 break;
1105
1106 case SU:
1107 break;
1108
1109 case SR:
1110 tempShape = (i+1 < count ? STYPE(line[i+1].wc) : SU);
1111 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
1112 to[i].wc = SFINAL((SISOLATED(line[i].wc)));
1113 else
1114 to[i].wc = SISOLATED(line[i].wc);
1115 break;
1116
1117
1118 case SD:
1119 /* Make Ligatures */
1120 tempShape = (i+1 < count ? STYPE(line[i+1].wc) : SU);
1121 if (line[i].wc == 0x644) {
1122 if (i > 0) switch (line[i-1].wc) {
1123 case 0x622:
1124 ligFlag = 1;
1125 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
1126 to[i].wc = 0xFEF6;
1127 else
1128 to[i].wc = 0xFEF5;
1129 break;
1130 case 0x623:
1131 ligFlag = 1;
1132 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
1133 to[i].wc = 0xFEF8;
1134 else
1135 to[i].wc = 0xFEF7;
1136 break;
1137 case 0x625:
1138 ligFlag = 1;
1139 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
1140 to[i].wc = 0xFEFA;
1141 else
1142 to[i].wc = 0xFEF9;
1143 break;
1144 case 0x627:
1145 ligFlag = 1;
1146 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
1147 to[i].wc = 0xFEFC;
1148 else
1149 to[i].wc = 0xFEFB;
1150 break;
1151 }
1152 if (ligFlag) {
1153 to[i-1].wc = 0x20;
1154 ligFlag = 0;
1155 break;
1156 }
1157 }
1158
1159 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) {
1160 tempShape = (i > 0 ? STYPE(line[i-1].wc) : SU);
1161 if ((tempShape == SR) || (tempShape == SD) || (tempShape == SC))
1162 to[i].wc = SMEDIAL((SISOLATED(line[i].wc)));
1163 else
1164 to[i].wc = SFINAL((SISOLATED(line[i].wc)));
1165 break;
1166 }
1167
1168 tempShape = (i > 0 ? STYPE(line[i-1].wc) : SU);
1169 if ((tempShape == SR) || (tempShape == SD) || (tempShape == SC))
1170 to[i].wc = SINITIAL((SISOLATED(line[i].wc)));
1171 else
1172 to[i].wc = SISOLATED(line[i].wc);
1173 break;
1174
1175
1176 }
1177 }
1178 return 1;
1179 }
1180
1181 /*
1182 * The Main Bidi Function, and the only function that should
1183 * be used by the outside world.
1184 *
1185 * line: a buffer of size count containing text to apply
1186 * the Bidirectional algorithm to.
1187 */
1188
1189 int do_bidi(bidi_char *line, int count)
1190 {
1191 unsigned char* types;
1192 unsigned char* levels;
1193 unsigned char paragraphLevel;
1194 unsigned char currentEmbedding;
1195 unsigned char currentOverride;
1196 unsigned char tempType;
1197 int i, j, yes, bover;
1198
1199 /* Check the presence of R or AL types as optimization */
1200 yes = 0;
1201 for (i=0; i<count; i++) {
1202 int type = getType(line[i].wc);
1203 if (type == R || type == AL) {
1204 yes = 1;
1205 break;
1206 }
1207 }
1208 if (yes == 0)
1209 return L;
1210
1211 /* Initialize types, levels */
1212 types = snewn(count, unsigned char);
1213 levels = snewn(count, unsigned char);
1214
1215 /* Rule (P1) NOT IMPLEMENTED
1216 * P1. Split the text into separate paragraphs. A paragraph separator is
1217 * kept with the previous paragraph. Within each paragraph, apply all the
1218 * other rules of this algorithm.
1219 */
1220
1221 /* Rule (P2), (P3)
1222 * P2. In each paragraph, find the first character of type L, AL, or R.
1223 * P3. If a character is found in P2 and it is of type AL or R, then set
1224 * the paragraph embedding level to one; otherwise, set it to zero.
1225 */
1226 paragraphLevel = 0;
1227 for (i=0; i<count ; i++) {
1228 int type = getType(line[i].wc);
1229 if (type == R || type == AL) {
1230 paragraphLevel = 1;
1231 break;
1232 } else if (type == L)
1233 break;
1234 }
1235
1236 /* Rule (X1)
1237 * X1. Begin by setting the current embedding level to the paragraph
1238 * embedding level. Set the directional override status to neutral.
1239 */
1240 currentEmbedding = paragraphLevel;
1241 currentOverride = ON;
1242
1243 /* Rule (X2), (X3), (X4), (X5), (X6), (X7), (X8)
1244 * X2. With each RLE, compute the least greater odd embedding level.
1245 * X3. With each LRE, compute the least greater even embedding level.
1246 * X4. With each RLO, compute the least greater odd embedding level.
1247 * X5. With each LRO, compute the least greater even embedding level.
1248 * X6. For all types besides RLE, LRE, RLO, LRO, and PDF:
1249 * a. Set the level of the current character to the current
1250 * embedding level.
1251 * b. Whenever the directional override status is not neutral,
1252 * reset the current character type to the directional
1253 * override status.
1254 * X7. With each PDF, determine the matching embedding or override code.
1255 * If there was a valid matching code, restore (pop) the last
1256 * remembered (pushed) embedding level and directional override.
1257 * X8. All explicit directional embeddings and overrides are completely
1258 * terminated at the end of each paragraph. Paragraph separators are not
1259 * included in the embedding. (Useless here) NOT IMPLEMENTED
1260 */
1261 bover = 0;
1262 for (i=0; i<count; i++) {
1263 tempType = getType(line[i].wc);
1264 switch (tempType) {
1265 case RLE:
1266 currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding);
1267 levels[i] = setOverrideBits(levels[i], currentOverride);
1268 currentOverride = ON;
1269 break;
1270
1271 case LRE:
1272 currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding);
1273 levels[i] = setOverrideBits(levels[i], currentOverride);
1274 currentOverride = ON;
1275 break;
1276
1277 case RLO:
1278 currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding);
1279 tempType = currentOverride = R;
1280 bover = 1;
1281 break;
1282
1283 case LRO:
1284 currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding);
1285 tempType = currentOverride = L;
1286 bover = 1;
1287 break;
1288
1289 case PDF:
1290 {
1291 int prevlevel = getPreviousLevel(levels, i);
1292
1293 if (prevlevel == -1) {
1294 currentEmbedding = paragraphLevel;
1295 currentOverride = ON;
1296 } else {
1297 currentOverride = currentEmbedding & OMASK;
1298 currentEmbedding = currentEmbedding & ~OMASK;
1299 }
1300 }
1301 levels[i] = currentEmbedding;
1302 break;
1303
1304 /* Whitespace is treated as neutral for now */
1305 case WS:
1306 case S:
1307 levels[i] = currentEmbedding;
1308 tempType = ON;
1309 if (currentOverride != ON)
1310 tempType = currentOverride;
1311 break;
1312
1313 default:
1314 levels[i] = currentEmbedding;
1315 if (currentOverride != ON)
1316 tempType = currentOverride;
1317 break;
1318
1319 }
1320 types[i] = tempType;
1321 }
1322 /* this clears out all overrides, so we can use levels safely... */
1323 /* checks bover first */
1324 if (bover)
1325 for (i=0; i<count; i++)
1326 levels[i] = levels[i] & LMASK;
1327
1328 /* Rule (X9)
1329 * X9. Remove all RLE, LRE, RLO, LRO, PDF, and BN codes.
1330 * Here, they're converted to BN.
1331 */
1332 for (i=0; i<count; i++) {
1333 switch (types[i]) {
1334 case RLE:
1335 case LRE:
1336 case RLO:
1337 case LRO:
1338 case PDF:
1339 types[i] = BN;
1340 break;
1341 }
1342 }
1343
1344 /* Rule (W1)
1345 * W1. Examine each non-spacing mark (NSM) in the level run, and change
1346 * the type of the NSM to the type of the previous character. If the NSM
1347 * is at the start of the level run, it will get the type of sor.
1348 */
1349 if (types[0] == NSM)
1350 types[0] = paragraphLevel;
1351
1352 for (i=1; i<count; i++) {
1353 if (types[i] == NSM)
1354 types[i] = types[i-1];
1355 /* Is this a safe assumption?
1356 * I assumed the previous, IS a character.
1357 */
1358 }
1359
1360 /* Rule (W2)
1361 * W2. Search backwards from each instance of a European number until the
1362 * first strong type (R, L, AL, or sor) is found. If an AL is found,
1363 * change the type of the European number to Arabic number.
1364 */
1365 for (i=0; i<count; i++) {
1366 if (types[i] == EN) {
1367 j=i;
1368 while (j >= 0) {
1369 if (types[j] == AL) {
1370 types[i] = AN;
1371 break;
1372 } else if (types[j] == R || types[j] == L) {
1373 break;
1374 }
1375 j--;
1376 }
1377 }
1378 }
1379
1380 /* Rule (W3)
1381 * W3. Change all ALs to R.
1382 *
1383 * Optimization: on Rule Xn, we might set a flag on AL type
1384 * to prevent this loop in L R lines only...
1385 */
1386 for (i=0; i<count; i++) {
1387 if (types[i] == AL)
1388 types[i] = R;
1389 }
1390
1391 /* Rule (W4)
1392 * W4. A single European separator between two European numbers changes
1393 * to a European number. A single common separator between two numbers
1394 * of the same type changes to that type.
1395 */
1396 for (i=1; i<(count-1); i++) {
1397 if (types[i] == ES) {
1398 if (types[i-1] == EN && types[i+1] == EN)
1399 types[i] = EN;
1400 } else if (types[i] == CS) {
1401 if (types[i-1] == EN && types[i+1] == EN)
1402 types[i] = EN;
1403 else if (types[i-1] == AN && types[i+1] == AN)
1404 types[i] = AN;
1405 }
1406 }
1407
1408 /* Rule (W5)
1409 * W5. A sequence of European terminators adjacent to European numbers
1410 * changes to all European numbers.
1411 *
1412 * Optimization: lots here... else ifs need rearrangement
1413 */
1414 for (i=0; i<count; i++) {
1415 if (types[i] == ET) {
1416 if (i > 0 && types[i-1] == EN) {
1417 types[i] = EN;
1418 continue;
1419 } else if (i < count-1 && types[i+1] == EN) {
1420 types[i] = EN;
1421 continue;
1422 } else if (i < count-1 && types[i+1] == ET) {
1423 j=i;
1424 while (j <count && types[j] == ET) {
1425 j++;
1426 }
1427 if (types[j] == EN)
1428 types[i] = EN;
1429 }
1430 }
1431 }
1432
1433 /* Rule (W6)
1434 * W6. Otherwise, separators and terminators change to Other Neutral:
1435 */
1436 for (i=0; i<count; i++) {
1437 switch (types[i]) {
1438 case ES:
1439 case ET:
1440 case CS:
1441 types[i] = ON;
1442 break;
1443 }
1444 }
1445
1446 /* Rule (W7)
1447 * W7. Search backwards from each instance of a European number until
1448 * the first strong type (R, L, or sor) is found. If an L is found,
1449 * then change the type of the European number to L.
1450 */
1451 for (i=0; i<count; i++) {
1452 if (types[i] == EN) {
1453 j=i;
1454 while (j >= 0) {
1455 if (types[j] == L) {
1456 types[i] = L;
1457 break;
1458 } else if (types[j] == R || types[j] == AL) {
1459 break;
1460 }
1461 j--;
1462 }
1463 }
1464 }
1465
1466 /* Rule (N1)
1467 * N1. A sequence of neutrals takes the direction of the surrounding
1468 * strong text if the text on both sides has the same direction. European
1469 * and Arabic numbers are treated as though they were R.
1470 */
1471 if (count >= 2 && types[0] == ON) {
1472 if ((types[1] == R) || (types[1] == EN) || (types[1] == AN))
1473 types[0] = R;
1474 else if (types[1] == L)
1475 types[0] = L;
1476 }
1477 for (i=1; i<(count-1); i++) {
1478 if (types[i] == ON) {
1479 if (types[i-1] == L) {
1480 j=i;
1481 while (j<(count-1) && types[j] == ON) {
1482 j++;
1483 }
1484 if (types[j] == L) {
1485 while (i<j) {
1486 types[i] = L;
1487 i++;
1488 }
1489 }
1490
1491 } else if ((types[i-1] == R) ||
1492 (types[i-1] == EN) ||
1493 (types[i-1] == AN)) {
1494 j=i;
1495 while (j<(count-1) && types[j] == ON) {
1496 j++;
1497 }
1498 if ((types[j] == R) ||
1499 (types[j] == EN) ||
1500 (types[j] == AN)) {
1501 while (i<j) {
1502 types[i] = R;
1503 i++;
1504 }
1505 }
1506 }
1507 }
1508 }
1509 if (count >= 2 && types[count-1] == ON) {
1510 if (types[count-2] == R || types[count-2] == EN || types[count-2] == AN)
1511 types[count-1] = R;
1512 else if (types[count-2] == L)
1513 types[count-1] = L;
1514 }
1515
1516 /* Rule (N2)
1517 * N2. Any remaining neutrals take the embedding direction.
1518 */
1519 for (i=0; i<count; i++) {
1520 if (types[i] == ON) {
1521 if ((levels[i] % 2) == 0)
1522 types[i] = L;
1523 else
1524 types[i] = R;
1525 }
1526 }
1527
1528 /* Rule (I1)
1529 * I1. For all characters with an even (left-to-right) embedding
1530 * direction, those of type R go up one level and those of type AN or
1531 * EN go up two levels.
1532 */
1533 for (i=0; i<count; i++) {
1534 if ((levels[i] % 2) == 0) {
1535 if (types[i] == R)
1536 levels[i] += 1;
1537 else if (types[i] == AN || types[i] == EN)
1538 levels[i] += 2;
1539 }
1540 }
1541
1542 /* Rule (I2)
1543 * I2. For all characters with an odd (right-to-left) embedding direction,
1544 * those of type L, EN or AN go up one level.
1545 */
1546 for (i=0; i<count; i++) {
1547 if ((levels[i] % 2) == 1) {
1548 if (types[i] == L || types[i] == EN || types[i] == AN)
1549 levels[i] += 1;
1550 }
1551 }
1552
1553 /* Rule (L1)
1554 * L1. On each line, reset the embedding level of the following characters
1555 * to the paragraph embedding level:
1556 * (1)segment separators, (2)paragraph separators,
1557 * (3)any sequence of whitespace characters preceding
1558 * a segment separator or paragraph separator,
1559 * (4)and any sequence of white space characters
1560 * at the end of the line.
1561 * The types of characters used here are the original types, not those
1562 * modified by the previous phase.
1563 */
1564 j=count-1;
1565 while (j>0 && (getType(line[j].wc) == WS)) {
1566 j--;
1567 }
1568 if (j < (count-1)) {
1569 for (j++; j<count; j++)
1570 levels[j] = paragraphLevel;
1571 }
1572 for (i=0; i<count; i++) {
1573 tempType = getType(line[i].wc);
1574 if (tempType == WS) {
1575 j=i;
1576 while (j<count && (getType(line[j].wc) == WS)) {
1577 j++;
1578 }
1579 if (j==count || getType(line[j].wc) == B ||
1580 getType(line[j].wc) == S) {
1581 for (j--; j>=i ; j--) {
1582 levels[j] = paragraphLevel;
1583 }
1584 }
1585 } else if (tempType == B || tempType == S) {
1586 levels[i] = paragraphLevel;
1587 }
1588 }
1589
1590 /* Rule (L4) NOT IMPLEMENTED
1591 * L4. A character that possesses the mirrored property as specified by
1592 * Section 4.7, Mirrored, must be depicted by a mirrored glyph if the
1593 * resolved directionality of that character is R.
1594 */
1595 /* Note: this is implemented before L2 for efficiency */
1596 for (i=0; i<count; i++)
1597 if ((levels[i] % 2) == 1)
1598 doMirror(&line[i].wc);
1599
1600 /* Rule (L2)
1601 * L2. From the highest level found in the text to the lowest odd level on
1602 * each line, including intermediate levels not actually present in the
1603 * text, reverse any contiguous sequence of characters that are at that
1604 * level or higher
1605 */
1606 /* we flip the character string and leave the level array */
1607 i=0;
1608 tempType = levels[0];
1609 while (i < count) {
1610 if (levels[i] > tempType)
1611 tempType = levels[i];
1612 i++;
1613 }
1614 /* maximum level in tempType. */
1615 while (tempType > 0) { /* loop from highest level to the least odd, */
1616 /* which i assume is 1 */
1617 flipThisRun(line, levels, tempType, count);
1618 tempType--;
1619 }
1620
1621 /* Rule (L3) NOT IMPLEMENTED
1622 * L3. Combining marks applied to a right-to-left base character will at
1623 * this point precede their base character. If the rendering engine
1624 * expects them to follow the base characters in the final display
1625 * process, then the ordering of the marks and the base character must
1626 * be reversed.
1627 */
1628 sfree(types);
1629 sfree(levels);
1630 return R;
1631 }
1632
1633
/*
 * Replace a character by its mirrored counterpart, if it has one.
 *
 * ch: pointer to the character to examine. If *ch appears in the table
 *     below it is overwritten with its mirror glyph; otherwise it is
 *     left unchanged.
 *
 * The table is first split on the high byte of the low 16 bits, so
 * that each switch only has to consider one 256-character block. The
 * switch itself compares the full value of *ch.
 *
 * Every pair is listed in both directions, so applying the function
 * twice to the same character restores the original value.
 */
void doMirror(unsigned int *ch)
{
    if ((*ch & 0xFF00) == 0) {
        switch (*ch) {
          case 0x0028: *ch = 0x0029; break;
          case 0x0029: *ch = 0x0028; break;
          case 0x003C: *ch = 0x003E; break;
          case 0x003E: *ch = 0x003C; break;
          case 0x005B: *ch = 0x005D; break;
          case 0x005D: *ch = 0x005B; break;
          case 0x007B: *ch = 0x007D; break;
          case 0x007D: *ch = 0x007B; break;
          case 0x00AB: *ch = 0x00BB; break;
          case 0x00BB: *ch = 0x00AB; break;
        }
    } else if ((*ch & 0xFF00) == 0x2000) {
        switch (*ch) {
          case 0x2039: *ch = 0x203A; break;
          case 0x203A: *ch = 0x2039; break;
          case 0x2045: *ch = 0x2046; break;
          case 0x2046: *ch = 0x2045; break;
          case 0x207D: *ch = 0x207E; break;
          case 0x207E: *ch = 0x207D; break;
          case 0x208D: *ch = 0x208E; break;
          case 0x208E: *ch = 0x208D; break;
        }
    } else if ((*ch & 0xFF00) == 0x2200) {
        switch (*ch) {
          case 0x2208: *ch = 0x220B; break;
          case 0x2209: *ch = 0x220C; break;
          case 0x220A: *ch = 0x220D; break;
          case 0x220B: *ch = 0x2208; break;
          case 0x220C: *ch = 0x2209; break;
          case 0x220D: *ch = 0x220A; break;
          case 0x2215: *ch = 0x29F5; break;
          case 0x223C: *ch = 0x223D; break;
          case 0x223D: *ch = 0x223C; break;
          case 0x2243: *ch = 0x22CD; break;
          case 0x2252: *ch = 0x2253; break;
          case 0x2253: *ch = 0x2252; break;
          case 0x2254: *ch = 0x2255; break;
          case 0x2255: *ch = 0x2254; break;
          case 0x2264: *ch = 0x2265; break;
          case 0x2265: *ch = 0x2264; break;
          case 0x2266: *ch = 0x2267; break;
          case 0x2267: *ch = 0x2266; break;
          case 0x2268: *ch = 0x2269; break;
          case 0x2269: *ch = 0x2268; break;
          case 0x226A: *ch = 0x226B; break;
          case 0x226B: *ch = 0x226A; break;
          case 0x226E: *ch = 0x226F; break;
          case 0x226F: *ch = 0x226E; break;
          case 0x2270: *ch = 0x2271; break;
          case 0x2271: *ch = 0x2270; break;
          case 0x2272: *ch = 0x2273; break;
          case 0x2273: *ch = 0x2272; break;
          case 0x2274: *ch = 0x2275; break;
          case 0x2275: *ch = 0x2274; break;
          case 0x2276: *ch = 0x2277; break;
          case 0x2277: *ch = 0x2276; break;
          case 0x2278: *ch = 0x2279; break;
          case 0x2279: *ch = 0x2278; break;
          case 0x227A: *ch = 0x227B; break;
          case 0x227B: *ch = 0x227A; break;
          case 0x227C: *ch = 0x227D; break;
          case 0x227D: *ch = 0x227C; break;
          case 0x227E: *ch = 0x227F; break;
          case 0x227F: *ch = 0x227E; break;
          case 0x2280: *ch = 0x2281; break;
          case 0x2281: *ch = 0x2280; break;
          case 0x2282: *ch = 0x2283; break;
          case 0x2283: *ch = 0x2282; break;
          case 0x2284: *ch = 0x2285; break;
          case 0x2285: *ch = 0x2284; break;
          case 0x2286: *ch = 0x2287; break;
          case 0x2287: *ch = 0x2286; break;
          case 0x2288: *ch = 0x2289; break;
          case 0x2289: *ch = 0x2288; break;
          case 0x228A: *ch = 0x228B; break;
          case 0x228B: *ch = 0x228A; break;
          case 0x228F: *ch = 0x2290; break;
          case 0x2290: *ch = 0x228F; break;
          case 0x2291: *ch = 0x2292; break;
          case 0x2292: *ch = 0x2291; break;
          case 0x2298: *ch = 0x29B8; break;
          case 0x22A2: *ch = 0x22A3; break;
          case 0x22A3: *ch = 0x22A2; break;
          case 0x22A6: *ch = 0x2ADE; break;
          case 0x22A8: *ch = 0x2AE4; break;
          case 0x22A9: *ch = 0x2AE3; break;
          case 0x22AB: *ch = 0x2AE5; break;
          case 0x22B0: *ch = 0x22B1; break;
          case 0x22B1: *ch = 0x22B0; break;
          case 0x22B2: *ch = 0x22B3; break;
          case 0x22B3: *ch = 0x22B2; break;
          case 0x22B4: *ch = 0x22B5; break;
          case 0x22B5: *ch = 0x22B4; break;
          case 0x22B6: *ch = 0x22B7; break;
          case 0x22B7: *ch = 0x22B6; break;
          case 0x22C9: *ch = 0x22CA; break;
          case 0x22CA: *ch = 0x22C9; break;
          case 0x22CB: *ch = 0x22CC; break;
          case 0x22CC: *ch = 0x22CB; break;
          case 0x22CD: *ch = 0x2243; break;
          case 0x22D0: *ch = 0x22D1; break;
          case 0x22D1: *ch = 0x22D0; break;
          case 0x22D6: *ch = 0x22D7; break;
          case 0x22D7: *ch = 0x22D6; break;
          case 0x22D8: *ch = 0x22D9; break;
          case 0x22D9: *ch = 0x22D8; break;
          case 0x22DA: *ch = 0x22DB; break;
          case 0x22DB: *ch = 0x22DA; break;
          case 0x22DC: *ch = 0x22DD; break;
          case 0x22DD: *ch = 0x22DC; break;
          case 0x22DE: *ch = 0x22DF; break;
          case 0x22DF: *ch = 0x22DE; break;
          case 0x22E0: *ch = 0x22E1; break;
          case 0x22E1: *ch = 0x22E0; break;
          case 0x22E2: *ch = 0x22E3; break;
          case 0x22E3: *ch = 0x22E2; break;
          case 0x22E4: *ch = 0x22E5; break;
          case 0x22E5: *ch = 0x22E4; break;
          case 0x22E6: *ch = 0x22E7; break;
          case 0x22E7: *ch = 0x22E6; break;
          case 0x22E8: *ch = 0x22E9; break;
          case 0x22E9: *ch = 0x22E8; break;
          case 0x22EA: *ch = 0x22EB; break;
          case 0x22EB: *ch = 0x22EA; break;
          case 0x22EC: *ch = 0x22ED; break;
          case 0x22ED: *ch = 0x22EC; break;
          case 0x22F0: *ch = 0x22F1; break;
          case 0x22F1: *ch = 0x22F0; break;
          case 0x22F2: *ch = 0x22FA; break;
          case 0x22F3: *ch = 0x22FB; break;
          case 0x22F4: *ch = 0x22FC; break;
          case 0x22F6: *ch = 0x22FD; break;
          case 0x22F7: *ch = 0x22FE; break;
          case 0x22FA: *ch = 0x22F2; break;
          case 0x22FB: *ch = 0x22F3; break;
          case 0x22FC: *ch = 0x22F4; break;
          case 0x22FD: *ch = 0x22F6; break;
          case 0x22FE: *ch = 0x22F7; break;
        }
    } else if ((*ch & 0xFF00) == 0x2300) {
        switch (*ch) {
          case 0x2308: *ch = 0x2309; break;
          case 0x2309: *ch = 0x2308; break;
          case 0x230A: *ch = 0x230B; break;
          case 0x230B: *ch = 0x230A; break;
          case 0x2329: *ch = 0x232A; break;
          case 0x232A: *ch = 0x2329; break;
        }
    } else if ((*ch & 0xFF00) == 0x2700) {
        switch (*ch) {
          case 0x2768: *ch = 0x2769; break;
          case 0x2769: *ch = 0x2768; break;
          case 0x276A: *ch = 0x276B; break;
          case 0x276B: *ch = 0x276A; break;
          case 0x276C: *ch = 0x276D; break;
          case 0x276D: *ch = 0x276C; break;
          case 0x276E: *ch = 0x276F; break;
          case 0x276F: *ch = 0x276E; break;
          case 0x2770: *ch = 0x2771; break;
          case 0x2771: *ch = 0x2770; break;
          case 0x2772: *ch = 0x2773; break;
          case 0x2773: *ch = 0x2772; break;
          case 0x2774: *ch = 0x2775; break;
          case 0x2775: *ch = 0x2774; break;
          case 0x27D5: *ch = 0x27D6; break;
          case 0x27D6: *ch = 0x27D5; break;
          case 0x27DD: *ch = 0x27DE; break;
          case 0x27DE: *ch = 0x27DD; break;
          case 0x27E2: *ch = 0x27E3; break;
          case 0x27E3: *ch = 0x27E2; break;
          case 0x27E4: *ch = 0x27E5; break;
          case 0x27E5: *ch = 0x27E4; break;
          case 0x27E6: *ch = 0x27E7; break;
          case 0x27E7: *ch = 0x27E6; break;
          case 0x27E8: *ch = 0x27E9; break;
          case 0x27E9: *ch = 0x27E8; break;
          case 0x27EA: *ch = 0x27EB; break;
          case 0x27EB: *ch = 0x27EA; break;
        }
    } else if ((*ch & 0xFF00) == 0x2900) {
        switch (*ch) {
          case 0x2983: *ch = 0x2984; break;
          case 0x2984: *ch = 0x2983; break;
          case 0x2985: *ch = 0x2986; break;
          case 0x2986: *ch = 0x2985; break;
          case 0x2987: *ch = 0x2988; break;
          case 0x2988: *ch = 0x2987; break;
          case 0x2989: *ch = 0x298A; break;
          case 0x298A: *ch = 0x2989; break;
          case 0x298B: *ch = 0x298C; break;
          case 0x298C: *ch = 0x298B; break;
          case 0x298D: *ch = 0x2990; break;
          case 0x298E: *ch = 0x298F; break;
          case 0x298F: *ch = 0x298E; break;
          case 0x2990: *ch = 0x298D; break;
          case 0x2991: *ch = 0x2992; break;
          case 0x2992: *ch = 0x2991; break;
          case 0x2993: *ch = 0x2994; break;
          case 0x2994: *ch = 0x2993; break;
          case 0x2995: *ch = 0x2996; break;
          case 0x2996: *ch = 0x2995; break;
          case 0x2997: *ch = 0x2998; break;
          case 0x2998: *ch = 0x2997; break;
          case 0x29B8: *ch = 0x2298; break;
          case 0x29C0: *ch = 0x29C1; break;
          case 0x29C1: *ch = 0x29C0; break;
          case 0x29C4: *ch = 0x29C5; break;
          case 0x29C5: *ch = 0x29C4; break;
          case 0x29CF: *ch = 0x29D0; break;
          case 0x29D0: *ch = 0x29CF; break;
          case 0x29D1: *ch = 0x29D2; break;
          case 0x29D2: *ch = 0x29D1; break;
          case 0x29D4: *ch = 0x29D5; break;
          case 0x29D5: *ch = 0x29D4; break;
          case 0x29D8: *ch = 0x29D9; break;
          case 0x29D9: *ch = 0x29D8; break;
          case 0x29DA: *ch = 0x29DB; break;
          case 0x29DB: *ch = 0x29DA; break;
          case 0x29F5: *ch = 0x2215; break;
          case 0x29F8: *ch = 0x29F9; break;
          case 0x29F9: *ch = 0x29F8; break;
          case 0x29FC: *ch = 0x29FD; break;
          case 0x29FD: *ch = 0x29FC; break;
        }
    } else if ((*ch & 0xFF00) == 0x2A00) {
        switch (*ch) {
          case 0x2A2B: *ch = 0x2A2C; break;
          case 0x2A2C: *ch = 0x2A2B; break;
          /* U+2A2D pairs with U+2A2E (plus sign in left/right half
           * circle), not with U+2A2C. */
          case 0x2A2D: *ch = 0x2A2E; break;
          case 0x2A2E: *ch = 0x2A2D; break;
          case 0x2A34: *ch = 0x2A35; break;
          case 0x2A35: *ch = 0x2A34; break;
          case 0x2A3C: *ch = 0x2A3D; break;
          case 0x2A3D: *ch = 0x2A3C; break;
          case 0x2A64: *ch = 0x2A65; break;
          case 0x2A65: *ch = 0x2A64; break;
          case 0x2A79: *ch = 0x2A7A; break;
          case 0x2A7A: *ch = 0x2A79; break;
          case 0x2A7D: *ch = 0x2A7E; break;
          case 0x2A7E: *ch = 0x2A7D; break;
          case 0x2A7F: *ch = 0x2A80; break;
          case 0x2A80: *ch = 0x2A7F; break;
          case 0x2A81: *ch = 0x2A82; break;
          case 0x2A82: *ch = 0x2A81; break;
          case 0x2A83: *ch = 0x2A84; break;
          case 0x2A84: *ch = 0x2A83; break;
          case 0x2A8B: *ch = 0x2A8C; break;
          case 0x2A8C: *ch = 0x2A8B; break;
          case 0x2A91: *ch = 0x2A92; break;
          case 0x2A92: *ch = 0x2A91; break;
          case 0x2A93: *ch = 0x2A94; break;
          case 0x2A94: *ch = 0x2A93; break;
          case 0x2A95: *ch = 0x2A96; break;
          case 0x2A96: *ch = 0x2A95; break;
          case 0x2A97: *ch = 0x2A98; break;
          case 0x2A98: *ch = 0x2A97; break;
          case 0x2A99: *ch = 0x2A9A; break;
          case 0x2A9A: *ch = 0x2A99; break;
          case 0x2A9B: *ch = 0x2A9C; break;
          case 0x2A9C: *ch = 0x2A9B; break;
          case 0x2AA1: *ch = 0x2AA2; break;
          case 0x2AA2: *ch = 0x2AA1; break;
          case 0x2AA6: *ch = 0x2AA7; break;
          case 0x2AA7: *ch = 0x2AA6; break;
          case 0x2AA8: *ch = 0x2AA9; break;
          case 0x2AA9: *ch = 0x2AA8; break;
          case 0x2AAA: *ch = 0x2AAB; break;
          case 0x2AAB: *ch = 0x2AAA; break;
          case 0x2AAC: *ch = 0x2AAD; break;
          case 0x2AAD: *ch = 0x2AAC; break;
          case 0x2AAF: *ch = 0x2AB0; break;
          case 0x2AB0: *ch = 0x2AAF; break;
          case 0x2AB3: *ch = 0x2AB4; break;
          case 0x2AB4: *ch = 0x2AB3; break;
          case 0x2ABB: *ch = 0x2ABC; break;
          case 0x2ABC: *ch = 0x2ABB; break;
          case 0x2ABD: *ch = 0x2ABE; break;
          case 0x2ABE: *ch = 0x2ABD; break;
          case 0x2ABF: *ch = 0x2AC0; break;
          case 0x2AC0: *ch = 0x2ABF; break;
          case 0x2AC1: *ch = 0x2AC2; break;
          case 0x2AC2: *ch = 0x2AC1; break;
          case 0x2AC3: *ch = 0x2AC4; break;
          case 0x2AC4: *ch = 0x2AC3; break;
          case 0x2AC5: *ch = 0x2AC6; break;
          case 0x2AC6: *ch = 0x2AC5; break;
          case 0x2ACD: *ch = 0x2ACE; break;
          case 0x2ACE: *ch = 0x2ACD; break;
          case 0x2ACF: *ch = 0x2AD0; break;
          case 0x2AD0: *ch = 0x2ACF; break;
          case 0x2AD1: *ch = 0x2AD2; break;
          case 0x2AD2: *ch = 0x2AD1; break;
          case 0x2AD3: *ch = 0x2AD4; break;
          case 0x2AD4: *ch = 0x2AD3; break;
          case 0x2AD5: *ch = 0x2AD6; break;
          case 0x2AD6: *ch = 0x2AD5; break;
          case 0x2ADE: *ch = 0x22A6; break;
          case 0x2AE3: *ch = 0x22A9; break;
          case 0x2AE4: *ch = 0x22A8; break;
          case 0x2AE5: *ch = 0x22AB; break;
          case 0x2AEC: *ch = 0x2AED; break;
          case 0x2AED: *ch = 0x2AEC; break;
          case 0x2AF7: *ch = 0x2AF8; break;
          case 0x2AF8: *ch = 0x2AF7; break;
          case 0x2AF9: *ch = 0x2AFA; break;
          case 0x2AFA: *ch = 0x2AF9; break;
        }
    } else if ((*ch & 0xFF00) == 0x3000) {
        switch (*ch) {
          case 0x3008: *ch = 0x3009; break;
          case 0x3009: *ch = 0x3008; break;
          case 0x300A: *ch = 0x300B; break;
          case 0x300B: *ch = 0x300A; break;
          case 0x300C: *ch = 0x300D; break;
          case 0x300D: *ch = 0x300C; break;
          case 0x300E: *ch = 0x300F; break;
          case 0x300F: *ch = 0x300E; break;
          case 0x3010: *ch = 0x3011; break;
          case 0x3011: *ch = 0x3010; break;
          case 0x3014: *ch = 0x3015; break;
          case 0x3015: *ch = 0x3014; break;
          case 0x3016: *ch = 0x3017; break;
          case 0x3017: *ch = 0x3016; break;
          case 0x3018: *ch = 0x3019; break;
          case 0x3019: *ch = 0x3018; break;
          case 0x301A: *ch = 0x301B; break;
          case 0x301B: *ch = 0x301A; break;
        }
    } else if ((*ch & 0xFF00) == 0xFF00) {
        switch (*ch) {
          case 0xFF08: *ch = 0xFF09; break;
          case 0xFF09: *ch = 0xFF08; break;
          case 0xFF1C: *ch = 0xFF1E; break;
          case 0xFF1E: *ch = 0xFF1C; break;
          case 0xFF3B: *ch = 0xFF3D; break;
          case 0xFF3D: *ch = 0xFF3B; break;
          case 0xFF5B: *ch = 0xFF5D; break;
          case 0xFF5D: *ch = 0xFF5B; break;
          case 0xFF5F: *ch = 0xFF60; break;
          case 0xFF60: *ch = 0xFF5F; break;
          case 0xFF62: *ch = 0xFF63; break;
          case 0xFF63: *ch = 0xFF62; break;
        }
    }
}
1988
1989 #ifdef TEST_GETTYPE
1990
1991 #include <stdio.h>
1992 #include <assert.h>
1993
1994 int main(int argc, char **argv)
1995 {
1996 static const struct { int type; char *name; } typetoname[] = {
1997 #define TYPETONAME(X) { X , #X }
1998 TYPETONAME(L),
1999 TYPETONAME(LRE),
2000 TYPETONAME(LRO),
2001 TYPETONAME(R),
2002 TYPETONAME(AL),
2003 TYPETONAME(RLE),
2004 TYPETONAME(RLO),
2005 TYPETONAME(PDF),
2006 TYPETONAME(EN),
2007 TYPETONAME(ES),
2008 TYPETONAME(ET),
2009 TYPETONAME(AN),
2010 TYPETONAME(CS),
2011 TYPETONAME(NSM),
2012 TYPETONAME(BN),
2013 TYPETONAME(B),
2014 TYPETONAME(S),
2015 TYPETONAME(WS),
2016 TYPETONAME(ON),
2017 #undef TYPETONAME
2018 };
2019 int i;
2020
2021 for (i = 1; i < argc; i++) {
2022 unsigned long chr = strtoul(argv[i], NULL, 0);
2023 int type = getType(chr);
2024 assert(typetoname[type].type == type);
2025 printf("U+%04x: %s\n", chr, typetoname[type].name);
2026 }
2027
2028 return 0;
2029 }
2030
2031 #endif