1 /************************************************************************
7 * This is an implementation of Unicode's Bidirectional Algorithm
10 * http://www.unicode.org/reports/tr9/
12 * Author: Ahmad Khalifa
15 * Revision Details: (Updated by Revision Control System)
21 * (www.arabeyes.org - under MIT license)
23 ************************************************************************/
28 * - Explicit marks need to be handled (they are not 100% now)
32 #include <stdlib.h> /* definition of wchar_t*/
36 #define LMASK 0x3F /* Embedding level mask: low 6 bits of a level byte */
37 #define OMASK 0xC0 /* Override mask: top 2 bits of a level byte */
38 #define OISL 0x80 /* Override bit meaning the override is L */
39 #define OISR 0x40 /* Override bit meaning the override is R */
41 /* For standalone compilation in a testing mode.
42 * Still depends on the PuTTY headers for snewn and sfree, but can avoid
43 * _linking_ with any other PuTTY code. */
45 #define safemalloc malloc
/* STYPE(xh): joining type of code point xh, looked up in shapetypes[];
 * evaluates to SU for anything outside SHAPE_FIRST..SHAPE_LAST.
 * Note that xh is evaluated more than once. */
50 #define STYPE(xh) ((((xh) >= SHAPE_FIRST) && ((xh) <= SHAPE_LAST)) ? \
51 shapetypes[(xh)-SHAPE_FIRST].type : SU) /*))*/
/* SISOLATED(xh): the form_b field of xh's shapetypes[] entry (presumably
 * the isolated presentation form -- confirm against shape_node). No range
 * check is done here, so xh must already lie in SHAPE_FIRST..SHAPE_LAST. */
52 #define SISOLATED(xh) (shapetypes[(xh)-SHAPE_FIRST].form_b)
/* SFINAL/SINITIAL/SMEDIAL: add 1, 2 and 3 to their argument; do_shape()
 * applies them to the result of SISOLATED(). */
53 #define SFINAL(xh) ((xh)+1)
54 #define SINITIAL(xh) ((xh)+2)
55 #define SMEDIAL(ch) ((ch)+3)
/* Smallest odd / even value strictly greater than x; do_bidi() uses these
 * to compute the next embedding level. */
57 #define leastGreaterOdd(x) ( ((x)+1) | 1 )
58 #define leastGreaterEven(x) ( ((x)+2) &~ 1 )
60 typedef struct bidi_char
{
61 unsigned int origwc
, wc
;
65 /* function declarations */
/* Flips the text buffer `from` according to level `max`; `level` is the
 * resolved levels buffer and `count` the line size in bidi_char. */
66 void flipThisRun(bidi_char
*from
, unsigned char* level
, int max
, int count
);
/* Finds the index of a run in `level` whose level equals `tlevel`,
 * scanning from `start` up to `count`. */
67 int findIndexOfRun(unsigned char* level
, int start
, int count
, int tlevel
);
/* Returns the bidi character type of `ch`. */
68 unsigned char getType(int ch
);
/* Returns `level` with its override bits set according to `override`. */
69 unsigned char setOverrideBits(unsigned char level
, unsigned char override
);
/* Finds the most recent run of equal values in `level` before `from` and
 * returns the value preceding that run. */
70 int getPreviousLevel(unsigned char* level
, int from
);
/* Arabic shaping: reads `count` characters from `line` and writes the
 * shaped characters to `to`. */
71 int do_shape(bidi_char
*line
, bidi_char
*to
, int count
);
/* Applies the bidirectional algorithm to the `count` characters in `line`. */
72 int do_bidi(bidi_char
*line
, int count
);
/* Checks whether *ch has a mirror glyph and, if so, replaces it in place. */
73 void doMirror(unsigned int *ch
);
100 SL
, /* Left-Joining, doesnt exist in U+0600 - U+06FF */
101 SR
, /* Right-Joining, ie has Isolated, Final */
102 SD
, /* Dual-Joining, ie has Isolated, Final, Initial, Medial */
103 SU
, /* Non-Joining */
104 SC
/* Join-Causing, like U+0640 (TATWEEL) */
112 /* Kept near the actual table, for verification. */
113 #define SHAPE_FIRST 0x621 /* code point of the first shapetypes[] entry */
114 #define SHAPE_LAST (SHAPE_FIRST + lenof(shapetypes) - 1) /* code point of the last entry, derived from the table length */
116 const shape_node shapetypes
[] = {
117 /* index, Typ, Iso, Ligature Index*/
118 /* 621 */ {SU
, 0xFE80},
119 /* 622 */ {SR
, 0xFE81},
120 /* 623 */ {SR
, 0xFE83},
121 /* 624 */ {SR
, 0xFE85},
122 /* 625 */ {SR
, 0xFE87},
123 /* 626 */ {SD
, 0xFE89},
124 /* 627 */ {SR
, 0xFE8D},
125 /* 628 */ {SD
, 0xFE8F},
126 /* 629 */ {SR
, 0xFE93},
127 /* 62A */ {SD
, 0xFE95},
128 /* 62B */ {SD
, 0xFE99},
129 /* 62C */ {SD
, 0xFE9D},
130 /* 62D */ {SD
, 0xFEA1},
131 /* 62E */ {SD
, 0xFEA5},
132 /* 62F */ {SR
, 0xFEA9},
133 /* 630 */ {SR
, 0xFEAB},
134 /* 631 */ {SR
, 0xFEAD},
135 /* 632 */ {SR
, 0xFEAF},
136 /* 633 */ {SD
, 0xFEB1},
137 /* 634 */ {SD
, 0xFEB5},
138 /* 635 */ {SD
, 0xFEB9},
139 /* 636 */ {SD
, 0xFEBD},
140 /* 637 */ {SD
, 0xFEC1},
141 /* 638 */ {SD
, 0xFEC5},
142 /* 639 */ {SD
, 0xFEC9},
143 /* 63A */ {SD
, 0xFECD},
150 /* 641 */ {SD
, 0xFED1},
151 /* 642 */ {SD
, 0xFED5},
152 /* 643 */ {SD
, 0xFED9},
153 /* 644 */ {SD
, 0xFEDD},
154 /* 645 */ {SD
, 0xFEE1},
155 /* 646 */ {SD
, 0xFEE5},
156 /* 647 */ {SD
, 0xFEE9},
157 /* 648 */ {SR
, 0xFEED},
158 /* 649 */ {SR
, 0xFEEF}, /* SD */
159 /* 64A */ {SD
, 0xFEF1},
198 /* 671 */ {SR
, 0xFB50},
206 /* 679 */ {SD
, 0xFB66},
207 /* 67A */ {SD
, 0xFB5E},
208 /* 67B */ {SD
, 0xFB52},
211 /* 67E */ {SD
, 0xFB56},
212 /* 67F */ {SD
, 0xFB62},
213 /* 680 */ {SD
, 0xFB5A},
216 /* 683 */ {SD
, 0xFB76},
217 /* 684 */ {SD
, 0xFB72},
219 /* 686 */ {SD
, 0xFB7A},
220 /* 687 */ {SD
, 0xFB7E},
221 /* 688 */ {SR
, 0xFB88},
225 /* 68C */ {SR
, 0xFB84},
226 /* 68D */ {SR
, 0xFB82},
227 /* 68E */ {SR
, 0xFB86},
230 /* 691 */ {SR
, 0xFB8C},
237 /* 698 */ {SR
, 0xFB8A},
249 /* 6A4 */ {SD
, 0xFB6A},
251 /* 6A6 */ {SD
, 0xFB6E},
254 /* 6A9 */ {SD
, 0xFB8E},
258 /* 6AD */ {SD
, 0xFBD3},
260 /* 6AF */ {SD
, 0xFB92},
262 /* 6B1 */ {SD
, 0xFB9A},
264 /* 6B3 */ {SD
, 0xFB96},
271 /* 6BA */ {SR
, 0xFB9E},
272 /* 6BB */ {SD
, 0xFBA0},
275 /* 6BE */ {SD
, 0xFBAA},
277 /* 6C0 */ {SR
, 0xFBA4},
278 /* 6C1 */ {SD
, 0xFBA6},
282 /* 6C5 */ {SR
, 0xFBE0},
283 /* 6C6 */ {SR
, 0xFBD9},
284 /* 6C7 */ {SR
, 0xFBD7},
285 /* 6C8 */ {SR
, 0xFBDB},
286 /* 6C9 */ {SR
, 0xFBE2},
288 /* 6CB */ {SR
, 0xFBDE},
289 /* 6CC */ {SD
, 0xFBFC},
295 /* 6D2 */ {SR
, 0xFBAE},
299 * Flips the text buffer, according to max level, and
303 * from: text buffer, on which to apply flipping
304 * level: resolved levels buffer
305 * max: the maximum level found in this line (should be unsigned char)
306 * count: line size in bidi_char
308 void flipThisRun(bidi_char
*from
, unsigned char *level
, int max
, int count
)
314 while (i
<count
&& j
<count
) {
316 /* find the start of the run of level=max */
318 i
= j
= findIndexOfRun(level
, i
, count
, max
);
319 /* find the end of the run */
320 while (i
<count
&& tlevel
<= level
[i
]) {
323 for (k
= i
- 1; k
> j
; k
--, j
++) {
332 * Finds the index of a run with level equal to tlevel
334 int findIndexOfRun(unsigned char* level
, int start
, int count
, int tlevel
)
337 for (i
=start
; i
<count
; i
++) {
338 if (tlevel
== level
[i
]) {
346 * Returns the bidi character type of ch.
348 * The data table in this function is constructed from the Unicode
349 * Character Database, downloadable from unicode.org at the URL
351 * http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
353 * by the following fragment of Perl:
355 perl -ne 'split ";"; $num = hex $_[0]; $type = $_[4];' \
356 -e '$fl = ($_[1] =~ /First/ ? 1 : $_[1] =~ /Last/ ? 2 : 0);' \
357 -e 'if ($type eq $runtype and ($runend == $num-1 or ' \
358 -e ' ($fl==2 and $pfl==1))) {$runend = $num;} else { &reset; }' \
359 -e '$pfl=$fl; END { &reset }; sub reset {' \
360 -e 'printf" {0x%04x, 0x%04x, %s},\n",$runstart,$runend,$runtype' \
361 -e ' if defined $runstart and $runtype ne "ON";' \
362 -e '$runstart=$runend=$num; $runtype=$type;}' \
366 unsigned char getType(int ch
)
368 static const struct {
369 int first
, last
, type
;
371 {0x0000, 0x0008, BN
},
375 {0x000c, 0x000c, WS
},
377 {0x000e, 0x001b, BN
},
380 {0x0020, 0x0020, WS
},
381 {0x0023, 0x0025, ET
},
382 {0x002b, 0x002b, ES
},
383 {0x002c, 0x002c, CS
},
384 {0x002d, 0x002d, ES
},
385 {0x002e, 0x002f, CS
},
386 {0x0030, 0x0039, EN
},
387 {0x003a, 0x003a, CS
},
390 {0x007f, 0x0084, BN
},
392 {0x0086, 0x009f, BN
},
393 {0x00a0, 0x00a0, CS
},
394 {0x00a2, 0x00a5, ET
},
396 {0x00ad, 0x00ad, BN
},
397 {0x00b0, 0x00b1, ET
},
398 {0x00b2, 0x00b3, EN
},
400 {0x00b9, 0x00b9, EN
},
410 {0x0300, 0x0357, NSM
},
411 {0x035d, 0x036f, NSM
},
421 {0x0483, 0x0486, NSM
},
422 {0x0488, 0x0489, NSM
},
431 {0x0591, 0x05a1, NSM
},
432 {0x05a3, 0x05b9, NSM
},
433 {0x05bb, 0x05bd, NSM
},
435 {0x05bf, 0x05bf, NSM
},
437 {0x05c1, 0x05c2, NSM
},
439 {0x05c4, 0x05c4, NSM
},
442 {0x0600, 0x0603, AL
},
443 {0x060c, 0x060c, CS
},
444 {0x060d, 0x060d, AL
},
445 {0x0610, 0x0615, NSM
},
446 {0x061b, 0x061b, AL
},
447 {0x061f, 0x061f, AL
},
448 {0x0621, 0x063a, AL
},
449 {0x0640, 0x064a, AL
},
450 {0x064b, 0x0658, NSM
},
451 {0x0660, 0x0669, AN
},
452 {0x066a, 0x066a, ET
},
453 {0x066b, 0x066c, AN
},
454 {0x066d, 0x066f, AL
},
455 {0x0670, 0x0670, NSM
},
456 {0x0671, 0x06d5, AL
},
457 {0x06d6, 0x06dc, NSM
},
458 {0x06dd, 0x06dd, AL
},
459 {0x06de, 0x06e4, NSM
},
460 {0x06e5, 0x06e6, AL
},
461 {0x06e7, 0x06e8, NSM
},
462 {0x06ea, 0x06ed, NSM
},
463 {0x06ee, 0x06ef, AL
},
464 {0x06f0, 0x06f9, EN
},
465 {0x06fa, 0x070d, AL
},
466 {0x070f, 0x070f, BN
},
467 {0x0710, 0x0710, AL
},
468 {0x0711, 0x0711, NSM
},
469 {0x0712, 0x072f, AL
},
470 {0x0730, 0x074a, NSM
},
471 {0x074d, 0x074f, AL
},
472 {0x0780, 0x07a5, AL
},
473 {0x07a6, 0x07b0, NSM
},
474 {0x07b1, 0x07b1, AL
},
475 {0x0901, 0x0902, NSM
},
477 {0x093c, 0x093c, NSM
},
479 {0x0941, 0x0948, NSM
},
481 {0x094d, 0x094d, NSM
},
483 {0x0951, 0x0954, NSM
},
485 {0x0962, 0x0963, NSM
},
487 {0x0981, 0x0981, NSM
},
495 {0x09bc, 0x09bc, NSM
},
497 {0x09c1, 0x09c4, NSM
},
500 {0x09cd, 0x09cd, NSM
},
504 {0x09e2, 0x09e3, NSM
},
506 {0x09f2, 0x09f3, ET
},
508 {0x0a01, 0x0a02, NSM
},
517 {0x0a3c, 0x0a3c, NSM
},
519 {0x0a41, 0x0a42, NSM
},
520 {0x0a47, 0x0a48, NSM
},
521 {0x0a4b, 0x0a4d, NSM
},
525 {0x0a70, 0x0a71, NSM
},
527 {0x0a81, 0x0a82, NSM
},
535 {0x0abc, 0x0abc, NSM
},
537 {0x0ac1, 0x0ac5, NSM
},
538 {0x0ac7, 0x0ac8, NSM
},
541 {0x0acd, 0x0acd, NSM
},
544 {0x0ae2, 0x0ae3, NSM
},
546 {0x0af1, 0x0af1, ET
},
547 {0x0b01, 0x0b01, NSM
},
555 {0x0b3c, 0x0b3c, NSM
},
557 {0x0b3f, 0x0b3f, NSM
},
559 {0x0b41, 0x0b43, NSM
},
562 {0x0b4d, 0x0b4d, NSM
},
563 {0x0b56, 0x0b56, NSM
},
568 {0x0b82, 0x0b82, NSM
},
581 {0x0bc0, 0x0bc0, NSM
},
585 {0x0bcd, 0x0bcd, NSM
},
588 {0x0bf9, 0x0bf9, ET
},
595 {0x0c3e, 0x0c40, NSM
},
597 {0x0c46, 0x0c48, NSM
},
598 {0x0c4a, 0x0c4d, NSM
},
599 {0x0c55, 0x0c56, NSM
},
608 {0x0cbc, 0x0cbc, NSM
},
612 {0x0ccc, 0x0ccd, NSM
},
623 {0x0d41, 0x0d43, NSM
},
626 {0x0d4d, 0x0d4d, NSM
},
636 {0x0dca, 0x0dca, NSM
},
638 {0x0dd2, 0x0dd4, NSM
},
639 {0x0dd6, 0x0dd6, NSM
},
643 {0x0e31, 0x0e31, NSM
},
645 {0x0e34, 0x0e3a, NSM
},
646 {0x0e3f, 0x0e3f, ET
},
648 {0x0e47, 0x0e4e, NSM
},
662 {0x0eb1, 0x0eb1, NSM
},
664 {0x0eb4, 0x0eb9, NSM
},
665 {0x0ebb, 0x0ebc, NSM
},
669 {0x0ec8, 0x0ecd, NSM
},
673 {0x0f18, 0x0f19, NSM
},
675 {0x0f35, 0x0f35, NSM
},
677 {0x0f37, 0x0f37, NSM
},
679 {0x0f39, 0x0f39, NSM
},
682 {0x0f71, 0x0f7e, NSM
},
684 {0x0f80, 0x0f84, NSM
},
686 {0x0f86, 0x0f87, NSM
},
688 {0x0f90, 0x0f97, NSM
},
689 {0x0f99, 0x0fbc, NSM
},
691 {0x0fc6, 0x0fc6, NSM
},
698 {0x102d, 0x1030, NSM
},
700 {0x1032, 0x1032, NSM
},
701 {0x1036, 0x1037, NSM
},
703 {0x1039, 0x1039, NSM
},
705 {0x1058, 0x1059, NSM
},
740 {0x1680, 0x1680, WS
},
745 {0x1712, 0x1714, NSM
},
747 {0x1732, 0x1734, NSM
},
750 {0x1752, 0x1753, NSM
},
753 {0x1772, 0x1773, NSM
},
755 {0x17b7, 0x17bd, NSM
},
757 {0x17c6, 0x17c6, NSM
},
759 {0x17c9, 0x17d3, NSM
},
761 {0x17db, 0x17db, ET
},
763 {0x17dd, 0x17dd, NSM
},
765 {0x180b, 0x180d, NSM
},
766 {0x180e, 0x180e, WS
},
770 {0x18a9, 0x18a9, NSM
},
772 {0x1920, 0x1922, NSM
},
774 {0x1927, 0x192b, NSM
},
776 {0x1932, 0x1932, NSM
},
778 {0x1939, 0x193b, NSM
},
803 {0x2000, 0x200a, WS
},
804 {0x200b, 0x200d, BN
},
807 {0x2028, 0x2028, WS
},
809 {0x202a, 0x202a, LRE
},
810 {0x202b, 0x202b, RLE
},
811 {0x202c, 0x202c, PDF
},
812 {0x202d, 0x202d, LRO
},
813 {0x202e, 0x202e, RLO
},
814 {0x202f, 0x202f, WS
},
815 {0x2030, 0x2034, ET
},
816 {0x2044, 0x2044, CS
},
817 {0x205f, 0x205f, WS
},
818 {0x2060, 0x2063, BN
},
819 {0x206a, 0x206f, BN
},
820 {0x2070, 0x2070, EN
},
822 {0x2074, 0x2079, EN
},
823 {0x207a, 0x207b, ET
},
825 {0x2080, 0x2089, EN
},
826 {0x208a, 0x208b, ET
},
827 {0x20a0, 0x20b1, ET
},
828 {0x20d0, 0x20ea, NSM
},
838 {0x212e, 0x212e, ET
},
844 {0x2212, 0x2213, ET
},
847 {0x2488, 0x249b, EN
},
850 {0x3000, 0x3000, WS
},
853 {0x302a, 0x302f, NSM
},
857 {0x3099, 0x309a, NSM
},
882 {0xfb1e, 0xfb1e, NSM
},
884 {0xfb29, 0xfb29, ET
},
891 {0xfb50, 0xfbb1, AL
},
892 {0xfbd3, 0xfd3d, AL
},
893 {0xfd50, 0xfd8f, AL
},
894 {0xfd92, 0xfdc7, AL
},
895 {0xfdf0, 0xfdfc, AL
},
896 {0xfe00, 0xfe0f, NSM
},
897 {0xfe20, 0xfe23, NSM
},
898 {0xfe50, 0xfe50, CS
},
899 {0xfe52, 0xfe52, CS
},
900 {0xfe55, 0xfe55, CS
},
901 {0xfe5f, 0xfe5f, ET
},
902 {0xfe62, 0xfe63, ET
},
903 {0xfe69, 0xfe6a, ET
},
904 {0xfe70, 0xfe74, AL
},
905 {0xfe76, 0xfefc, AL
},
906 {0xfeff, 0xfeff, BN
},
907 {0xff03, 0xff05, ET
},
908 {0xff0b, 0xff0b, ET
},
909 {0xff0c, 0xff0c, CS
},
910 {0xff0d, 0xff0d, ET
},
911 {0xff0e, 0xff0e, CS
},
912 {0xff0f, 0xff0f, ES
},
913 {0xff10, 0xff19, EN
},
914 {0xff1a, 0xff1a, CS
},
922 {0xffe0, 0xffe1, ET
},
923 {0xffe5, 0xffe6, ET
},
924 {0x10000, 0x1000b, L
},
925 {0x1000d, 0x10026, L
},
926 {0x10028, 0x1003a, L
},
927 {0x1003c, 0x1003d, L
},
928 {0x1003f, 0x1004d, L
},
929 {0x10050, 0x1005d, L
},
930 {0x10080, 0x100fa, L
},
931 {0x10100, 0x10100, L
},
932 {0x10102, 0x10102, L
},
933 {0x10107, 0x10133, L
},
934 {0x10137, 0x1013f, L
},
935 {0x10300, 0x1031e, L
},
936 {0x10320, 0x10323, L
},
937 {0x10330, 0x1034a, L
},
938 {0x10380, 0x1039d, L
},
939 {0x1039f, 0x1039f, L
},
940 {0x10400, 0x1049d, L
},
941 {0x104a0, 0x104a9, L
},
942 {0x10800, 0x10805, R
},
943 {0x10808, 0x10808, R
},
944 {0x1080a, 0x10835, R
},
945 {0x10837, 0x10838, R
},
946 {0x1083c, 0x1083c, R
},
947 {0x1083f, 0x1083f, R
},
948 {0x1d000, 0x1d0f5, L
},
949 {0x1d100, 0x1d126, L
},
950 {0x1d12a, 0x1d166, L
},
951 {0x1d167, 0x1d169, NSM
},
952 {0x1d16a, 0x1d172, L
},
953 {0x1d173, 0x1d17a, BN
},
954 {0x1d17b, 0x1d182, NSM
},
955 {0x1d183, 0x1d184, L
},
956 {0x1d185, 0x1d18b, NSM
},
957 {0x1d18c, 0x1d1a9, L
},
958 {0x1d1aa, 0x1d1ad, NSM
},
959 {0x1d1ae, 0x1d1dd, L
},
960 {0x1d400, 0x1d454, L
},
961 {0x1d456, 0x1d49c, L
},
962 {0x1d49e, 0x1d49f, L
},
963 {0x1d4a2, 0x1d4a2, L
},
964 {0x1d4a5, 0x1d4a6, L
},
965 {0x1d4a9, 0x1d4ac, L
},
966 {0x1d4ae, 0x1d4b9, L
},
967 {0x1d4bb, 0x1d4bb, L
},
968 {0x1d4bd, 0x1d4c3, L
},
969 {0x1d4c5, 0x1d505, L
},
970 {0x1d507, 0x1d50a, L
},
971 {0x1d50d, 0x1d514, L
},
972 {0x1d516, 0x1d51c, L
},
973 {0x1d51e, 0x1d539, L
},
974 {0x1d53b, 0x1d53e, L
},
975 {0x1d540, 0x1d544, L
},
976 {0x1d546, 0x1d546, L
},
977 {0x1d54a, 0x1d550, L
},
978 {0x1d552, 0x1d6a3, L
},
979 {0x1d6a8, 0x1d7c9, L
},
980 {0x1d7ce, 0x1d7ff, EN
},
981 {0x20000, 0x2a6d6, L
},
982 {0x2f800, 0x2fa1d, L
},
983 {0xe0001, 0xe0001, BN
},
984 {0xe0020, 0xe007f, BN
},
985 {0xe0100, 0xe01ef, NSM
},
986 {0xf0000, 0xffffd, L
},
987 {0x100000, 0x10fffd, L
}
997 if (ch
< lookup
[k
].first
)
999 else if (ch
> lookup
[k
].last
)
1002 return lookup
[k
].type
;
1006 * If we reach here, the character was not in any of the
1007 * intervals listed in the lookup table. This means we return
1008 * ON (`Other Neutrals'). This is the appropriate code for any
1009 * character genuinely not listed in the Unicode table, and
1010 * also the table above has deliberately left out any
1011 * characters _explicitly_ listed as ON (to save space!).
1017 * Function exported to front ends to allow them to identify
1018 * bidi-active characters (in case, for example, the platform's
1019 * text display function can't conveniently be prevented from doing
1020 * its own bidi and so special treatment is required for characters
1021 * that would cause the bidi algorithm to activate).
1023 * This function is passed a single Unicode code point, and returns
1024 * nonzero if the presence of this code point can possibly cause
1025 * the bidi algorithm to do any reordering. Thus, any string
1026 * composed entirely of characters for which is_rtl() returns zero
1027 * should be safe to pass to a bidi-active platform display
1028 * function without fear.
1030 * (is_rtl() must therefore also return true for any character
1031 * which would be affected by Arabic shaping, but this isn't
1032 * important because all such characters are right-to-left so it
1033 * would have flagged them anyway.)
1038 * After careful reading of the Unicode bidi algorithm (URL as
1039 * given at the top of this file) I believe that the only
1040 * character classes which can possibly cause trouble are R,
1041 * AL, RLE and RLO. I think that any string containing no
1042 * character in any of those classes will be displayed
1043 * uniformly left-to-right by the Unicode bidi algorithm.
1045 const int mask
= (1<<R
) | (1<<AL
) | (1<<RLE
) | (1<<RLO
);
1047 return mask
& (1 << (getType(c
)));
1051 * The most significant 2 bits of each level are used to store
1052 * Override status of each character
1053 * This function sets the override bits of level according
1054 * to the value in override, and returns the new byte.
1056 unsigned char setOverrideBits(unsigned char level
, unsigned char override
)
1060 else if (override
== R
)
1061 return level
| OISR
;
1062 else if (override
== L
)
1063 return level
| OISL
;
1068 * Find the most recent run of the same value in `level', and
1069 * return the value _before_ it. Used to process U+202C POP
1070 * DIRECTIONAL FORMATTING.
1072 int getPreviousLevel(unsigned char* level
, int from
)
1075 unsigned char current
= level
[--from
];
1077 while (from
>= 0 && level
[from
] == current
)
1088 /* The Main shaping function, and the only one to be used
1089 * by the outside world.
1091 * line: buffer to apply shaping to; it must have been passed through do_bidi() first
1092 * to: output buffer for the shaped data
1093 * count: number of characters in line
1095 int do_shape(bidi_char
*line
, bidi_char
*to
, int count
)
1097 int i
, tempShape
, ligFlag
;
1099 for (ligFlag
=i
=0; i
<count
; i
++) {
1101 tempShape
= STYPE(line
[i
].wc
);
1102 switch (tempShape
) {
1110 tempShape
= (i
+1 < count ?
STYPE(line
[i
+1].wc
) : SU
);
1111 if ((tempShape
== SL
) || (tempShape
== SD
) || (tempShape
== SC
))
1112 to
[i
].wc
= SFINAL((SISOLATED(line
[i
].wc
)));
1114 to
[i
].wc
= SISOLATED(line
[i
].wc
);
1119 /* Make Ligatures */
1120 tempShape
= (i
+1 < count ?
STYPE(line
[i
+1].wc
) : SU
);
1121 if (line
[i
].wc
== 0x644) {
1122 if (i
> 0) switch (line
[i
-1].wc
) {
1125 if ((tempShape
== SL
) || (tempShape
== SD
) || (tempShape
== SC
))
1132 if ((tempShape
== SL
) || (tempShape
== SD
) || (tempShape
== SC
))
1139 if ((tempShape
== SL
) || (tempShape
== SD
) || (tempShape
== SC
))
1146 if ((tempShape
== SL
) || (tempShape
== SD
) || (tempShape
== SC
))
1159 if ((tempShape
== SL
) || (tempShape
== SD
) || (tempShape
== SC
)) {
1160 tempShape
= (i
> 0 ?
STYPE(line
[i
-1].wc
) : SU
);
1161 if ((tempShape
== SR
) || (tempShape
== SD
) || (tempShape
== SC
))
1162 to
[i
].wc
= SMEDIAL((SISOLATED(line
[i
].wc
)));
1164 to
[i
].wc
= SFINAL((SISOLATED(line
[i
].wc
)));
1168 tempShape
= (i
> 0 ?
STYPE(line
[i
-1].wc
) : SU
);
1169 if ((tempShape
== SR
) || (tempShape
== SD
) || (tempShape
== SC
))
1170 to
[i
].wc
= SINITIAL((SISOLATED(line
[i
].wc
)));
1172 to
[i
].wc
= SISOLATED(line
[i
].wc
);
1182 * The Main Bidi Function, and the only function that should
1183 * be used by the outside world.
1185 * line: a buffer of size count containing text to apply
1186 * the Bidirectional algorithm to.
1189 int do_bidi(bidi_char
*line
, int count
)
1191 unsigned char* types
;
1192 unsigned char* levels
;
1193 unsigned char paragraphLevel
;
1194 unsigned char currentEmbedding
;
1195 unsigned char currentOverride
;
1196 unsigned char tempType
;
1197 int i
, j
, yes
, bover
;
1199 /* Check the presence of R or AL types as optimization */
1201 for (i
=0; i
<count
; i
++) {
1202 int type
= getType(line
[i
].wc
);
1203 if (type
== R
|| type
== AL
) {
1211 /* Initialize types, levels */
1212 types
= snewn(count
, unsigned char);
1213 levels
= snewn(count
, unsigned char);
1215 /* Rule (P1) NOT IMPLEMENTED
1216 * P1. Split the text into separate paragraphs. A paragraph separator is
1217 * kept with the previous paragraph. Within each paragraph, apply all the
1218 * other rules of this algorithm.
1222 * P2. In each paragraph, find the first character of type L, AL, or R.
1223 * P3. If a character is found in P2 and it is of type AL or R, then set
1224 * the paragraph embedding level to one; otherwise, set it to zero.
1227 for (i
=0; i
<count
; i
++) {
1228 int type
= getType(line
[i
].wc
);
1229 if (type
== R
|| type
== AL
) {
1232 } else if (type
== L
)
1237 * X1. Begin by setting the current embedding level to the paragraph
1238 * embedding level. Set the directional override status to neutral.
1240 currentEmbedding
= paragraphLevel
;
1241 currentOverride
= ON
;
1243 /* Rule (X2), (X3), (X4), (X5), (X6), (X7), (X8)
1244 * X2. With each RLE, compute the least greater odd embedding level.
1245 * X3. With each LRE, compute the least greater even embedding level.
1246 * X4. With each RLO, compute the least greater odd embedding level.
1247 * X5. With each LRO, compute the least greater even embedding level.
1248 * X6. For all types besides RLE, LRE, RLO, LRO, and PDF:
1249 * a. Set the level of the current character to the current
1251 * b. Whenever the directional override status is not neutral,
1252 * reset the current character type to the directional
1254 * X7. With each PDF, determine the matching embedding or override code.
1255 * If there was a valid matching code, restore (pop) the last
1256 * remembered (pushed) embedding level and directional override.
1257 * X8. All explicit directional embeddings and overrides are completely
1258 * terminated at the end of each paragraph. Paragraph separators are not
1259 * included in the embedding. (Useless here) NOT IMPLEMENTED
1262 for (i
=0; i
<count
; i
++) {
1263 tempType
= getType(line
[i
].wc
);
1266 currentEmbedding
= levels
[i
] = leastGreaterOdd(currentEmbedding
);
1267 levels
[i
] = setOverrideBits(levels
[i
], currentOverride
);
1268 currentOverride
= ON
;
1272 currentEmbedding
= levels
[i
] = leastGreaterEven(currentEmbedding
);
1273 levels
[i
] = setOverrideBits(levels
[i
], currentOverride
);
1274 currentOverride
= ON
;
1278 currentEmbedding
= levels
[i
] = leastGreaterOdd(currentEmbedding
);
1279 tempType
= currentOverride
= R
;
1284 currentEmbedding
= levels
[i
] = leastGreaterEven(currentEmbedding
);
1285 tempType
= currentOverride
= L
;
1291 int prevlevel
= getPreviousLevel(levels
, i
);
1293 if (prevlevel
== -1) {
1294 currentEmbedding
= paragraphLevel
;
1295 currentOverride
= ON
;
1297 currentOverride
= currentEmbedding
& OMASK
;
1298 currentEmbedding
= currentEmbedding
& ~OMASK
;
1301 levels
[i
] = currentEmbedding
;
1304 /* Whitespace is treated as neutral for now */
1307 levels
[i
] = currentEmbedding
;
1309 if (currentOverride
!= ON
)
1310 tempType
= currentOverride
;
1314 levels
[i
] = currentEmbedding
;
1315 if (currentOverride
!= ON
)
1316 tempType
= currentOverride
;
1320 types
[i
] = tempType
;
1322 /* this clears out all overrides, so we can use levels safely... */
1323 /* checks bover first */
1325 for (i
=0; i
<count
; i
++)
1326 levels
[i
] = levels
[i
] & LMASK
;
1329 * X9. Remove all RLE, LRE, RLO, LRO, PDF, and BN codes.
1330 * Here, they're converted to BN.
1332 for (i
=0; i
<count
; i
++) {
1345 * W1. Examine each non-spacing mark (NSM) in the level run, and change
1346 * the type of the NSM to the type of the previous character. If the NSM
1347 * is at the start of the level run, it will get the type of sor.
1349 if (types
[0] == NSM
)
1350 types
[0] = paragraphLevel
;
1352 for (i
=1; i
<count
; i
++) {
1353 if (types
[i
] == NSM
)
1354 types
[i
] = types
[i
-1];
1355 /* Is this a safe assumption?
1356 * I assumed the previous, IS a character.
1361 * W2. Search backwards from each instance of a European number until the
1362 * first strong type (R, L, AL, or sor) is found. If an AL is found,
1363 * change the type of the European number to Arabic number.
1365 for (i
=0; i
<count
; i
++) {
1366 if (types
[i
] == EN
) {
1369 if (types
[j
] == AL
) {
1372 } else if (types
[j
] == R
|| types
[j
] == L
) {
1381 * W3. Change all ALs to R.
1383 * Optimization: on Rule Xn, we might set a flag on AL type
1384 * to prevent this loop in L R lines only...
1386 for (i
=0; i
<count
; i
++) {
1392 * W4. A single European separator between two European numbers changes
1393 * to a European number. A single common separator between two numbers
1394 * of the same type changes to that type.
1396 for (i
=1; i
<(count
-1); i
++) {
1397 if (types
[i
] == ES
) {
1398 if (types
[i
-1] == EN
&& types
[i
+1] == EN
)
1400 } else if (types
[i
] == CS
) {
1401 if (types
[i
-1] == EN
&& types
[i
+1] == EN
)
1403 else if (types
[i
-1] == AN
&& types
[i
+1] == AN
)
1409 * W5. A sequence of European terminators adjacent to European numbers
1410 * changes to all European numbers.
1412 * Optimization: lots here... else ifs need rearrangement
1414 for (i
=0; i
<count
; i
++) {
1415 if (types
[i
] == ET
) {
1416 if (i
> 0 && types
[i
-1] == EN
) {
1419 } else if (i
< count
-1 && types
[i
+1] == EN
) {
1422 } else if (i
< count
-1 && types
[i
+1] == ET
) {
1424 while (j
<count
&& types
[j
] == ET
) {
1434 * W6. Otherwise, separators and terminators change to Other Neutral:
1436 for (i
=0; i
<count
; i
++) {
1447 * W7. Search backwards from each instance of a European number until
1448 * the first strong type (R, L, or sor) is found. If an L is found,
1449 * then change the type of the European number to L.
1451 for (i
=0; i
<count
; i
++) {
1452 if (types
[i
] == EN
) {
1455 if (types
[j
] == L
) {
1458 } else if (types
[j
] == R
|| types
[j
] == AL
) {
1467 * N1. A sequence of neutrals takes the direction of the surrounding
1468 * strong text if the text on both sides has the same direction. European
1469 * and Arabic numbers are treated as though they were R.
1471 if (count
>= 2 && types
[0] == ON
) {
1472 if ((types
[1] == R
) || (types
[1] == EN
) || (types
[1] == AN
))
1474 else if (types
[1] == L
)
1477 for (i
=1; i
<(count
-1); i
++) {
1478 if (types
[i
] == ON
) {
1479 if (types
[i
-1] == L
) {
1481 while (j
<(count
-1) && types
[j
] == ON
) {
1484 if (types
[j
] == L
) {
1491 } else if ((types
[i
-1] == R
) ||
1492 (types
[i
-1] == EN
) ||
1493 (types
[i
-1] == AN
)) {
1495 while (j
<(count
-1) && types
[j
] == ON
) {
1498 if ((types
[j
] == R
) ||
1509 if (count
>= 2 && types
[count
-1] == ON
) {
1510 if (types
[count
-2] == R
|| types
[count
-2] == EN
|| types
[count
-2] == AN
)
1512 else if (types
[count
-2] == L
)
1517 * N2. Any remaining neutrals take the embedding direction.
1519 for (i
=0; i
<count
; i
++) {
1520 if (types
[i
] == ON
) {
1521 if ((levels
[i
] % 2) == 0)
1529 * I1. For all characters with an even (left-to-right) embedding
1530 * direction, those of type R go up one level and those of type AN or
1531 * EN go up two levels.
1533 for (i
=0; i
<count
; i
++) {
1534 if ((levels
[i
] % 2) == 0) {
1537 else if (types
[i
] == AN
|| types
[i
] == EN
)
1543 * I2. For all characters with an odd (right-to-left) embedding direction,
1544 * those of type L, EN or AN go up one level.
1546 for (i
=0; i
<count
; i
++) {
1547 if ((levels
[i
] % 2) == 1) {
1548 if (types
[i
] == L
|| types
[i
] == EN
|| types
[i
] == AN
)
1554 * L1. On each line, reset the embedding level of the following characters
1555 * to the paragraph embedding level:
1556 * (1)segment separators, (2)paragraph separators,
1557 * (3)any sequence of whitespace characters preceding
1558 * a segment separator or paragraph separator,
1559 * (4)and any sequence of white space characters
1560 * at the end of the line.
1561 * The types of characters used here are the original types, not those
1562 * modified by the previous phase.
1565 while (j
>0 && (getType(line
[j
].wc
) == WS
)) {
1568 if (j
< (count
-1)) {
1569 for (j
++; j
<count
; j
++)
1570 levels
[j
] = paragraphLevel
;
1572 for (i
=0; i
<count
; i
++) {
1573 tempType
= getType(line
[i
].wc
);
1574 if (tempType
== WS
) {
1576 while (j
<count
&& (getType(line
[j
].wc
) == WS
)) {
1579 if (j
==count
|| getType(line
[j
].wc
) == B
||
1580 getType(line
[j
].wc
) == S
) {
1581 for (j
--; j
>=i
; j
--) {
1582 levels
[j
] = paragraphLevel
;
1585 } else if (tempType
== B
|| tempType
== S
) {
1586 levels
[i
] = paragraphLevel
;
1590 /* Rule (L4) NOT IMPLEMENTED
1591 * L4. A character that possesses the mirrored property as specified by
1592 * Section 4.7, Mirrored, must be depicted by a mirrored glyph if the
1593 * resolved directionality of that character is R.
1595 /* Note: this is implemented before L2 for efficiency */
1596 for (i
=0; i
<count
; i
++)
1597 if ((levels
[i
] % 2) == 1)
1598 doMirror(&line
[i
].wc
);
1601 * L2. From the highest level found in the text to the lowest odd level on
1602 * each line, including intermediate levels not actually present in the
1603 * text, reverse any contiguous sequence of characters that are at that
1606 /* we flip the character string and leave the level array */
1608 tempType
= levels
[0];
1610 if (levels
[i
] > tempType
)
1611 tempType
= levels
[i
];
1614 /* maximum level in tempType. */
1615 while (tempType
> 0) { /* loop from highest level to the least odd, */
1616 /* which i assume is 1 */
1617 flipThisRun(line
, levels
, tempType
, count
);
1621 /* Rule (L3) NOT IMPLEMENTED
1622 * L3. Combining marks applied to a right-to-left base character will at
1623 * this point precede their base character. If the rendering engine
1624 * expects them to follow the base characters in the final display
1625 * process, then the ordering of the marks and the base character must
1635 * Bad, Horrible function
1636 * takes a pointer to a character that is checked for
1637 * having a mirror glyph.
1639 void doMirror(unsigned int *ch
)
1641 if ((*ch
& 0xFF00) == 0) {
1643 case 0x0028: *ch
= 0x0029; break;
1644 case 0x0029: *ch
= 0x0028; break;
1645 case 0x003C: *ch
= 0x003E; break;
1646 case 0x003E: *ch
= 0x003C; break;
1647 case 0x005B: *ch
= 0x005D; break;
1648 case 0x005D: *ch
= 0x005B; break;
1649 case 0x007B: *ch
= 0x007D; break;
1650 case 0x007D: *ch
= 0x007B; break;
1651 case 0x00AB: *ch
= 0x00BB; break;
1652 case 0x00BB: *ch
= 0x00AB; break;
1654 } else if ((*ch
& 0xFF00) == 0x2000) {
1656 case 0x2039: *ch
= 0x203A; break;
1657 case 0x203A: *ch
= 0x2039; break;
1658 case 0x2045: *ch
= 0x2046; break;
1659 case 0x2046: *ch
= 0x2045; break;
1660 case 0x207D: *ch
= 0x207E; break;
1661 case 0x207E: *ch
= 0x207D; break;
1662 case 0x208D: *ch
= 0x208E; break;
1663 case 0x208E: *ch
= 0x208D; break;
1665 } else if ((*ch
& 0xFF00) == 0x2200) {
1667 case 0x2208: *ch
= 0x220B; break;
1668 case 0x2209: *ch
= 0x220C; break;
1669 case 0x220A: *ch
= 0x220D; break;
1670 case 0x220B: *ch
= 0x2208; break;
1671 case 0x220C: *ch
= 0x2209; break;
1672 case 0x220D: *ch
= 0x220A; break;
1673 case 0x2215: *ch
= 0x29F5; break;
1674 case 0x223C: *ch
= 0x223D; break;
1675 case 0x223D: *ch
= 0x223C; break;
1676 case 0x2243: *ch
= 0x22CD; break;
1677 case 0x2252: *ch
= 0x2253; break;
1678 case 0x2253: *ch
= 0x2252; break;
1679 case 0x2254: *ch
= 0x2255; break;
1680 case 0x2255: *ch
= 0x2254; break;
1681 case 0x2264: *ch
= 0x2265; break;
1682 case 0x2265: *ch
= 0x2264; break;
1683 case 0x2266: *ch
= 0x2267; break;
1684 case 0x2267: *ch
= 0x2266; break;
1685 case 0x2268: *ch
= 0x2269; break;
1686 case 0x2269: *ch
= 0x2268; break;
1687 case 0x226A: *ch
= 0x226B; break;
1688 case 0x226B: *ch
= 0x226A; break;
1689 case 0x226E: *ch
= 0x226F; break;
1690 case 0x226F: *ch
= 0x226E; break;
1691 case 0x2270: *ch
= 0x2271; break;
1692 case 0x2271: *ch
= 0x2270; break;
1693 case 0x2272: *ch
= 0x2273; break;
1694 case 0x2273: *ch
= 0x2272; break;
1695 case 0x2274: *ch
= 0x2275; break;
1696 case 0x2275: *ch
= 0x2274; break;
1697 case 0x2276: *ch
= 0x2277; break;
1698 case 0x2277: *ch
= 0x2276; break;
1699 case 0x2278: *ch
= 0x2279; break;
1700 case 0x2279: *ch
= 0x2278; break;
1701 case 0x227A: *ch
= 0x227B; break;
1702 case 0x227B: *ch
= 0x227A; break;
1703 case 0x227C: *ch
= 0x227D; break;
1704 case 0x227D: *ch
= 0x227C; break;
1705 case 0x227E: *ch
= 0x227F; break;
1706 case 0x227F: *ch
= 0x227E; break;
1707 case 0x2280: *ch
= 0x2281; break;
1708 case 0x2281: *ch
= 0x2280; break;
1709 case 0x2282: *ch
= 0x2283; break;
1710 case 0x2283: *ch
= 0x2282; break;
1711 case 0x2284: *ch
= 0x2285; break;
1712 case 0x2285: *ch
= 0x2284; break;
1713 case 0x2286: *ch
= 0x2287; break;
1714 case 0x2287: *ch
= 0x2286; break;
1715 case 0x2288: *ch
= 0x2289; break;
1716 case 0x2289: *ch
= 0x2288; break;
1717 case 0x228A: *ch
= 0x228B; break;
1718 case 0x228B: *ch
= 0x228A; break;
1719 case 0x228F: *ch
= 0x2290; break;
1720 case 0x2290: *ch
= 0x228F; break;
1721 case 0x2291: *ch
= 0x2292; break;
1722 case 0x2292: *ch
= 0x2291; break;
1723 case 0x2298: *ch
= 0x29B8; break;
1724 case 0x22A2: *ch
= 0x22A3; break;
1725 case 0x22A3: *ch
= 0x22A2; break;
1726 case 0x22A6: *ch
= 0x2ADE; break;
1727 case 0x22A8: *ch
= 0x2AE4; break;
1728 case 0x22A9: *ch
= 0x2AE3; break;
1729 case 0x22AB: *ch
= 0x2AE5; break;
1730 case 0x22B0: *ch
= 0x22B1; break;
1731 case 0x22B1: *ch
= 0x22B0; break;
1732 case 0x22B2: *ch
= 0x22B3; break;
1733 case 0x22B3: *ch
= 0x22B2; break;
1734 case 0x22B4: *ch
= 0x22B5; break;
1735 case 0x22B5: *ch
= 0x22B4; break;
1736 case 0x22B6: *ch
= 0x22B7; break;
1737 case 0x22B7: *ch
= 0x22B6; break;
1738 case 0x22C9: *ch
= 0x22CA; break;
1739 case 0x22CA: *ch
= 0x22C9; break;
1740 case 0x22CB: *ch
= 0x22CC; break;
1741 case 0x22CC: *ch
= 0x22CB; break;
1742 case 0x22CD: *ch
= 0x2243; break;
1743 case 0x22D0: *ch
= 0x22D1; break;
1744 case 0x22D1: *ch
= 0x22D0; break;
1745 case 0x22D6: *ch
= 0x22D7; break;
1746 case 0x22D7: *ch
= 0x22D6; break;
1747 case 0x22D8: *ch
= 0x22D9; break;
1748 case 0x22D9: *ch
= 0x22D8; break;
1749 case 0x22DA: *ch
= 0x22DB; break;
1750 case 0x22DB: *ch
= 0x22DA; break;
1751 case 0x22DC: *ch
= 0x22DD; break;
1752 case 0x22DD: *ch
= 0x22DC; break;
1753 case 0x22DE: *ch
= 0x22DF; break;
1754 case 0x22DF: *ch
= 0x22DE; break;
1755 case 0x22E0: *ch
= 0x22E1; break;
1756 case 0x22E1: *ch
= 0x22E0; break;
1757 case 0x22E2: *ch
= 0x22E3; break;
1758 case 0x22E3: *ch
= 0x22E2; break;
1759 case 0x22E4: *ch
= 0x22E5; break;
1760 case 0x22E5: *ch
= 0x22E4; break;
1761 case 0x22E6: *ch
= 0x22E7; break;
1762 case 0x22E7: *ch
= 0x22E6; break;
1763 case 0x22E8: *ch
= 0x22E9; break;
1764 case 0x22E9: *ch
= 0x22E8; break;
1765 case 0x22EA: *ch
= 0x22EB; break;
1766 case 0x22EB: *ch
= 0x22EA; break;
1767 case 0x22EC: *ch
= 0x22ED; break;
1768 case 0x22ED: *ch
= 0x22EC; break;
1769 case 0x22F0: *ch
= 0x22F1; break;
1770 case 0x22F1: *ch
= 0x22F0; break;
1771 case 0x22F2: *ch
= 0x22FA; break;
1772 case 0x22F3: *ch
= 0x22FB; break;
1773 case 0x22F4: *ch
= 0x22FC; break;
1774 case 0x22F6: *ch
= 0x22FD; break;
1775 case 0x22F7: *ch
= 0x22FE; break;
1776 case 0x22FA: *ch
= 0x22F2; break;
1777 case 0x22FB: *ch
= 0x22F3; break;
1778 case 0x22FC: *ch
= 0x22F4; break;
1779 case 0x22FD: *ch
= 0x22F6; break;
1780 case 0x22FE: *ch
= 0x22F7; break;
1782 } else if ((*ch
& 0xFF00) == 0x2300) {
1784 case 0x2308: *ch
= 0x2309; break;
1785 case 0x2309: *ch
= 0x2308; break;
1786 case 0x230A: *ch
= 0x230B; break;
1787 case 0x230B: *ch
= 0x230A; break;
1788 case 0x2329: *ch
= 0x232A; break;
1789 case 0x232A: *ch
= 0x2329; break;
1791 } else if ((*ch
& 0xFF00) == 0x2700) {
1793 case 0x2768: *ch
= 0x2769; break;
1794 case 0x2769: *ch
= 0x2768; break;
1795 case 0x276A: *ch
= 0x276B; break;
1796 case 0x276B: *ch
= 0x276A; break;
1797 case 0x276C: *ch
= 0x276D; break;
1798 case 0x276D: *ch
= 0x276C; break;
1799 case 0x276E: *ch
= 0x276F; break;
1800 case 0x276F: *ch
= 0x276E; break;
1801 case 0x2770: *ch
= 0x2771; break;
1802 case 0x2771: *ch
= 0x2770; break;
1803 case 0x2772: *ch
= 0x2773; break;
1804 case 0x2773: *ch
= 0x2772; break;
1805 case 0x2774: *ch
= 0x2775; break;
1806 case 0x2775: *ch
= 0x2774; break;
1807 case 0x27D5: *ch
= 0x27D6; break;
1808 case 0x27D6: *ch
= 0x27D5; break;
1809 case 0x27DD: *ch
= 0x27DE; break;
1810 case 0x27DE: *ch
= 0x27DD; break;
1811 case 0x27E2: *ch
= 0x27E3; break;
1812 case 0x27E3: *ch
= 0x27E2; break;
1813 case 0x27E4: *ch
= 0x27E5; break;
1814 case 0x27E5: *ch
= 0x27E4; break;
1815 case 0x27E6: *ch
= 0x27E7; break;
1816 case 0x27E7: *ch
= 0x27E6; break;
1817 case 0x27E8: *ch
= 0x27E9; break;
1818 case 0x27E9: *ch
= 0x27E8; break;
1819 case 0x27EA: *ch
= 0x27EB; break;
1820 case 0x27EB: *ch
= 0x27EA; break;
1822 } else if ((*ch
& 0xFF00) == 0x2900) {
1824 case 0x2983: *ch
= 0x2984; break;
1825 case 0x2984: *ch
= 0x2983; break;
1826 case 0x2985: *ch
= 0x2986; break;
1827 case 0x2986: *ch
= 0x2985; break;
1828 case 0x2987: *ch
= 0x2988; break;
1829 case 0x2988: *ch
= 0x2987; break;
1830 case 0x2989: *ch
= 0x298A; break;
1831 case 0x298A: *ch
= 0x2989; break;
1832 case 0x298B: *ch
= 0x298C; break;
1833 case 0x298C: *ch
= 0x298B; break;
1834 case 0x298D: *ch
= 0x2990; break;
1835 case 0x298E: *ch
= 0x298F; break;
1836 case 0x298F: *ch
= 0x298E; break;
1837 case 0x2990: *ch
= 0x298D; break;
1838 case 0x2991: *ch
= 0x2992; break;
1839 case 0x2992: *ch
= 0x2991; break;
1840 case 0x2993: *ch
= 0x2994; break;
1841 case 0x2994: *ch
= 0x2993; break;
1842 case 0x2995: *ch
= 0x2996; break;
1843 case 0x2996: *ch
= 0x2995; break;
1844 case 0x2997: *ch
= 0x2998; break;
1845 case 0x2998: *ch
= 0x2997; break;
1846 case 0x29B8: *ch
= 0x2298; break;
1847 case 0x29C0: *ch
= 0x29C1; break;
1848 case 0x29C1: *ch
= 0x29C0; break;
1849 case 0x29C4: *ch
= 0x29C5; break;
1850 case 0x29C5: *ch
= 0x29C4; break;
1851 case 0x29CF: *ch
= 0x29D0; break;
1852 case 0x29D0: *ch
= 0x29CF; break;
1853 case 0x29D1: *ch
= 0x29D2; break;
1854 case 0x29D2: *ch
= 0x29D1; break;
1855 case 0x29D4: *ch
= 0x29D5; break;
1856 case 0x29D5: *ch
= 0x29D4; break;
1857 case 0x29D8: *ch
= 0x29D9; break;
1858 case 0x29D9: *ch
= 0x29D8; break;
1859 case 0x29DA: *ch
= 0x29DB; break;
1860 case 0x29DB: *ch
= 0x29DA; break;
1861 case 0x29F5: *ch
= 0x2215; break;
1862 case 0x29F8: *ch
= 0x29F9; break;
1863 case 0x29F9: *ch
= 0x29F8; break;
1864 case 0x29FC: *ch
= 0x29FD; break;
1865 case 0x29FD: *ch
= 0x29FC; break;
1867 } else if ((*ch
& 0xFF00) == 0x2A00) {
1869 case 0x2A2B: *ch
= 0x2A2C; break;
1870 case 0x2A2C: *ch
= 0x2A2B; break;
1871 case 0x2A2D: *ch
= 0x2A2C; break;
1872 case 0x2A2E: *ch
= 0x2A2D; break;
1873 case 0x2A34: *ch
= 0x2A35; break;
1874 case 0x2A35: *ch
= 0x2A34; break;
1875 case 0x2A3C: *ch
= 0x2A3D; break;
1876 case 0x2A3D: *ch
= 0x2A3C; break;
1877 case 0x2A64: *ch
= 0x2A65; break;
1878 case 0x2A65: *ch
= 0x2A64; break;
1879 case 0x2A79: *ch
= 0x2A7A; break;
1880 case 0x2A7A: *ch
= 0x2A79; break;
1881 case 0x2A7D: *ch
= 0x2A7E; break;
1882 case 0x2A7E: *ch
= 0x2A7D; break;
1883 case 0x2A7F: *ch
= 0x2A80; break;
1884 case 0x2A80: *ch
= 0x2A7F; break;
1885 case 0x2A81: *ch
= 0x2A82; break;
1886 case 0x2A82: *ch
= 0x2A81; break;
1887 case 0x2A83: *ch
= 0x2A84; break;
1888 case 0x2A84: *ch
= 0x2A83; break;
1889 case 0x2A8B: *ch
= 0x2A8C; break;
1890 case 0x2A8C: *ch
= 0x2A8B; break;
1891 case 0x2A91: *ch
= 0x2A92; break;
1892 case 0x2A92: *ch
= 0x2A91; break;
1893 case 0x2A93: *ch
= 0x2A94; break;
1894 case 0x2A94: *ch
= 0x2A93; break;
1895 case 0x2A95: *ch
= 0x2A96; break;
1896 case 0x2A96: *ch
= 0x2A95; break;
1897 case 0x2A97: *ch
= 0x2A98; break;
1898 case 0x2A98: *ch
= 0x2A97; break;
1899 case 0x2A99: *ch
= 0x2A9A; break;
1900 case 0x2A9A: *ch
= 0x2A99; break;
1901 case 0x2A9B: *ch
= 0x2A9C; break;
1902 case 0x2A9C: *ch
= 0x2A9B; break;
1903 case 0x2AA1: *ch
= 0x2AA2; break;
1904 case 0x2AA2: *ch
= 0x2AA1; break;
1905 case 0x2AA6: *ch
= 0x2AA7; break;
1906 case 0x2AA7: *ch
= 0x2AA6; break;
1907 case 0x2AA8: *ch
= 0x2AA9; break;
1908 case 0x2AA9: *ch
= 0x2AA8; break;
1909 case 0x2AAA: *ch
= 0x2AAB; break;
1910 case 0x2AAB: *ch
= 0x2AAA; break;
1911 case 0x2AAC: *ch
= 0x2AAD; break;
1912 case 0x2AAD: *ch
= 0x2AAC; break;
1913 case 0x2AAF: *ch
= 0x2AB0; break;
1914 case 0x2AB0: *ch
= 0x2AAF; break;
1915 case 0x2AB3: *ch
= 0x2AB4; break;
1916 case 0x2AB4: *ch
= 0x2AB3; break;
1917 case 0x2ABB: *ch
= 0x2ABC; break;
1918 case 0x2ABC: *ch
= 0x2ABB; break;
1919 case 0x2ABD: *ch
= 0x2ABE; break;
1920 case 0x2ABE: *ch
= 0x2ABD; break;
1921 case 0x2ABF: *ch
= 0x2AC0; break;
1922 case 0x2AC0: *ch
= 0x2ABF; break;
1923 case 0x2AC1: *ch
= 0x2AC2; break;
1924 case 0x2AC2: *ch
= 0x2AC1; break;
1925 case 0x2AC3: *ch
= 0x2AC4; break;
1926 case 0x2AC4: *ch
= 0x2AC3; break;
1927 case 0x2AC5: *ch
= 0x2AC6; break;
1928 case 0x2AC6: *ch
= 0x2AC5; break;
1929 case 0x2ACD: *ch
= 0x2ACE; break;
1930 case 0x2ACE: *ch
= 0x2ACD; break;
1931 case 0x2ACF: *ch
= 0x2AD0; break;
1932 case 0x2AD0: *ch
= 0x2ACF; break;
1933 case 0x2AD1: *ch
= 0x2AD2; break;
1934 case 0x2AD2: *ch
= 0x2AD1; break;
1935 case 0x2AD3: *ch
= 0x2AD4; break;
1936 case 0x2AD4: *ch
= 0x2AD3; break;
1937 case 0x2AD5: *ch
= 0x2AD6; break;
1938 case 0x2AD6: *ch
= 0x2AD5; break;
1939 case 0x2ADE: *ch
= 0x22A6; break;
1940 case 0x2AE3: *ch
= 0x22A9; break;
1941 case 0x2AE4: *ch
= 0x22A8; break;
1942 case 0x2AE5: *ch
= 0x22AB; break;
1943 case 0x2AEC: *ch
= 0x2AED; break;
1944 case 0x2AED: *ch
= 0x2AEC; break;
1945 case 0x2AF7: *ch
= 0x2AF8; break;
1946 case 0x2AF8: *ch
= 0x2AF7; break;
1947 case 0x2AF9: *ch
= 0x2AFA; break;
1948 case 0x2AFA: *ch
= 0x2AF9; break;
1950 } else if ((*ch
& 0xFF00) == 0x3000) {
1952 case 0x3008: *ch
= 0x3009; break;
1953 case 0x3009: *ch
= 0x3008; break;
1954 case 0x300A: *ch
= 0x300B; break;
1955 case 0x300B: *ch
= 0x300A; break;
1956 case 0x300C: *ch
= 0x300D; break;
1957 case 0x300D: *ch
= 0x300C; break;
1958 case 0x300E: *ch
= 0x300F; break;
1959 case 0x300F: *ch
= 0x300E; break;
1960 case 0x3010: *ch
= 0x3011; break;
1961 case 0x3011: *ch
= 0x3010; break;
1962 case 0x3014: *ch
= 0x3015; break;
1963 case 0x3015: *ch
= 0x3014; break;
1964 case 0x3016: *ch
= 0x3017; break;
1965 case 0x3017: *ch
= 0x3016; break;
1966 case 0x3018: *ch
= 0x3019; break;
1967 case 0x3019: *ch
= 0x3018; break;
1968 case 0x301A: *ch
= 0x301B; break;
1969 case 0x301B: *ch
= 0x301A; break;
1971 } else if ((*ch
& 0xFF00) == 0xFF00) {
1973 case 0xFF08: *ch
= 0xFF09; break;
1974 case 0xFF09: *ch
= 0xFF08; break;
1975 case 0xFF1C: *ch
= 0xFF1E; break;
1976 case 0xFF1E: *ch
= 0xFF1C; break;
1977 case 0xFF3B: *ch
= 0xFF3D; break;
1978 case 0xFF3D: *ch
= 0xFF3B; break;
1979 case 0xFF5B: *ch
= 0xFF5D; break;
1980 case 0xFF5D: *ch
= 0xFF5B; break;
1981 case 0xFF5F: *ch
= 0xFF60; break;
1982 case 0xFF60: *ch
= 0xFF5F; break;
1983 case 0xFF62: *ch
= 0xFF63; break;
1984 case 0xFF63: *ch
= 0xFF62; break;
1994 int main(int argc
, char **argv
)
1996 static const struct { int type
; char *name
; } typetoname
[] = {
1997 #define TYPETONAME(X) { X , #X }
2021 for (i
= 1; i
< argc
; i
++) {
2022 unsigned long chr
= strtoul(argv
[i
], NULL
, 0);
2023 int type
= getType(chr
);
2024 assert(typetoname
[type
].type
== type
);
2025 printf("U+%04x: %s\n", chr
, typetoname
[type
].name
);