Reinstate as much of the Windows font-linking behaviour as I can
[u/mdw/putty] / minibidi.c
1 /************************************************************************
2 * $Id$
3 *
4 * ------------
5 * Description:
6 * ------------
7 * This is an implementation of Unicode's Bidirectional Algorithm
8 * (known as UAX #9).
9 *
10 * http://www.unicode.org/reports/tr9/
11 *
12 * Author: Ahmad Khalifa
13 *
14 * -----------------
15 * Revision Details: (Updated by Revision Control System)
16 * -----------------
17 * $Date$
18 * $Author$
19 * $Revision$
20 *
21 * (www.arabeyes.org - under MIT license)
22 *
23 ************************************************************************/
24
25 /*
26 * TODO:
27 * =====
28 * - Explicit marks need to be handled (they are not 100% now)
29 * - Ligatures
30 */
31
32 #include <stdlib.h> /* definition of wchar_t*/
33
34 #include "misc.h"
35
/*
 * Bit fields packed into one byte of a `levels' array: the low six
 * bits carry the embedding level, the top two bits carry the
 * directional override status.
 */
#define LMASK   0x3F    /* Embedding Level mask */
#define OMASK   0xC0    /* Override mask */
#define OISL    0x80    /* Override is L */
#define OISR    0x40    /* Override is R */

/*
 * For standalone compilation in a testing mode.
 * Still depends on the PuTTY headers for snewn and sfree, but can avoid
 * _linking_ with any other PuTTY code.
 */
#ifdef TEST_GETTYPE
#define safemalloc malloc
#define safefree free
#endif

/*
 * Shaping Helpers.
 *
 * STYPE yields the shaping type of a character, or SU for anything
 * outside SHAPE_FIRST..SHAPE_LAST.  SISOLATED indexes shapetypes[]
 * without a range check, so it must only be applied to characters
 * inside that range.  SFINAL / SINITIAL / SMEDIAL take the isolated
 * form and add a fixed offset (1, 2, 3) to it.
 */
#define STYPE(xh) \
    ((((xh) < SHAPE_FIRST) || ((xh) > SHAPE_LAST)) ? \
     SU : shapetypes[(xh)-SHAPE_FIRST].type)
#define SISOLATED(xh)   (shapetypes[(xh)-SHAPE_FIRST].form_b)
#define SFINAL(xh)      ((xh)+1)
#define SINITIAL(xh)    ((xh)+2)
#define SMEDIAL(xh)     ((xh)+3)

/* Next odd / next even number strictly greater than x. */
#define leastGreaterOdd(x)  ( ((x)+1) | 1 )
#define leastGreaterEven(x) ( ((x)+2) &~ 1 )
59
/*
 * One character of a line handed to do_bidi() / do_shape().
 * do_shape() rewrites `wc' in its output buffer; the other fields are
 * copied through unchanged by the code in this file.
 * NOTE(review): `origwc' and `index' presumably let the caller recover
 * the original character and its original position -- confirm against
 * the callers.
 */
typedef struct bidi_char {
    wchar_t origwc;
    wchar_t wc;
    unsigned short index;
} bidi_char;

/* function declarations */
void flipThisRun(bidi_char *from, unsigned char *level, int max, int count);
int findIndexOfRun(unsigned char *level, int start, int count, int tlevel);
unsigned char getType(int ch);
unsigned char setOverrideBits(unsigned char level, unsigned char override);
int getPreviousLevel(unsigned char *level, int from);
int do_shape(bidi_char *line, bidi_char *to, int count);
int do_bidi(bidi_char *line, int count);
void doMirror(wchar_t *ch);
74
/*
 * character types
 *
 * Bidirectional character classes as used by UAX #9.  The numeric
 * values matter: is_rtl() uses them as bit positions, and every value
 * fits in an unsigned char.
 */
enum {
    L   = 0,
    LRE = 1,
    LRO = 2,
    R   = 3,
    AL  = 4,
    RLE = 5,
    RLO = 6,
    PDF = 7,
    EN  = 8,
    ES  = 9,
    ET  = 10,
    AN  = 11,
    CS  = 12,
    NSM = 13,
    BN  = 14,
    B   = 15,
    S   = 16,
    WS  = 17,
    ON  = 18
};

/* Shaping Types */
enum {
    SL = 0,     /* Left-Joining, doesnt exist in U+0600 - U+06FF */
    SR = 1,     /* Right-Joining, ie has Isolated, Final */
    SD = 2,     /* Dual-Joining, ie has Isolated, Final, Initial, Medial */
    SU = 3,     /* Non-Joining */
    SC = 4      /* Join-Causing, like U+0640 (TATWEEL) */
};
106
107 typedef struct {
108 char type;
109 wchar_t form_b;
110 } shape_node;
111
112 /* Kept near the actual table, for verification. */
113 #define SHAPE_FIRST 0x621
114 #define SHAPE_LAST 0x64A
115
116 const shape_node shapetypes[] = {
117 /* index, Typ, Iso, Ligature Index*/
118 /* 621 */ {SU, 0xFE80},
119 /* 622 */ {SR, 0xFE81},
120 /* 623 */ {SR, 0xFE83},
121 /* 624 */ {SR, 0xFE85},
122 /* 625 */ {SR, 0xFE87},
123 /* 626 */ {SD, 0xFE89},
124 /* 627 */ {SR, 0xFE8D},
125 /* 628 */ {SD, 0xFE8F},
126 /* 629 */ {SR, 0xFE93},
127 /* 62A */ {SD, 0xFE95},
128 /* 62B */ {SD, 0xFE99},
129 /* 62C */ {SD, 0xFE9D},
130 /* 62D */ {SD, 0xFEA1},
131 /* 62E */ {SD, 0xFEA5},
132 /* 62F */ {SR, 0xFEA9},
133 /* 630 */ {SR, 0xFEAB},
134 /* 631 */ {SR, 0xFEAD},
135 /* 632 */ {SR, 0xFEAF},
136 /* 633 */ {SD, 0xFEB1},
137 /* 634 */ {SD, 0xFEB5},
138 /* 635 */ {SD, 0xFEB9},
139 /* 636 */ {SD, 0xFEBD},
140 /* 637 */ {SD, 0xFEC1},
141 /* 638 */ {SD, 0xFEC5},
142 /* 639 */ {SD, 0xFEC9},
143 /* 63A */ {SD, 0xFECD},
144 /* 63B */ {SU, 0x0},
145 /* 63C */ {SU, 0x0},
146 /* 63D */ {SU, 0x0},
147 /* 63E */ {SU, 0x0},
148 /* 63F */ {SU, 0x0},
149 /* 640 */ {SC, 0x0},
150 /* 641 */ {SD, 0xFED1},
151 /* 642 */ {SD, 0xFED5},
152 /* 643 */ {SD, 0xFED9},
153 /* 644 */ {SD, 0xFEDD},
154 /* 645 */ {SD, 0xFEE1},
155 /* 646 */ {SD, 0xFEE5},
156 /* 647 */ {SD, 0xFEE9},
157 /* 648 */ {SR, 0xFEED},
158 /* 649 */ {SR, 0xFEEF}, /* SD */
159 /* 64A */ {SD, 0xFEF1}
160 };
161
162 /*
163 * Flips the text buffer, according to max level, and
164 * all higher levels
165 *
166 * Input:
167 * from: text buffer, on which to apply flipping
168 * level: resolved levels buffer
169 * max: the maximum level found in this line (should be unsigned char)
170 * count: line size in bidi_char
171 */
172 void flipThisRun(bidi_char *from, unsigned char *level, int max, int count)
173 {
174 int i, j, k, tlevel;
175 bidi_char temp;
176
177 j = i = 0;
178 while (i<count && j<count) {
179
180 /* find the start of the run of level=max */
181 tlevel = max;
182 i = j = findIndexOfRun(level, i, count, max);
183 /* find the end of the run */
184 while (i<count && tlevel <= level[i]) {
185 i++;
186 }
187 for (k = i - 1; k > j; k--, j++) {
188 temp = from[k];
189 from[k] = from[j];
190 from[j] = temp;
191 }
192 }
193 }
194
/*
 * Returns the index of the first entry at or after start whose level
 * equals tlevel, or count if there is no such entry before count.
 */
int findIndexOfRun(unsigned char* level , int start, int count, int tlevel)
{
    int pos = start;

    while (pos < count && level[pos] != tlevel)
        pos++;

    /* start may already be past count; the answer is still count */
    return (pos < count) ? pos : count;
}
208
209 /*
210 * Returns the bidi character type of ch.
211 *
212 * The data table in this function is constructed from the Unicode
213 * Character Database, downloadable from unicode.org at the URL
214 *
215 * http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
216 *
217 * by the following fragment of Perl:
218
219 perl -ne 'split ";"; $num = hex $_[0]; $type = $_[4];' \
220 -e '$fl = ($_[1] =~ /First/ ? 1 : $_[1] =~ /Last/ ? 2 : 0);' \
221 -e 'if ($type eq $runtype and ($runend == $num-1 or ' \
222 -e ' ($fl==2 and $pfl==1))) {$runend = $num;} else { &reset; }' \
223 -e '$pfl=$fl; END { &reset }; sub reset {' \
224 -e 'printf" {0x%04x, 0x%04x, %s},\n",$runstart,$runend,$runtype' \
225 -e ' if defined $runstart and $runtype ne "ON";' \
226 -e '$runstart=$runend=$num; $runtype=$type;}' \
227 UnicodeData.txt
228
229 */
230 unsigned char getType(int ch)
231 {
232 static const struct {
233 int first, last, type;
234 } lookup[] = {
235 {0x0000, 0x0008, BN},
236 {0x0009, 0x0009, S},
237 {0x000a, 0x000a, B},
238 {0x000b, 0x000b, S},
239 {0x000c, 0x000c, WS},
240 {0x000d, 0x000d, B},
241 {0x000e, 0x001b, BN},
242 {0x001c, 0x001e, B},
243 {0x001f, 0x001f, S},
244 {0x0020, 0x0020, WS},
245 {0x0023, 0x0025, ET},
246 {0x002b, 0x002b, ES},
247 {0x002c, 0x002c, CS},
248 {0x002d, 0x002d, ES},
249 {0x002e, 0x002f, CS},
250 {0x0030, 0x0039, EN},
251 {0x003a, 0x003a, CS},
252 {0x0041, 0x005a, L},
253 {0x0061, 0x007a, L},
254 {0x007f, 0x0084, BN},
255 {0x0085, 0x0085, B},
256 {0x0086, 0x009f, BN},
257 {0x00a0, 0x00a0, CS},
258 {0x00a2, 0x00a5, ET},
259 {0x00aa, 0x00aa, L},
260 {0x00ad, 0x00ad, BN},
261 {0x00b0, 0x00b1, ET},
262 {0x00b2, 0x00b3, EN},
263 {0x00b5, 0x00b5, L},
264 {0x00b9, 0x00b9, EN},
265 {0x00ba, 0x00ba, L},
266 {0x00c0, 0x00d6, L},
267 {0x00d8, 0x00f6, L},
268 {0x00f8, 0x0236, L},
269 {0x0250, 0x02b8, L},
270 {0x02bb, 0x02c1, L},
271 {0x02d0, 0x02d1, L},
272 {0x02e0, 0x02e4, L},
273 {0x02ee, 0x02ee, L},
274 {0x0300, 0x0357, NSM},
275 {0x035d, 0x036f, NSM},
276 {0x037a, 0x037a, L},
277 {0x0386, 0x0386, L},
278 {0x0388, 0x038a, L},
279 {0x038c, 0x038c, L},
280 {0x038e, 0x03a1, L},
281 {0x03a3, 0x03ce, L},
282 {0x03d0, 0x03f5, L},
283 {0x03f7, 0x03fb, L},
284 {0x0400, 0x0482, L},
285 {0x0483, 0x0486, NSM},
286 {0x0488, 0x0489, NSM},
287 {0x048a, 0x04ce, L},
288 {0x04d0, 0x04f5, L},
289 {0x04f8, 0x04f9, L},
290 {0x0500, 0x050f, L},
291 {0x0531, 0x0556, L},
292 {0x0559, 0x055f, L},
293 {0x0561, 0x0587, L},
294 {0x0589, 0x0589, L},
295 {0x0591, 0x05a1, NSM},
296 {0x05a3, 0x05b9, NSM},
297 {0x05bb, 0x05bd, NSM},
298 {0x05be, 0x05be, R},
299 {0x05bf, 0x05bf, NSM},
300 {0x05c0, 0x05c0, R},
301 {0x05c1, 0x05c2, NSM},
302 {0x05c3, 0x05c3, R},
303 {0x05c4, 0x05c4, NSM},
304 {0x05d0, 0x05ea, R},
305 {0x05f0, 0x05f4, R},
306 {0x0600, 0x0603, AL},
307 {0x060c, 0x060c, CS},
308 {0x060d, 0x060d, AL},
309 {0x0610, 0x0615, NSM},
310 {0x061b, 0x061b, AL},
311 {0x061f, 0x061f, AL},
312 {0x0621, 0x063a, AL},
313 {0x0640, 0x064a, AL},
314 {0x064b, 0x0658, NSM},
315 {0x0660, 0x0669, AN},
316 {0x066a, 0x066a, ET},
317 {0x066b, 0x066c, AN},
318 {0x066d, 0x066f, AL},
319 {0x0670, 0x0670, NSM},
320 {0x0671, 0x06d5, AL},
321 {0x06d6, 0x06dc, NSM},
322 {0x06dd, 0x06dd, AL},
323 {0x06de, 0x06e4, NSM},
324 {0x06e5, 0x06e6, AL},
325 {0x06e7, 0x06e8, NSM},
326 {0x06ea, 0x06ed, NSM},
327 {0x06ee, 0x06ef, AL},
328 {0x06f0, 0x06f9, EN},
329 {0x06fa, 0x070d, AL},
330 {0x070f, 0x070f, BN},
331 {0x0710, 0x0710, AL},
332 {0x0711, 0x0711, NSM},
333 {0x0712, 0x072f, AL},
334 {0x0730, 0x074a, NSM},
335 {0x074d, 0x074f, AL},
336 {0x0780, 0x07a5, AL},
337 {0x07a6, 0x07b0, NSM},
338 {0x07b1, 0x07b1, AL},
339 {0x0901, 0x0902, NSM},
340 {0x0903, 0x0939, L},
341 {0x093c, 0x093c, NSM},
342 {0x093d, 0x0940, L},
343 {0x0941, 0x0948, NSM},
344 {0x0949, 0x094c, L},
345 {0x094d, 0x094d, NSM},
346 {0x0950, 0x0950, L},
347 {0x0951, 0x0954, NSM},
348 {0x0958, 0x0961, L},
349 {0x0962, 0x0963, NSM},
350 {0x0964, 0x0970, L},
351 {0x0981, 0x0981, NSM},
352 {0x0982, 0x0983, L},
353 {0x0985, 0x098c, L},
354 {0x098f, 0x0990, L},
355 {0x0993, 0x09a8, L},
356 {0x09aa, 0x09b0, L},
357 {0x09b2, 0x09b2, L},
358 {0x09b6, 0x09b9, L},
359 {0x09bc, 0x09bc, NSM},
360 {0x09bd, 0x09c0, L},
361 {0x09c1, 0x09c4, NSM},
362 {0x09c7, 0x09c8, L},
363 {0x09cb, 0x09cc, L},
364 {0x09cd, 0x09cd, NSM},
365 {0x09d7, 0x09d7, L},
366 {0x09dc, 0x09dd, L},
367 {0x09df, 0x09e1, L},
368 {0x09e2, 0x09e3, NSM},
369 {0x09e6, 0x09f1, L},
370 {0x09f2, 0x09f3, ET},
371 {0x09f4, 0x09fa, L},
372 {0x0a01, 0x0a02, NSM},
373 {0x0a03, 0x0a03, L},
374 {0x0a05, 0x0a0a, L},
375 {0x0a0f, 0x0a10, L},
376 {0x0a13, 0x0a28, L},
377 {0x0a2a, 0x0a30, L},
378 {0x0a32, 0x0a33, L},
379 {0x0a35, 0x0a36, L},
380 {0x0a38, 0x0a39, L},
381 {0x0a3c, 0x0a3c, NSM},
382 {0x0a3e, 0x0a40, L},
383 {0x0a41, 0x0a42, NSM},
384 {0x0a47, 0x0a48, NSM},
385 {0x0a4b, 0x0a4d, NSM},
386 {0x0a59, 0x0a5c, L},
387 {0x0a5e, 0x0a5e, L},
388 {0x0a66, 0x0a6f, L},
389 {0x0a70, 0x0a71, NSM},
390 {0x0a72, 0x0a74, L},
391 {0x0a81, 0x0a82, NSM},
392 {0x0a83, 0x0a83, L},
393 {0x0a85, 0x0a8d, L},
394 {0x0a8f, 0x0a91, L},
395 {0x0a93, 0x0aa8, L},
396 {0x0aaa, 0x0ab0, L},
397 {0x0ab2, 0x0ab3, L},
398 {0x0ab5, 0x0ab9, L},
399 {0x0abc, 0x0abc, NSM},
400 {0x0abd, 0x0ac0, L},
401 {0x0ac1, 0x0ac5, NSM},
402 {0x0ac7, 0x0ac8, NSM},
403 {0x0ac9, 0x0ac9, L},
404 {0x0acb, 0x0acc, L},
405 {0x0acd, 0x0acd, NSM},
406 {0x0ad0, 0x0ad0, L},
407 {0x0ae0, 0x0ae1, L},
408 {0x0ae2, 0x0ae3, NSM},
409 {0x0ae6, 0x0aef, L},
410 {0x0af1, 0x0af1, ET},
411 {0x0b01, 0x0b01, NSM},
412 {0x0b02, 0x0b03, L},
413 {0x0b05, 0x0b0c, L},
414 {0x0b0f, 0x0b10, L},
415 {0x0b13, 0x0b28, L},
416 {0x0b2a, 0x0b30, L},
417 {0x0b32, 0x0b33, L},
418 {0x0b35, 0x0b39, L},
419 {0x0b3c, 0x0b3c, NSM},
420 {0x0b3d, 0x0b3e, L},
421 {0x0b3f, 0x0b3f, NSM},
422 {0x0b40, 0x0b40, L},
423 {0x0b41, 0x0b43, NSM},
424 {0x0b47, 0x0b48, L},
425 {0x0b4b, 0x0b4c, L},
426 {0x0b4d, 0x0b4d, NSM},
427 {0x0b56, 0x0b56, NSM},
428 {0x0b57, 0x0b57, L},
429 {0x0b5c, 0x0b5d, L},
430 {0x0b5f, 0x0b61, L},
431 {0x0b66, 0x0b71, L},
432 {0x0b82, 0x0b82, NSM},
433 {0x0b83, 0x0b83, L},
434 {0x0b85, 0x0b8a, L},
435 {0x0b8e, 0x0b90, L},
436 {0x0b92, 0x0b95, L},
437 {0x0b99, 0x0b9a, L},
438 {0x0b9c, 0x0b9c, L},
439 {0x0b9e, 0x0b9f, L},
440 {0x0ba3, 0x0ba4, L},
441 {0x0ba8, 0x0baa, L},
442 {0x0bae, 0x0bb5, L},
443 {0x0bb7, 0x0bb9, L},
444 {0x0bbe, 0x0bbf, L},
445 {0x0bc0, 0x0bc0, NSM},
446 {0x0bc1, 0x0bc2, L},
447 {0x0bc6, 0x0bc8, L},
448 {0x0bca, 0x0bcc, L},
449 {0x0bcd, 0x0bcd, NSM},
450 {0x0bd7, 0x0bd7, L},
451 {0x0be7, 0x0bf2, L},
452 {0x0bf9, 0x0bf9, ET},
453 {0x0c01, 0x0c03, L},
454 {0x0c05, 0x0c0c, L},
455 {0x0c0e, 0x0c10, L},
456 {0x0c12, 0x0c28, L},
457 {0x0c2a, 0x0c33, L},
458 {0x0c35, 0x0c39, L},
459 {0x0c3e, 0x0c40, NSM},
460 {0x0c41, 0x0c44, L},
461 {0x0c46, 0x0c48, NSM},
462 {0x0c4a, 0x0c4d, NSM},
463 {0x0c55, 0x0c56, NSM},
464 {0x0c60, 0x0c61, L},
465 {0x0c66, 0x0c6f, L},
466 {0x0c82, 0x0c83, L},
467 {0x0c85, 0x0c8c, L},
468 {0x0c8e, 0x0c90, L},
469 {0x0c92, 0x0ca8, L},
470 {0x0caa, 0x0cb3, L},
471 {0x0cb5, 0x0cb9, L},
472 {0x0cbc, 0x0cbc, NSM},
473 {0x0cbd, 0x0cc4, L},
474 {0x0cc6, 0x0cc8, L},
475 {0x0cca, 0x0ccb, L},
476 {0x0ccc, 0x0ccd, NSM},
477 {0x0cd5, 0x0cd6, L},
478 {0x0cde, 0x0cde, L},
479 {0x0ce0, 0x0ce1, L},
480 {0x0ce6, 0x0cef, L},
481 {0x0d02, 0x0d03, L},
482 {0x0d05, 0x0d0c, L},
483 {0x0d0e, 0x0d10, L},
484 {0x0d12, 0x0d28, L},
485 {0x0d2a, 0x0d39, L},
486 {0x0d3e, 0x0d40, L},
487 {0x0d41, 0x0d43, NSM},
488 {0x0d46, 0x0d48, L},
489 {0x0d4a, 0x0d4c, L},
490 {0x0d4d, 0x0d4d, NSM},
491 {0x0d57, 0x0d57, L},
492 {0x0d60, 0x0d61, L},
493 {0x0d66, 0x0d6f, L},
494 {0x0d82, 0x0d83, L},
495 {0x0d85, 0x0d96, L},
496 {0x0d9a, 0x0db1, L},
497 {0x0db3, 0x0dbb, L},
498 {0x0dbd, 0x0dbd, L},
499 {0x0dc0, 0x0dc6, L},
500 {0x0dca, 0x0dca, NSM},
501 {0x0dcf, 0x0dd1, L},
502 {0x0dd2, 0x0dd4, NSM},
503 {0x0dd6, 0x0dd6, NSM},
504 {0x0dd8, 0x0ddf, L},
505 {0x0df2, 0x0df4, L},
506 {0x0e01, 0x0e30, L},
507 {0x0e31, 0x0e31, NSM},
508 {0x0e32, 0x0e33, L},
509 {0x0e34, 0x0e3a, NSM},
510 {0x0e3f, 0x0e3f, ET},
511 {0x0e40, 0x0e46, L},
512 {0x0e47, 0x0e4e, NSM},
513 {0x0e4f, 0x0e5b, L},
514 {0x0e81, 0x0e82, L},
515 {0x0e84, 0x0e84, L},
516 {0x0e87, 0x0e88, L},
517 {0x0e8a, 0x0e8a, L},
518 {0x0e8d, 0x0e8d, L},
519 {0x0e94, 0x0e97, L},
520 {0x0e99, 0x0e9f, L},
521 {0x0ea1, 0x0ea3, L},
522 {0x0ea5, 0x0ea5, L},
523 {0x0ea7, 0x0ea7, L},
524 {0x0eaa, 0x0eab, L},
525 {0x0ead, 0x0eb0, L},
526 {0x0eb1, 0x0eb1, NSM},
527 {0x0eb2, 0x0eb3, L},
528 {0x0eb4, 0x0eb9, NSM},
529 {0x0ebb, 0x0ebc, NSM},
530 {0x0ebd, 0x0ebd, L},
531 {0x0ec0, 0x0ec4, L},
532 {0x0ec6, 0x0ec6, L},
533 {0x0ec8, 0x0ecd, NSM},
534 {0x0ed0, 0x0ed9, L},
535 {0x0edc, 0x0edd, L},
536 {0x0f00, 0x0f17, L},
537 {0x0f18, 0x0f19, NSM},
538 {0x0f1a, 0x0f34, L},
539 {0x0f35, 0x0f35, NSM},
540 {0x0f36, 0x0f36, L},
541 {0x0f37, 0x0f37, NSM},
542 {0x0f38, 0x0f38, L},
543 {0x0f39, 0x0f39, NSM},
544 {0x0f3e, 0x0f47, L},
545 {0x0f49, 0x0f6a, L},
546 {0x0f71, 0x0f7e, NSM},
547 {0x0f7f, 0x0f7f, L},
548 {0x0f80, 0x0f84, NSM},
549 {0x0f85, 0x0f85, L},
550 {0x0f86, 0x0f87, NSM},
551 {0x0f88, 0x0f8b, L},
552 {0x0f90, 0x0f97, NSM},
553 {0x0f99, 0x0fbc, NSM},
554 {0x0fbe, 0x0fc5, L},
555 {0x0fc6, 0x0fc6, NSM},
556 {0x0fc7, 0x0fcc, L},
557 {0x0fcf, 0x0fcf, L},
558 {0x1000, 0x1021, L},
559 {0x1023, 0x1027, L},
560 {0x1029, 0x102a, L},
561 {0x102c, 0x102c, L},
562 {0x102d, 0x1030, NSM},
563 {0x1031, 0x1031, L},
564 {0x1032, 0x1032, NSM},
565 {0x1036, 0x1037, NSM},
566 {0x1038, 0x1038, L},
567 {0x1039, 0x1039, NSM},
568 {0x1040, 0x1057, L},
569 {0x1058, 0x1059, NSM},
570 {0x10a0, 0x10c5, L},
571 {0x10d0, 0x10f8, L},
572 {0x10fb, 0x10fb, L},
573 {0x1100, 0x1159, L},
574 {0x115f, 0x11a2, L},
575 {0x11a8, 0x11f9, L},
576 {0x1200, 0x1206, L},
577 {0x1208, 0x1246, L},
578 {0x1248, 0x1248, L},
579 {0x124a, 0x124d, L},
580 {0x1250, 0x1256, L},
581 {0x1258, 0x1258, L},
582 {0x125a, 0x125d, L},
583 {0x1260, 0x1286, L},
584 {0x1288, 0x1288, L},
585 {0x128a, 0x128d, L},
586 {0x1290, 0x12ae, L},
587 {0x12b0, 0x12b0, L},
588 {0x12b2, 0x12b5, L},
589 {0x12b8, 0x12be, L},
590 {0x12c0, 0x12c0, L},
591 {0x12c2, 0x12c5, L},
592 {0x12c8, 0x12ce, L},
593 {0x12d0, 0x12d6, L},
594 {0x12d8, 0x12ee, L},
595 {0x12f0, 0x130e, L},
596 {0x1310, 0x1310, L},
597 {0x1312, 0x1315, L},
598 {0x1318, 0x131e, L},
599 {0x1320, 0x1346, L},
600 {0x1348, 0x135a, L},
601 {0x1361, 0x137c, L},
602 {0x13a0, 0x13f4, L},
603 {0x1401, 0x1676, L},
604 {0x1680, 0x1680, WS},
605 {0x1681, 0x169a, L},
606 {0x16a0, 0x16f0, L},
607 {0x1700, 0x170c, L},
608 {0x170e, 0x1711, L},
609 {0x1712, 0x1714, NSM},
610 {0x1720, 0x1731, L},
611 {0x1732, 0x1734, NSM},
612 {0x1735, 0x1736, L},
613 {0x1740, 0x1751, L},
614 {0x1752, 0x1753, NSM},
615 {0x1760, 0x176c, L},
616 {0x176e, 0x1770, L},
617 {0x1772, 0x1773, NSM},
618 {0x1780, 0x17b6, L},
619 {0x17b7, 0x17bd, NSM},
620 {0x17be, 0x17c5, L},
621 {0x17c6, 0x17c6, NSM},
622 {0x17c7, 0x17c8, L},
623 {0x17c9, 0x17d3, NSM},
624 {0x17d4, 0x17da, L},
625 {0x17db, 0x17db, ET},
626 {0x17dc, 0x17dc, L},
627 {0x17dd, 0x17dd, NSM},
628 {0x17e0, 0x17e9, L},
629 {0x180b, 0x180d, NSM},
630 {0x180e, 0x180e, WS},
631 {0x1810, 0x1819, L},
632 {0x1820, 0x1877, L},
633 {0x1880, 0x18a8, L},
634 {0x18a9, 0x18a9, NSM},
635 {0x1900, 0x191c, L},
636 {0x1920, 0x1922, NSM},
637 {0x1923, 0x1926, L},
638 {0x1927, 0x192b, NSM},
639 {0x1930, 0x1931, L},
640 {0x1932, 0x1932, NSM},
641 {0x1933, 0x1938, L},
642 {0x1939, 0x193b, NSM},
643 {0x1946, 0x196d, L},
644 {0x1970, 0x1974, L},
645 {0x1d00, 0x1d6b, L},
646 {0x1e00, 0x1e9b, L},
647 {0x1ea0, 0x1ef9, L},
648 {0x1f00, 0x1f15, L},
649 {0x1f18, 0x1f1d, L},
650 {0x1f20, 0x1f45, L},
651 {0x1f48, 0x1f4d, L},
652 {0x1f50, 0x1f57, L},
653 {0x1f59, 0x1f59, L},
654 {0x1f5b, 0x1f5b, L},
655 {0x1f5d, 0x1f5d, L},
656 {0x1f5f, 0x1f7d, L},
657 {0x1f80, 0x1fb4, L},
658 {0x1fb6, 0x1fbc, L},
659 {0x1fbe, 0x1fbe, L},
660 {0x1fc2, 0x1fc4, L},
661 {0x1fc6, 0x1fcc, L},
662 {0x1fd0, 0x1fd3, L},
663 {0x1fd6, 0x1fdb, L},
664 {0x1fe0, 0x1fec, L},
665 {0x1ff2, 0x1ff4, L},
666 {0x1ff6, 0x1ffc, L},
667 {0x2000, 0x200a, WS},
668 {0x200b, 0x200d, BN},
669 {0x200e, 0x200e, L},
670 {0x200f, 0x200f, R},
671 {0x2028, 0x2028, WS},
672 {0x2029, 0x2029, B},
673 {0x202a, 0x202a, LRE},
674 {0x202b, 0x202b, RLE},
675 {0x202c, 0x202c, PDF},
676 {0x202d, 0x202d, LRO},
677 {0x202e, 0x202e, RLO},
678 {0x202f, 0x202f, WS},
679 {0x2030, 0x2034, ET},
680 {0x2044, 0x2044, CS},
681 {0x205f, 0x205f, WS},
682 {0x2060, 0x2063, BN},
683 {0x206a, 0x206f, BN},
684 {0x2070, 0x2070, EN},
685 {0x2071, 0x2071, L},
686 {0x2074, 0x2079, EN},
687 {0x207a, 0x207b, ET},
688 {0x207f, 0x207f, L},
689 {0x2080, 0x2089, EN},
690 {0x208a, 0x208b, ET},
691 {0x20a0, 0x20b1, ET},
692 {0x20d0, 0x20ea, NSM},
693 {0x2102, 0x2102, L},
694 {0x2107, 0x2107, L},
695 {0x210a, 0x2113, L},
696 {0x2115, 0x2115, L},
697 {0x2119, 0x211d, L},
698 {0x2124, 0x2124, L},
699 {0x2126, 0x2126, L},
700 {0x2128, 0x2128, L},
701 {0x212a, 0x212d, L},
702 {0x212e, 0x212e, ET},
703 {0x212f, 0x2131, L},
704 {0x2133, 0x2139, L},
705 {0x213d, 0x213f, L},
706 {0x2145, 0x2149, L},
707 {0x2160, 0x2183, L},
708 {0x2212, 0x2213, ET},
709 {0x2336, 0x237a, L},
710 {0x2395, 0x2395, L},
711 {0x2488, 0x249b, EN},
712 {0x249c, 0x24e9, L},
713 {0x2800, 0x28ff, L},
714 {0x3000, 0x3000, WS},
715 {0x3005, 0x3007, L},
716 {0x3021, 0x3029, L},
717 {0x302a, 0x302f, NSM},
718 {0x3031, 0x3035, L},
719 {0x3038, 0x303c, L},
720 {0x3041, 0x3096, L},
721 {0x3099, 0x309a, NSM},
722 {0x309d, 0x309f, L},
723 {0x30a1, 0x30fa, L},
724 {0x30fc, 0x30ff, L},
725 {0x3105, 0x312c, L},
726 {0x3131, 0x318e, L},
727 {0x3190, 0x31b7, L},
728 {0x31f0, 0x321c, L},
729 {0x3220, 0x3243, L},
730 {0x3260, 0x327b, L},
731 {0x327f, 0x32b0, L},
732 {0x32c0, 0x32cb, L},
733 {0x32d0, 0x32fe, L},
734 {0x3300, 0x3376, L},
735 {0x337b, 0x33dd, L},
736 {0x33e0, 0x33fe, L},
737 {0x3400, 0x4db5, L},
738 {0x4e00, 0x9fa5, L},
739 {0xa000, 0xa48c, L},
740 {0xac00, 0xd7a3, L},
741 {0xd800, 0xfa2d, L},
742 {0xfa30, 0xfa6a, L},
743 {0xfb00, 0xfb06, L},
744 {0xfb13, 0xfb17, L},
745 {0xfb1d, 0xfb1d, R},
746 {0xfb1e, 0xfb1e, NSM},
747 {0xfb1f, 0xfb28, R},
748 {0xfb29, 0xfb29, ET},
749 {0xfb2a, 0xfb36, R},
750 {0xfb38, 0xfb3c, R},
751 {0xfb3e, 0xfb3e, R},
752 {0xfb40, 0xfb41, R},
753 {0xfb43, 0xfb44, R},
754 {0xfb46, 0xfb4f, R},
755 {0xfb50, 0xfbb1, AL},
756 {0xfbd3, 0xfd3d, AL},
757 {0xfd50, 0xfd8f, AL},
758 {0xfd92, 0xfdc7, AL},
759 {0xfdf0, 0xfdfc, AL},
760 {0xfe00, 0xfe0f, NSM},
761 {0xfe20, 0xfe23, NSM},
762 {0xfe50, 0xfe50, CS},
763 {0xfe52, 0xfe52, CS},
764 {0xfe55, 0xfe55, CS},
765 {0xfe5f, 0xfe5f, ET},
766 {0xfe62, 0xfe63, ET},
767 {0xfe69, 0xfe6a, ET},
768 {0xfe70, 0xfe74, AL},
769 {0xfe76, 0xfefc, AL},
770 {0xfeff, 0xfeff, BN},
771 {0xff03, 0xff05, ET},
772 {0xff0b, 0xff0b, ET},
773 {0xff0c, 0xff0c, CS},
774 {0xff0d, 0xff0d, ET},
775 {0xff0e, 0xff0e, CS},
776 {0xff0f, 0xff0f, ES},
777 {0xff10, 0xff19, EN},
778 {0xff1a, 0xff1a, CS},
779 {0xff21, 0xff3a, L},
780 {0xff41, 0xff5a, L},
781 {0xff66, 0xffbe, L},
782 {0xffc2, 0xffc7, L},
783 {0xffca, 0xffcf, L},
784 {0xffd2, 0xffd7, L},
785 {0xffda, 0xffdc, L},
786 {0xffe0, 0xffe1, ET},
787 {0xffe5, 0xffe6, ET},
788 {0x10000, 0x1000b, L},
789 {0x1000d, 0x10026, L},
790 {0x10028, 0x1003a, L},
791 {0x1003c, 0x1003d, L},
792 {0x1003f, 0x1004d, L},
793 {0x10050, 0x1005d, L},
794 {0x10080, 0x100fa, L},
795 {0x10100, 0x10100, L},
796 {0x10102, 0x10102, L},
797 {0x10107, 0x10133, L},
798 {0x10137, 0x1013f, L},
799 {0x10300, 0x1031e, L},
800 {0x10320, 0x10323, L},
801 {0x10330, 0x1034a, L},
802 {0x10380, 0x1039d, L},
803 {0x1039f, 0x1039f, L},
804 {0x10400, 0x1049d, L},
805 {0x104a0, 0x104a9, L},
806 {0x10800, 0x10805, R},
807 {0x10808, 0x10808, R},
808 {0x1080a, 0x10835, R},
809 {0x10837, 0x10838, R},
810 {0x1083c, 0x1083c, R},
811 {0x1083f, 0x1083f, R},
812 {0x1d000, 0x1d0f5, L},
813 {0x1d100, 0x1d126, L},
814 {0x1d12a, 0x1d166, L},
815 {0x1d167, 0x1d169, NSM},
816 {0x1d16a, 0x1d172, L},
817 {0x1d173, 0x1d17a, BN},
818 {0x1d17b, 0x1d182, NSM},
819 {0x1d183, 0x1d184, L},
820 {0x1d185, 0x1d18b, NSM},
821 {0x1d18c, 0x1d1a9, L},
822 {0x1d1aa, 0x1d1ad, NSM},
823 {0x1d1ae, 0x1d1dd, L},
824 {0x1d400, 0x1d454, L},
825 {0x1d456, 0x1d49c, L},
826 {0x1d49e, 0x1d49f, L},
827 {0x1d4a2, 0x1d4a2, L},
828 {0x1d4a5, 0x1d4a6, L},
829 {0x1d4a9, 0x1d4ac, L},
830 {0x1d4ae, 0x1d4b9, L},
831 {0x1d4bb, 0x1d4bb, L},
832 {0x1d4bd, 0x1d4c3, L},
833 {0x1d4c5, 0x1d505, L},
834 {0x1d507, 0x1d50a, L},
835 {0x1d50d, 0x1d514, L},
836 {0x1d516, 0x1d51c, L},
837 {0x1d51e, 0x1d539, L},
838 {0x1d53b, 0x1d53e, L},
839 {0x1d540, 0x1d544, L},
840 {0x1d546, 0x1d546, L},
841 {0x1d54a, 0x1d550, L},
842 {0x1d552, 0x1d6a3, L},
843 {0x1d6a8, 0x1d7c9, L},
844 {0x1d7ce, 0x1d7ff, EN},
845 {0x20000, 0x2a6d6, L},
846 {0x2f800, 0x2fa1d, L},
847 {0xe0001, 0xe0001, BN},
848 {0xe0020, 0xe007f, BN},
849 {0xe0100, 0xe01ef, NSM},
850 {0xf0000, 0xffffd, L},
851 {0x100000, 0x10fffd, L}
852 };
853
854 int i, j, k;
855
856 i = -1;
857 j = lenof(lookup);
858
859 while (j - i > 1) {
860 k = (i + j) / 2;
861 if (ch < lookup[k].first)
862 j = k;
863 else if (ch > lookup[k].last)
864 i = k;
865 else
866 return lookup[k].type;
867 }
868
869 /*
870 * If we reach here, the character was not in any of the
871 * intervals listed in the lookup table. This means we return
872 * ON (`Other Neutrals'). This is the appropriate code for any
873 * character genuinely not listed in the Unicode table, and
874 * also the table above has deliberately left out any
875 * characters _explicitly_ listed as ON (to save space!).
876 */
877 return ON;
878 }
879
880 /*
881 * Function exported to front ends to allow them to identify
882 * bidi-active characters (in case, for example, the platform's
883 * text display function can't conveniently be prevented from doing
884 * its own bidi and so special treatment is required for characters
885 * that would cause the bidi algorithm to activate).
886 *
887 * This function is passed a single Unicode code point, and returns
888 * nonzero if the presence of this code point can possibly cause
889 * the bidi algorithm to do any reordering. Thus, any string
890 * composed entirely of characters for which is_rtl() returns zero
891 * should be safe to pass to a bidi-active platform display
892 * function without fear.
893 *
894 * (is_rtl() must therefore also return true for any character
895 * which would be affected by Arabic shaping, but this isn't
896 * important because all such characters are right-to-left so it
897 * would have flagged them anyway.)
898 */
899 int is_rtl(int c)
900 {
901 /*
902 * After careful reading of the Unicode bidi algorithm (URL as
903 * given at the top of this file) I believe that the only
904 * character classes which can possibly cause trouble are R,
905 * AL, RLE and RLO. I think that any string containing no
906 * character in any of those classes will be displayed
907 * uniformly left-to-right by the Unicode bidi algorithm.
908 */
909 const int mask = (1<<R) | (1<<AL) | (1<<RLE) | (1<<RLO);
910
911 return mask & (1 << (getType(c)));
912 }
913
914 /*
915 * The most significant 2 bits of each level are used to store
916 * Override status of each character
917 * This function sets the override bits of level according
918 * to the value in override, and reurns the new byte.
919 */
920 unsigned char setOverrideBits(unsigned char level, unsigned char override)
921 {
922 if (override == ON)
923 return level;
924 else if (override == R)
925 return level | OISR;
926 else if (override == L)
927 return level | OISL;
928 return level;
929 }
930
/*
 * Looks at the run of equal values in `level' that ends just before
 * index `from', and returns the value immediately _before_ that run.
 * Returns -1 if `from' is not positive or if the run reaches back to
 * the start of the array. Used to process U+202C POP DIRECTIONAL
 * FORMATTING.
 */
int getPreviousLevel(unsigned char* level, int from)
{
    int pos;
    unsigned char runValue;

    if (from <= 0)
        return -1;

    pos = from - 1;
    runValue = level[pos];

    /* step backwards over every entry belonging to the same run */
    do {
        pos--;
    } while (pos >= 0 && level[pos] == runValue);

    return (pos >= 0) ? level[pos] : -1;
}
951
952 /* The Main shaping function, and the only one to be used
953 * by the outside world.
954 *
955 * line: buffer to apply shaping to. this must be passed by doBidi() first
956 * to: output buffer for the shaped data
957 * count: number of characters in line
958 */
959 int do_shape(bidi_char *line, bidi_char *to, int count)
960 {
961 int i, tempShape, ligFlag;
962
963 for (ligFlag=i=0; i<count; i++) {
964 to[i] = line[i];
965 tempShape = STYPE(line[i].wc);
966 switch (tempShape) {
967 case SC:
968 break;
969
970 case SU:
971 break;
972
973 case SR:
974 tempShape = (i+1 < count ? STYPE(line[i+1].wc) : SU);
975 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
976 to[i].wc = SFINAL((SISOLATED(line[i].wc)));
977 else
978 to[i].wc = SISOLATED(line[i].wc);
979 break;
980
981
982 case SD:
983 /* Make Ligatures */
984 tempShape = (i+1 < count ? STYPE(line[i+1].wc) : SU);
985 if (line[i].wc == 0x644) {
986 if (i > 0) switch (line[i-1].wc) {
987 case 0x622:
988 ligFlag = 1;
989 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
990 to[i].wc = 0xFEF6;
991 else
992 to[i].wc = 0xFEF5;
993 break;
994 case 0x623:
995 ligFlag = 1;
996 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
997 to[i].wc = 0xFEF8;
998 else
999 to[i].wc = 0xFEF7;
1000 break;
1001 case 0x625:
1002 ligFlag = 1;
1003 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
1004 to[i].wc = 0xFEFA;
1005 else
1006 to[i].wc = 0xFEF9;
1007 break;
1008 case 0x627:
1009 ligFlag = 1;
1010 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
1011 to[i].wc = 0xFEFC;
1012 else
1013 to[i].wc = 0xFEFB;
1014 break;
1015 }
1016 if (ligFlag) {
1017 to[i-1].wc = 0x20;
1018 ligFlag = 0;
1019 break;
1020 }
1021 }
1022
1023 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) {
1024 tempShape = (i > 0 ? STYPE(line[i-1].wc) : SU);
1025 if ((tempShape == SR) || (tempShape == SD) || (tempShape == SC))
1026 to[i].wc = SMEDIAL((SISOLATED(line[i].wc)));
1027 else
1028 to[i].wc = SFINAL((SISOLATED(line[i].wc)));
1029 break;
1030 }
1031
1032 tempShape = (i > 0 ? STYPE(line[i-1].wc) : SU);
1033 if ((tempShape == SR) || (tempShape == SD) || (tempShape == SC))
1034 to[i].wc = SINITIAL((SISOLATED(line[i].wc)));
1035 else
1036 to[i].wc = SISOLATED(line[i].wc);
1037 break;
1038
1039
1040 }
1041 }
1042 return 1;
1043 }
1044
1045 /*
1046 * The Main Bidi Function, and the only function that should
1047 * be used by the outside world.
1048 *
1049 * line: a buffer of size count containing text to apply
1050 * the Bidirectional algorithm to.
1051 */
1052
1053 int do_bidi(bidi_char *line, int count)
1054 {
1055 unsigned char* types;
1056 unsigned char* levels;
1057 unsigned char paragraphLevel;
1058 unsigned char currentEmbedding;
1059 unsigned char currentOverride;
1060 unsigned char tempType;
1061 int i, j, imax, yes, bover;
1062
1063 /* Check the presence of R or AL types as optimization */
1064 yes = 0;
1065 for (i=0; i<count; i++) {
1066 int type = getType(line[i].wc);
1067 if (type == R || type == AL) {
1068 yes = 1;
1069 break;
1070 }
1071 }
1072 if (yes == 0)
1073 return L;
1074
1075 /* Initialize types, levels */
1076 types = snewn(count, unsigned char);
1077 levels = snewn(count, unsigned char);
1078
1079 /* Rule (P1) NOT IMPLEMENTED
1080 * P1. Split the text into separate paragraphs. A paragraph separator is
1081 * kept with the previous paragraph. Within each paragraph, apply all the
1082 * other rules of this algorithm.
1083 */
1084
1085 /* Rule (P2), (P3)
1086 * P2. In each paragraph, find the first character of type L, AL, or R.
1087 * P3. If a character is found in P2 and it is of type AL or R, then set
1088 * the paragraph embedding level to one; otherwise, set it to zero.
1089 */
1090 paragraphLevel = 0;
1091 for (i=0; i<count ; i++) {
1092 int type = getType(line[i].wc);
1093 if (type == R || type == AL) {
1094 paragraphLevel = 1;
1095 break;
1096 } else if (type == L)
1097 break;
1098 }
1099
1100 /* Rule (X1)
1101 * X1. Begin by setting the current embedding level to the paragraph
1102 * embedding level. Set the directional override status to neutral.
1103 */
1104 currentEmbedding = paragraphLevel;
1105 currentOverride = ON;
1106
1107 /* Rule (X2), (X3), (X4), (X5), (X6), (X7), (X8)
1108 * X2. With each RLE, compute the least greater odd embedding level.
1109 * X3. With each LRE, compute the least greater even embedding level.
1110 * X4. With each RLO, compute the least greater odd embedding level.
1111 * X5. With each LRO, compute the least greater even embedding level.
1112 * X6. For all types besides RLE, LRE, RLO, LRO, and PDF:
1113 * a. Set the level of the current character to the current
1114 * embedding level.
1115 * b. Whenever the directional override status is not neutral,
1116 * reset the current character type to the directional
1117 * override status.
1118 * X7. With each PDF, determine the matching embedding or override code.
1119 * If there was a valid matching code, restore (pop) the last
1120 * remembered (pushed) embedding level and directional override.
1121 * X8. All explicit directional embeddings and overrides are completely
1122 * terminated at the end of each paragraph. Paragraph separators are not
1123 * included in the embedding. (Useless here) NOT IMPLEMENTED
1124 */
1125 bover = 0;
1126 for (i=0; i<count; i++) {
1127 tempType = getType(line[i].wc);
1128 switch (tempType) {
1129 case RLE:
1130 currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding);
1131 levels[i] = setOverrideBits(levels[i], currentOverride);
1132 currentOverride = ON;
1133 break;
1134
1135 case LRE:
1136 currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding);
1137 levels[i] = setOverrideBits(levels[i], currentOverride);
1138 currentOverride = ON;
1139 break;
1140
1141 case RLO:
1142 currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding);
1143 tempType = currentOverride = R;
1144 bover = 1;
1145 break;
1146
1147 case LRO:
1148 currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding);
1149 tempType = currentOverride = L;
1150 bover = 1;
1151 break;
1152
1153 case PDF:
1154 {
1155 int prevlevel = getPreviousLevel(levels, i);
1156
1157 if (prevlevel == -1) {
1158 currentEmbedding = paragraphLevel;
1159 currentOverride = ON;
1160 } else {
1161 currentOverride = currentEmbedding & OMASK;
1162 currentEmbedding = currentEmbedding & ~OMASK;
1163 }
1164 }
1165 levels[i] = currentEmbedding;
1166 break;
1167
1168 /* Whitespace is treated as neutral for now */
1169 case WS:
1170 case S:
1171 levels[i] = currentEmbedding;
1172 tempType = ON;
1173 if (currentOverride != ON)
1174 tempType = currentOverride;
1175 break;
1176
1177 default:
1178 levels[i] = currentEmbedding;
1179 if (currentOverride != ON)
1180 tempType = currentOverride;
1181 break;
1182
1183 }
1184 types[i] = tempType;
1185 }
1186 /* this clears out all overrides, so we can use levels safely... */
1187 /* checks bover first */
1188 if (bover)
1189 for (i=0; i<count; i++)
1190 levels[i] = levels[i] & LMASK;
1191
1192 /* Rule (X9)
1193 * X9. Remove all RLE, LRE, RLO, LRO, PDF, and BN codes.
1194 * Here, they're converted to BN.
1195 */
1196 for (i=0; i<count; i++) {
1197 switch (types[i]) {
1198 case RLE:
1199 case LRE:
1200 case RLO:
1201 case LRO:
1202 case PDF:
1203 types[i] = BN;
1204 break;
1205 }
1206 }
1207
1208 /* Rule (W1)
1209 * W1. Examine each non-spacing mark (NSM) in the level run, and change
1210 * the type of the NSM to the type of the previous character. If the NSM
1211 * is at the start of the level run, it will get the type of sor.
1212 */
1213 if (types[0] == NSM)
1214 types[0] = paragraphLevel;
1215
1216 for (i=1; i<count; i++) {
1217 if (types[i] == NSM)
1218 types[i] = types[i-1];
1219 /* Is this a safe assumption?
1220 * I assumed the previous, IS a character.
1221 */
1222 }
1223
1224 /* Rule (W2)
1225 * W2. Search backwards from each instance of a European number until the
1226 * first strong type (R, L, AL, or sor) is found. If an AL is found,
1227 * change the type of the European number to Arabic number.
1228 */
1229 for (i=0; i<count; i++) {
1230 if (types[i] == EN) {
1231 j=i;
1232 while (j >= 0) {
1233 if (types[j] == AL) {
1234 types[i] = AN;
1235 break;
1236 } else if (types[j] == R || types[j] == L) {
1237 break;
1238 }
1239 j--;
1240 }
1241 }
1242 }
1243
1244 /* Rule (W3)
1245 * W3. Change all ALs to R.
1246 *
1247 * Optimization: on Rule Xn, we might set a flag on AL type
1248 * to prevent this loop in L R lines only...
1249 */
1250 for (i=0; i<count; i++) {
1251 if (types[i] == AL)
1252 types[i] = R;
1253 }
1254
1255 /* Rule (W4)
1256 * W4. A single European separator between two European numbers changes
1257 * to a European number. A single common separator between two numbers
1258 * of the same type changes to that type.
1259 */
1260 for (i=1; i<(count-1); i++) {
1261 if (types[i] == ES) {
1262 if (types[i-1] == EN && types[i+1] == EN)
1263 types[i] = EN;
1264 } else if (types[i] == CS) {
1265 if (types[i-1] == EN && types[i+1] == EN)
1266 types[i] = EN;
1267 else if (types[i-1] == AN && types[i+1] == AN)
1268 types[i] = AN;
1269 }
1270 }
1271
1272 /* Rule (W5)
1273 * W5. A sequence of European terminators adjacent to European numbers
1274 * changes to all European numbers.
1275 *
1276 * Optimization: lots here... else ifs need rearrangement
1277 */
1278 for (i=0; i<count; i++) {
1279 if (types[i] == ET) {
1280 if (i > 0 && types[i-1] == EN) {
1281 types[i] = EN;
1282 continue;
1283 } else if (i < count-1 && types[i+1] == EN) {
1284 types[i] = EN;
1285 continue;
1286 } else if (i < count-1 && types[i+1] == ET) {
1287 j=i;
1288 while (j <count && types[j] == ET) {
1289 j++;
1290 }
1291 if (types[j] == EN)
1292 types[i] = EN;
1293 }
1294 }
1295 }
1296
1297 /* Rule (W6)
1298 * W6. Otherwise, separators and terminators change to Other Neutral:
1299 */
1300 for (i=0; i<count; i++) {
1301 switch (types[i]) {
1302 case ES:
1303 case ET:
1304 case CS:
1305 types[i] = ON;
1306 break;
1307 }
1308 }
1309
1310 /* Rule (W7)
1311 * W7. Search backwards from each instance of a European number until
1312 * the first strong type (R, L, or sor) is found. If an L is found,
1313 * then change the type of the European number to L.
1314 */
1315 for (i=0; i<count; i++) {
1316 if (types[i] == EN) {
1317 j=i;
1318 while (j >= 0) {
1319 if (types[j] == L) {
1320 types[i] = L;
1321 break;
1322 } else if (types[j] == R || types[j] == AL) {
1323 break;
1324 }
1325 j--;
1326 }
1327 }
1328 }
1329
1330 /* Rule (N1)
1331 * N1. A sequence of neutrals takes the direction of the surrounding
1332 * strong text if the text on both sides has the same direction. European
1333 * and Arabic numbers are treated as though they were R.
1334 */
1335 if (count >= 2 && types[0] == ON) {
1336 if ((types[1] == R) || (types[1] == EN) || (types[1] == AN))
1337 types[0] = R;
1338 else if (types[1] == L)
1339 types[0] = L;
1340 }
1341 for (i=1; i<(count-1); i++) {
1342 if (types[i] == ON) {
1343 if (types[i-1] == L) {
1344 j=i;
1345 while (j<(count-1) && types[j] == ON) {
1346 j++;
1347 }
1348 if (types[j] == L) {
1349 while (i<j) {
1350 types[i] = L;
1351 i++;
1352 }
1353 }
1354
1355 } else if ((types[i-1] == R) ||
1356 (types[i-1] == EN) ||
1357 (types[i-1] == AN)) {
1358 j=i;
1359 while (j<(count-1) && types[j] == ON) {
1360 j++;
1361 }
1362 if ((types[j] == R) ||
1363 (types[j] == EN) ||
1364 (types[j] == AN)) {
1365 while (i<j) {
1366 types[i] = R;
1367 i++;
1368 }
1369 }
1370 }
1371 }
1372 }
1373 if (count >= 2 && types[count-1] == ON) {
1374 if (types[count-2] == R || types[count-2] == EN || types[count-2] == AN)
1375 types[count-1] = R;
1376 else if (types[count-2] == L)
1377 types[count-1] = L;
1378 }
1379
1380 /* Rule (N2)
1381 * N2. Any remaining neutrals take the embedding direction.
1382 */
1383 for (i=0; i<count; i++) {
1384 if (types[i] == ON) {
1385 if ((levels[i] % 2) == 0)
1386 types[i] = L;
1387 else
1388 types[i] = R;
1389 }
1390 }
1391
1392 /* Rule (I1)
1393 * I1. For all characters with an even (left-to-right) embedding
1394 * direction, those of type R go up one level and those of type AN or
1395 * EN go up two levels.
1396 */
1397 for (i=0; i<count; i++) {
1398 if ((levels[i] % 2) == 0) {
1399 if (types[i] == R)
1400 levels[i] += 1;
1401 else if (types[i] == AN || types[i] == EN)
1402 levels[i] += 2;
1403 }
1404 }
1405
1406 /* Rule (I2)
1407 * I2. For all characters with an odd (right-to-left) embedding direction,
1408 * those of type L, EN or AN go up one level.
1409 */
1410 for (i=0; i<count; i++) {
1411 if ((levels[i] % 2) == 1) {
1412 if (types[i] == L || types[i] == EN || types[i] == AN)
1413 levels[i] += 1;
1414 }
1415 }
1416
1417 /* Rule (L1)
1418 * L1. On each line, reset the embedding level of the following characters
1419 * to the paragraph embedding level:
1420 * (1)segment separators, (2)paragraph separators,
1421 * (3)any sequence of whitespace characters preceding
1422 * a segment separator or paragraph separator,
1423 * (4)and any sequence of white space characters
1424 * at the end of the line.
1425 * The types of characters used here are the original types, not those
1426 * modified by the previous phase.
1427 */
1428 j=count-1;
1429 while (j>0 && (getType(line[j].wc) == WS)) {
1430 j--;
1431 }
1432 if (j < (count-1)) {
1433 for (j++; j<count; j++)
1434 levels[j] = paragraphLevel;
1435 }
1436 for (i=0; i<count; i++) {
1437 tempType = getType(line[i].wc);
1438 if (tempType == WS) {
1439 j=i;
1440 while (j<count && (getType(line[j].wc) == WS)) {
1441 j++;
1442 }
1443 if (j==count || getType(line[j].wc) == B ||
1444 getType(line[j].wc) == S) {
1445 for (j--; j>=i ; j--) {
1446 levels[j] = paragraphLevel;
1447 }
1448 }
1449 } else if (tempType == B || tempType == S) {
1450 levels[i] = paragraphLevel;
1451 }
1452 }
1453
1454 /* Rule (L4) NOT IMPLEMENTED
1455 * L4. A character that possesses the mirrored property as specified by
1456 * Section 4.7, Mirrored, must be depicted by a mirrored glyph if the
1457 * resolved directionality of that character is R.
1458 */
1459 /* Note: this is implemented before L2 for efficiency */
1460 for (i=0; i<count; i++)
1461 if ((levels[i] % 2) == 1)
1462 doMirror(&line[i].wc);
1463
1464 /* Rule (L2)
1465 * L2. From the highest level found in the text to the lowest odd level on
1466 * each line, including intermediate levels not actually present in the
1467 * text, reverse any contiguous sequence of characters that are at that
1468 * level or higher
1469 */
1470 /* we flip the character string and leave the level array */
1471 imax = 0;
1472 i=0;
1473 tempType = levels[0];
1474 while (i < count) {
1475 if (levels[i] > tempType) {
1476 tempType = levels[i];
1477 imax=i;
1478 }
1479 i++;
1480 }
1481 /* maximum level in tempType, its index in imax. */
1482 while (tempType > 0) { /* loop from highest level to the least odd, */
1483 /* which i assume is 1 */
1484 flipThisRun(line, levels, tempType, count);
1485 tempType--;
1486 }
1487
1488 /* Rule (L3) NOT IMPLEMENTED
1489 * L3. Combining marks applied to a right-to-left base character will at
1490 * this point precede their base character. If the rendering engine
1491 * expects them to follow the base characters in the final display
1492 * process, then the ordering of the marks and the base character must
1493 * be reversed.
1494 */
1495 sfree(types);
1496 sfree(levels);
1497 return R;
1498 }
1499
1500
/*
 * doMirror: replace a character by its mirrored counterpart, in place.
 *
 * ch points at the character to examine. If *ch appears in the
 * hand-maintained table below, it is overwritten with the code point
 * of its mirror image (e.g. '(' becomes ')'); any other value is left
 * untouched. The bidi routine above calls this for every character
 * whose resolved embedding level is odd (rule L4).
 *
 * The outer if-chain dispatches on the high byte so that each switch
 * only has to cover a single 256-code-point block.
 *
 * Every pair must be symmetric: mirroring twice has to give back the
 * original character.
 */
void doMirror(wchar_t* ch)
{
    if ((*ch & 0xFF00) == 0) {
        switch (*ch) {
        case 0x0028: *ch = 0x0029; break;
        case 0x0029: *ch = 0x0028; break;
        case 0x003C: *ch = 0x003E; break;
        case 0x003E: *ch = 0x003C; break;
        case 0x005B: *ch = 0x005D; break;
        case 0x005D: *ch = 0x005B; break;
        case 0x007B: *ch = 0x007D; break;
        case 0x007D: *ch = 0x007B; break;
        case 0x00AB: *ch = 0x00BB; break;
        case 0x00BB: *ch = 0x00AB; break;
        }
    } else if ((*ch & 0xFF00) == 0x2000) {
        switch (*ch) {
        case 0x2039: *ch = 0x203A; break;
        case 0x203A: *ch = 0x2039; break;
        case 0x2045: *ch = 0x2046; break;
        case 0x2046: *ch = 0x2045; break;
        case 0x207D: *ch = 0x207E; break;
        case 0x207E: *ch = 0x207D; break;
        case 0x208D: *ch = 0x208E; break;
        case 0x208E: *ch = 0x208D; break;
        }
    } else if ((*ch & 0xFF00) == 0x2200) {
        switch (*ch) {
        case 0x2208: *ch = 0x220B; break;
        case 0x2209: *ch = 0x220C; break;
        case 0x220A: *ch = 0x220D; break;
        case 0x220B: *ch = 0x2208; break;
        case 0x220C: *ch = 0x2209; break;
        case 0x220D: *ch = 0x220A; break;
        case 0x2215: *ch = 0x29F5; break;
        case 0x223C: *ch = 0x223D; break;
        case 0x223D: *ch = 0x223C; break;
        case 0x2243: *ch = 0x22CD; break;
        case 0x2252: *ch = 0x2253; break;
        case 0x2253: *ch = 0x2252; break;
        case 0x2254: *ch = 0x2255; break;
        case 0x2255: *ch = 0x2254; break;
        case 0x2264: *ch = 0x2265; break;
        case 0x2265: *ch = 0x2264; break;
        case 0x2266: *ch = 0x2267; break;
        case 0x2267: *ch = 0x2266; break;
        case 0x2268: *ch = 0x2269; break;
        case 0x2269: *ch = 0x2268; break;
        case 0x226A: *ch = 0x226B; break;
        case 0x226B: *ch = 0x226A; break;
        case 0x226E: *ch = 0x226F; break;
        case 0x226F: *ch = 0x226E; break;
        case 0x2270: *ch = 0x2271; break;
        case 0x2271: *ch = 0x2270; break;
        case 0x2272: *ch = 0x2273; break;
        case 0x2273: *ch = 0x2272; break;
        case 0x2274: *ch = 0x2275; break;
        case 0x2275: *ch = 0x2274; break;
        case 0x2276: *ch = 0x2277; break;
        case 0x2277: *ch = 0x2276; break;
        case 0x2278: *ch = 0x2279; break;
        case 0x2279: *ch = 0x2278; break;
        case 0x227A: *ch = 0x227B; break;
        case 0x227B: *ch = 0x227A; break;
        case 0x227C: *ch = 0x227D; break;
        case 0x227D: *ch = 0x227C; break;
        case 0x227E: *ch = 0x227F; break;
        case 0x227F: *ch = 0x227E; break;
        case 0x2280: *ch = 0x2281; break;
        case 0x2281: *ch = 0x2280; break;
        case 0x2282: *ch = 0x2283; break;
        case 0x2283: *ch = 0x2282; break;
        case 0x2284: *ch = 0x2285; break;
        case 0x2285: *ch = 0x2284; break;
        case 0x2286: *ch = 0x2287; break;
        case 0x2287: *ch = 0x2286; break;
        case 0x2288: *ch = 0x2289; break;
        case 0x2289: *ch = 0x2288; break;
        case 0x228A: *ch = 0x228B; break;
        case 0x228B: *ch = 0x228A; break;
        case 0x228F: *ch = 0x2290; break;
        case 0x2290: *ch = 0x228F; break;
        case 0x2291: *ch = 0x2292; break;
        case 0x2292: *ch = 0x2291; break;
        case 0x2298: *ch = 0x29B8; break;
        case 0x22A2: *ch = 0x22A3; break;
        case 0x22A3: *ch = 0x22A2; break;
        case 0x22A6: *ch = 0x2ADE; break;
        case 0x22A8: *ch = 0x2AE4; break;
        case 0x22A9: *ch = 0x2AE3; break;
        case 0x22AB: *ch = 0x2AE5; break;
        case 0x22B0: *ch = 0x22B1; break;
        case 0x22B1: *ch = 0x22B0; break;
        case 0x22B2: *ch = 0x22B3; break;
        case 0x22B3: *ch = 0x22B2; break;
        case 0x22B4: *ch = 0x22B5; break;
        case 0x22B5: *ch = 0x22B4; break;
        case 0x22B6: *ch = 0x22B7; break;
        case 0x22B7: *ch = 0x22B6; break;
        case 0x22C9: *ch = 0x22CA; break;
        case 0x22CA: *ch = 0x22C9; break;
        case 0x22CB: *ch = 0x22CC; break;
        case 0x22CC: *ch = 0x22CB; break;
        case 0x22CD: *ch = 0x2243; break;
        case 0x22D0: *ch = 0x22D1; break;
        case 0x22D1: *ch = 0x22D0; break;
        case 0x22D6: *ch = 0x22D7; break;
        case 0x22D7: *ch = 0x22D6; break;
        case 0x22D8: *ch = 0x22D9; break;
        case 0x22D9: *ch = 0x22D8; break;
        case 0x22DA: *ch = 0x22DB; break;
        case 0x22DB: *ch = 0x22DA; break;
        case 0x22DC: *ch = 0x22DD; break;
        case 0x22DD: *ch = 0x22DC; break;
        case 0x22DE: *ch = 0x22DF; break;
        case 0x22DF: *ch = 0x22DE; break;
        case 0x22E0: *ch = 0x22E1; break;
        case 0x22E1: *ch = 0x22E0; break;
        case 0x22E2: *ch = 0x22E3; break;
        case 0x22E3: *ch = 0x22E2; break;
        case 0x22E4: *ch = 0x22E5; break;
        case 0x22E5: *ch = 0x22E4; break;
        case 0x22E6: *ch = 0x22E7; break;
        case 0x22E7: *ch = 0x22E6; break;
        case 0x22E8: *ch = 0x22E9; break;
        case 0x22E9: *ch = 0x22E8; break;
        case 0x22EA: *ch = 0x22EB; break;
        case 0x22EB: *ch = 0x22EA; break;
        case 0x22EC: *ch = 0x22ED; break;
        case 0x22ED: *ch = 0x22EC; break;
        case 0x22F0: *ch = 0x22F1; break;
        case 0x22F1: *ch = 0x22F0; break;
        case 0x22F2: *ch = 0x22FA; break;
        case 0x22F3: *ch = 0x22FB; break;
        case 0x22F4: *ch = 0x22FC; break;
        case 0x22F6: *ch = 0x22FD; break;
        case 0x22F7: *ch = 0x22FE; break;
        case 0x22FA: *ch = 0x22F2; break;
        case 0x22FB: *ch = 0x22F3; break;
        case 0x22FC: *ch = 0x22F4; break;
        case 0x22FD: *ch = 0x22F6; break;
        case 0x22FE: *ch = 0x22F7; break;
        }
    } else if ((*ch & 0xFF00) == 0x2300) {
        switch (*ch) {
        case 0x2308: *ch = 0x2309; break;
        case 0x2309: *ch = 0x2308; break;
        case 0x230A: *ch = 0x230B; break;
        case 0x230B: *ch = 0x230A; break;
        case 0x2329: *ch = 0x232A; break;
        case 0x232A: *ch = 0x2329; break;
        }
    } else if ((*ch & 0xFF00) == 0x2700) {
        switch (*ch) {
        case 0x2768: *ch = 0x2769; break;
        case 0x2769: *ch = 0x2768; break;
        case 0x276A: *ch = 0x276B; break;
        case 0x276B: *ch = 0x276A; break;
        case 0x276C: *ch = 0x276D; break;
        case 0x276D: *ch = 0x276C; break;
        case 0x276E: *ch = 0x276F; break;
        case 0x276F: *ch = 0x276E; break;
        case 0x2770: *ch = 0x2771; break;
        case 0x2771: *ch = 0x2770; break;
        case 0x2772: *ch = 0x2773; break;
        case 0x2773: *ch = 0x2772; break;
        case 0x2774: *ch = 0x2775; break;
        case 0x2775: *ch = 0x2774; break;
        case 0x27D5: *ch = 0x27D6; break;
        case 0x27D6: *ch = 0x27D5; break;
        case 0x27DD: *ch = 0x27DE; break;
        case 0x27DE: *ch = 0x27DD; break;
        case 0x27E2: *ch = 0x27E3; break;
        case 0x27E3: *ch = 0x27E2; break;
        case 0x27E4: *ch = 0x27E5; break;
        case 0x27E5: *ch = 0x27E4; break;
        case 0x27E6: *ch = 0x27E7; break;
        case 0x27E7: *ch = 0x27E6; break;
        case 0x27E8: *ch = 0x27E9; break;
        case 0x27E9: *ch = 0x27E8; break;
        case 0x27EA: *ch = 0x27EB; break;
        case 0x27EB: *ch = 0x27EA; break;
        }
    } else if ((*ch & 0xFF00) == 0x2900) {
        switch (*ch) {
        case 0x2983: *ch = 0x2984; break;
        case 0x2984: *ch = 0x2983; break;
        case 0x2985: *ch = 0x2986; break;
        case 0x2986: *ch = 0x2985; break;
        case 0x2987: *ch = 0x2988; break;
        case 0x2988: *ch = 0x2987; break;
        case 0x2989: *ch = 0x298A; break;
        case 0x298A: *ch = 0x2989; break;
        case 0x298B: *ch = 0x298C; break;
        case 0x298C: *ch = 0x298B; break;
        /* The next four are deliberately cross-paired (corner-tick
         * brackets), not a copy-and-paste slip. */
        case 0x298D: *ch = 0x2990; break;
        case 0x298E: *ch = 0x298F; break;
        case 0x298F: *ch = 0x298E; break;
        case 0x2990: *ch = 0x298D; break;
        case 0x2991: *ch = 0x2992; break;
        case 0x2992: *ch = 0x2991; break;
        case 0x2993: *ch = 0x2994; break;
        case 0x2994: *ch = 0x2993; break;
        case 0x2995: *ch = 0x2996; break;
        case 0x2996: *ch = 0x2995; break;
        case 0x2997: *ch = 0x2998; break;
        case 0x2998: *ch = 0x2997; break;
        case 0x29B8: *ch = 0x2298; break;
        case 0x29C0: *ch = 0x29C1; break;
        case 0x29C1: *ch = 0x29C0; break;
        case 0x29C4: *ch = 0x29C5; break;
        case 0x29C5: *ch = 0x29C4; break;
        case 0x29CF: *ch = 0x29D0; break;
        case 0x29D0: *ch = 0x29CF; break;
        case 0x29D1: *ch = 0x29D2; break;
        case 0x29D2: *ch = 0x29D1; break;
        case 0x29D4: *ch = 0x29D5; break;
        case 0x29D5: *ch = 0x29D4; break;
        case 0x29D8: *ch = 0x29D9; break;
        case 0x29D9: *ch = 0x29D8; break;
        case 0x29DA: *ch = 0x29DB; break;
        case 0x29DB: *ch = 0x29DA; break;
        case 0x29F5: *ch = 0x2215; break;
        case 0x29F8: *ch = 0x29F9; break;
        case 0x29F9: *ch = 0x29F8; break;
        case 0x29FC: *ch = 0x29FD; break;
        case 0x29FD: *ch = 0x29FC; break;
        }
    } else if ((*ch & 0xFF00) == 0x2A00) {
        switch (*ch) {
        case 0x2A2B: *ch = 0x2A2C; break;
        case 0x2A2C: *ch = 0x2A2B; break;
        /* U+2A2D pairs with U+2A2E (not U+2A2C, which belongs to
         * U+2A2B above), so that mirroring stays symmetric. */
        case 0x2A2D: *ch = 0x2A2E; break;
        case 0x2A2E: *ch = 0x2A2D; break;
        case 0x2A34: *ch = 0x2A35; break;
        case 0x2A35: *ch = 0x2A34; break;
        case 0x2A3C: *ch = 0x2A3D; break;
        case 0x2A3D: *ch = 0x2A3C; break;
        case 0x2A64: *ch = 0x2A65; break;
        case 0x2A65: *ch = 0x2A64; break;
        case 0x2A79: *ch = 0x2A7A; break;
        case 0x2A7A: *ch = 0x2A79; break;
        case 0x2A7D: *ch = 0x2A7E; break;
        case 0x2A7E: *ch = 0x2A7D; break;
        case 0x2A7F: *ch = 0x2A80; break;
        case 0x2A80: *ch = 0x2A7F; break;
        case 0x2A81: *ch = 0x2A82; break;
        case 0x2A82: *ch = 0x2A81; break;
        case 0x2A83: *ch = 0x2A84; break;
        case 0x2A84: *ch = 0x2A83; break;
        case 0x2A8B: *ch = 0x2A8C; break;
        case 0x2A8C: *ch = 0x2A8B; break;
        case 0x2A91: *ch = 0x2A92; break;
        case 0x2A92: *ch = 0x2A91; break;
        case 0x2A93: *ch = 0x2A94; break;
        case 0x2A94: *ch = 0x2A93; break;
        case 0x2A95: *ch = 0x2A96; break;
        case 0x2A96: *ch = 0x2A95; break;
        case 0x2A97: *ch = 0x2A98; break;
        case 0x2A98: *ch = 0x2A97; break;
        case 0x2A99: *ch = 0x2A9A; break;
        case 0x2A9A: *ch = 0x2A99; break;
        case 0x2A9B: *ch = 0x2A9C; break;
        case 0x2A9C: *ch = 0x2A9B; break;
        case 0x2AA1: *ch = 0x2AA2; break;
        case 0x2AA2: *ch = 0x2AA1; break;
        case 0x2AA6: *ch = 0x2AA7; break;
        case 0x2AA7: *ch = 0x2AA6; break;
        case 0x2AA8: *ch = 0x2AA9; break;
        case 0x2AA9: *ch = 0x2AA8; break;
        case 0x2AAA: *ch = 0x2AAB; break;
        case 0x2AAB: *ch = 0x2AAA; break;
        case 0x2AAC: *ch = 0x2AAD; break;
        case 0x2AAD: *ch = 0x2AAC; break;
        case 0x2AAF: *ch = 0x2AB0; break;
        case 0x2AB0: *ch = 0x2AAF; break;
        case 0x2AB3: *ch = 0x2AB4; break;
        case 0x2AB4: *ch = 0x2AB3; break;
        case 0x2ABB: *ch = 0x2ABC; break;
        case 0x2ABC: *ch = 0x2ABB; break;
        case 0x2ABD: *ch = 0x2ABE; break;
        case 0x2ABE: *ch = 0x2ABD; break;
        case 0x2ABF: *ch = 0x2AC0; break;
        case 0x2AC0: *ch = 0x2ABF; break;
        case 0x2AC1: *ch = 0x2AC2; break;
        case 0x2AC2: *ch = 0x2AC1; break;
        case 0x2AC3: *ch = 0x2AC4; break;
        case 0x2AC4: *ch = 0x2AC3; break;
        case 0x2AC5: *ch = 0x2AC6; break;
        case 0x2AC6: *ch = 0x2AC5; break;
        case 0x2ACD: *ch = 0x2ACE; break;
        case 0x2ACE: *ch = 0x2ACD; break;
        case 0x2ACF: *ch = 0x2AD0; break;
        case 0x2AD0: *ch = 0x2ACF; break;
        case 0x2AD1: *ch = 0x2AD2; break;
        case 0x2AD2: *ch = 0x2AD1; break;
        case 0x2AD3: *ch = 0x2AD4; break;
        case 0x2AD4: *ch = 0x2AD3; break;
        case 0x2AD5: *ch = 0x2AD6; break;
        case 0x2AD6: *ch = 0x2AD5; break;
        case 0x2ADE: *ch = 0x22A6; break;
        case 0x2AE3: *ch = 0x22A9; break;
        case 0x2AE4: *ch = 0x22A8; break;
        case 0x2AE5: *ch = 0x22AB; break;
        case 0x2AEC: *ch = 0x2AED; break;
        case 0x2AED: *ch = 0x2AEC; break;
        case 0x2AF7: *ch = 0x2AF8; break;
        case 0x2AF8: *ch = 0x2AF7; break;
        case 0x2AF9: *ch = 0x2AFA; break;
        case 0x2AFA: *ch = 0x2AF9; break;
        }
    } else if ((*ch & 0xFF00) == 0x3000) {
        switch (*ch) {
        case 0x3008: *ch = 0x3009; break;
        case 0x3009: *ch = 0x3008; break;
        case 0x300A: *ch = 0x300B; break;
        case 0x300B: *ch = 0x300A; break;
        case 0x300C: *ch = 0x300D; break;
        case 0x300D: *ch = 0x300C; break;
        case 0x300E: *ch = 0x300F; break;
        case 0x300F: *ch = 0x300E; break;
        case 0x3010: *ch = 0x3011; break;
        case 0x3011: *ch = 0x3010; break;
        case 0x3014: *ch = 0x3015; break;
        case 0x3015: *ch = 0x3014; break;
        case 0x3016: *ch = 0x3017; break;
        case 0x3017: *ch = 0x3016; break;
        case 0x3018: *ch = 0x3019; break;
        case 0x3019: *ch = 0x3018; break;
        case 0x301A: *ch = 0x301B; break;
        case 0x301B: *ch = 0x301A; break;
        }
    } else if ((*ch & 0xFF00) == 0xFF00) {
        switch (*ch) {
        case 0xFF08: *ch = 0xFF09; break;
        case 0xFF09: *ch = 0xFF08; break;
        case 0xFF1C: *ch = 0xFF1E; break;
        case 0xFF1E: *ch = 0xFF1C; break;
        case 0xFF3B: *ch = 0xFF3D; break;
        case 0xFF3D: *ch = 0xFF3B; break;
        case 0xFF5B: *ch = 0xFF5D; break;
        case 0xFF5D: *ch = 0xFF5B; break;
        case 0xFF5F: *ch = 0xFF60; break;
        case 0xFF60: *ch = 0xFF5F; break;
        case 0xFF62: *ch = 0xFF63; break;
        case 0xFF63: *ch = 0xFF62; break;
        }
    }
}
1855
#ifdef TEST_GETTYPE

#include <stdio.h>
#include <assert.h>

/*
 * Standalone test driver, compiled only when TEST_GETTYPE is defined.
 *
 * Each command-line argument is parsed as a code point by strtoul()
 * with base 0 (so decimal, octal and 0x-prefixed hex are accepted),
 * passed to getType(), and the name of the resulting bidi type is
 * printed as "U+xxxx: NAME". Always returns 0.
 */
int main(int argc, char **argv)
{
    /* Looked up by type value: entry number N must describe type N,
     * which the assertions in the loop below verify at run time. */
    static const struct { int type; const char *name; } typetoname[] = {
#define TYPETONAME(X) { X , #X }
        TYPETONAME(L),
        TYPETONAME(LRE),
        TYPETONAME(LRO),
        TYPETONAME(R),
        TYPETONAME(AL),
        TYPETONAME(RLE),
        TYPETONAME(RLO),
        TYPETONAME(PDF),
        TYPETONAME(EN),
        TYPETONAME(ES),
        TYPETONAME(ET),
        TYPETONAME(AN),
        TYPETONAME(CS),
        TYPETONAME(NSM),
        TYPETONAME(BN),
        TYPETONAME(B),
        TYPETONAME(S),
        TYPETONAME(WS),
        TYPETONAME(ON),
#undef TYPETONAME
    };
    int i;

    for (i = 1; i < argc; i++) {
        unsigned long chr = strtoul(argv[i], NULL, 0);
        int type = getType(chr);

        /* Refuse to index outside the table if getType ever returns
         * a value this driver does not know about. */
        assert(type >= 0 &&
               (size_t)type < sizeof(typetoname) / sizeof(typetoname[0]));
        assert(typetoname[type].type == type);

        /* chr is unsigned long, so the conversion needs the 'l'
         * length modifier. */
        printf("U+%04lx: %s\n", chr, typetoname[type].name);
    }

    return 0;
}

#endif