The end condition in the binary search loop in the new getType() was
[u/mdw/putty] / minibidi.c
1 /************************************************************************
2 * $Id$
3 *
4 * ------------
5 * Description:
6 * ------------
7 * This is an implementation of Unicode's Bidirectional Algorithm
8 * (known as UAX #9).
9 *
10 * http://www.unicode.org/reports/tr9/
11 *
12 * Author: Ahmad Khalifa
13 *
14 * -----------------
15 * Revision Details: (Updated by Revision Control System)
16 * -----------------
17 * $Date$
18 * $Author$
19 * $Revision$
20 *
21 * (www.arabeyes.org - under MIT license)
22 *
23 ************************************************************************/
24
25 /*
26 * TODO:
27 * =====
28 * - Explicit marks need to be handled (they are not 100% now)
29 * - Ligatures
30 */
31
32 #include <stdlib.h> /* definition of wchar_t*/
33
34 #include "misc.h"
35
36 #define LMASK 0x3F /* Embedding Level mask */
37 #define OMASK 0xC0 /* Override mask */
38 #define OISL 0x80 /* Override is L */
39 #define OISR 0x40 /* Override is R */
40
41 /* For standalone compilation in a testing mode.
42 * Still depends on the PuTTY headers for snewn and sfree, but can avoid
43 * _linking_ with any other PuTTY code. */
44 #ifdef TEST_GETTYPE
45 #define safemalloc malloc
46 #define safefree free
47 #endif
48
49 /* Shaping Helpers */
50 #define STYPE(xh) ((((xh) >= SHAPE_FIRST) && ((xh) <= SHAPE_LAST)) ? \
51 shapetypes[(xh)-SHAPE_FIRST].type : SU) /*))*/
52 #define SISOLATED(xh) (shapetypes[(xh)-SHAPE_FIRST].form_b)
53 #define SFINAL(xh) ((xh)+1)
54 #define SINITIAL(xh) ((xh)+2)
55 #define SMEDIAL(ch) ((ch)+3)
56
57 #define leastGreaterOdd(x) ( ((x)+1) | 1 )
58 #define leastGreaterEven(x) ( ((x)+2) &~ 1 )
59
/*
 * One character cell of a line handed to the bidi and shaping code.
 */
typedef struct bidi_char {
    wchar_t origwc;             /* first of the two code-point fields */
    wchar_t wc;                 /* code point examined and rewritten by do_shape()/doMirror-style helpers */
    unsigned short index;       /* position tag that travels with the character when it is moved */
} bidi_char;
64
65 /* function declarations */
66 void flipThisRun(bidi_char *from, unsigned char* level, int max, int count);
67 int findIndexOfRun(unsigned char* level , int start, int count, int tlevel);
68 unsigned char getType(int ch);
69 unsigned char setOverrideBits(unsigned char level, unsigned char override);
70 int getPreviousLevel(unsigned char* level, int from);
71 int do_shape(bidi_char *line, bidi_char *to, int count);
72 int do_bidi(bidi_char *line, int count);
73 void doMirror(wchar_t* ch);
74
/*
 * Bidirectional character types (the classes named in UAX #9).
 * The numeric values are significant only in that they are distinct;
 * the declaration order is kept as-is so that the values do not change.
 */
enum {
    L = 0,      /* Left-to-Right */
    LRE,        /* Left-to-Right Embedding */
    LRO,        /* Left-to-Right Override */
    R,          /* Right-to-Left */
    AL,         /* Right-to-Left Arabic */
    RLE,        /* Right-to-Left Embedding */
    RLO,        /* Right-to-Left Override */
    PDF,        /* Pop Directional Format */
    EN,         /* European Number */
    ES,         /* European Separator */
    ET,         /* European Terminator */
    AN,         /* Arabic Number */
    CS,         /* Common Separator */
    NSM,        /* Non-Spacing Mark */
    BN,         /* Boundary Neutral */
    B,          /* Paragraph Separator */
    S,          /* Segment Separator */
    WS,         /* Whitespace */
    ON          /* Other Neutrals */
};
97
/*
 * Arabic shaping (joining) classes used by do_shape().
 */
enum {
    SL = 0,     /* Left-joining; does not exist in U+0600 - U+06FF */
    SR,         /* Right-joining, i.e. has Isolated and Final forms */
    SD,         /* Dual-joining, i.e. has Isolated, Final, Initial and Medial forms */
    SU,         /* Non-joining */
    SC          /* Join-causing, like U+0640 (TATWEEL) */
};
106
/*
 * One row of the shaping table: the joining class of a character and
 * the code point the SISOLATED() macro yields for it.
 */
typedef struct {
    char type;          /* one of the shaping types SL, SR, SD, SU, SC */
    wchar_t form_b;     /* isolated presentation form, or 0 when there is none */
} shape_node;
111
112 /* Kept near the actual table, for verification. */
113 #define SHAPE_FIRST 0x621
114 #define SHAPE_LAST 0x64A
115
116 const shape_node shapetypes[] = {
117 /* index, Typ, Iso, Ligature Index*/
118 /* 621 */ {SU, 0xFE80},
119 /* 622 */ {SR, 0xFE81},
120 /* 623 */ {SR, 0xFE83},
121 /* 624 */ {SR, 0xFE85},
122 /* 625 */ {SR, 0xFE87},
123 /* 626 */ {SD, 0xFE89},
124 /* 627 */ {SR, 0xFE8D},
125 /* 628 */ {SD, 0xFE8F},
126 /* 629 */ {SR, 0xFE93},
127 /* 62A */ {SD, 0xFE95},
128 /* 62B */ {SD, 0xFE99},
129 /* 62C */ {SD, 0xFE9D},
130 /* 62D */ {SD, 0xFEA1},
131 /* 62E */ {SD, 0xFEA5},
132 /* 62F */ {SR, 0xFEA9},
133 /* 630 */ {SR, 0xFEAB},
134 /* 631 */ {SR, 0xFEAD},
135 /* 632 */ {SR, 0xFEAF},
136 /* 633 */ {SD, 0xFEB1},
137 /* 634 */ {SD, 0xFEB5},
138 /* 635 */ {SD, 0xFEB9},
139 /* 636 */ {SD, 0xFEBD},
140 /* 637 */ {SD, 0xFEC1},
141 /* 638 */ {SD, 0xFEC5},
142 /* 639 */ {SD, 0xFEC9},
143 /* 63A */ {SD, 0xFECD},
144 /* 63B */ {SU, 0x0},
145 /* 63C */ {SU, 0x0},
146 /* 63D */ {SU, 0x0},
147 /* 63E */ {SU, 0x0},
148 /* 63F */ {SU, 0x0},
149 /* 640 */ {SC, 0x0},
150 /* 641 */ {SD, 0xFED1},
151 /* 642 */ {SD, 0xFED5},
152 /* 643 */ {SD, 0xFED9},
153 /* 644 */ {SD, 0xFEDD},
154 /* 645 */ {SD, 0xFEE1},
155 /* 646 */ {SD, 0xFEE5},
156 /* 647 */ {SD, 0xFEE9},
157 /* 648 */ {SR, 0xFEED},
158 /* 649 */ {SR, 0xFEEF}, /* SD */
159 /* 64A */ {SD, 0xFEF1},
160 };
161
162 /*
163 * Flips the text buffer, according to max level, and
164 * all higher levels
165 *
166 * Input:
167 * from: text buffer, on which to apply flipping
168 * level: resolved levels buffer
169 * max: the maximum level found in this line (should be unsigned char)
170 * count: line size in bidi_char
171 */
172 void flipThisRun(bidi_char *from, unsigned char *level, int max, int count)
173 {
174 int i, j, k, tlevel;
175 bidi_char temp;
176
177 j = i = 0;
178 while (i<count && j<count) {
179
180 /* find the start of the run of level=max */
181 tlevel = max;
182 i = j = findIndexOfRun(level, i, count, max);
183 /* find the end of the run */
184 while (i<count && tlevel <= level[i]) {
185 i++;
186 }
187 for (k = i - 1; k > j; k--, j++) {
188 temp = from[k];
189 from[k] = from[j];
190 from[j] = temp;
191 }
192 }
193 }
194
/*
 * Returns the index of the first element of level[start..count-1]
 * that is equal to tlevel, or `count' if there is no such element.
 */
int findIndexOfRun(unsigned char* level , int start, int count, int tlevel)
{
    int pos = start;

    while (pos < count && level[pos] != tlevel)
        pos++;

    return (pos < count) ? pos : count;
}
208
209 /*
210 * Returns the bidi character type of ch.
211 *
212 * The data table in this function is constructed from the Unicode
213 * Character Database, downloadable from unicode.org at the URL
214 *
215 * http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
216 *
217 * by the following fragment of Perl:
218
219 perl -ne 'split ";"; $num = hex $_[0]; $type = $_[4];' \
220 -e '$fl = ($_[1] =~ /First/ ? 1 : $_[1] =~ /Last/ ? 2 : 0);' \
221 -e 'if ($type eq $runtype and ($runend == $num-1 or ' \
222 -e ' ($fl==2 and $pfl==1))) {$runend = $num;} else { &reset; }' \
223 -e '$pfl=$fl; END { &reset }; sub reset {' \
224 -e 'printf" {0x%04x, 0x%04x, %s},\n",$runstart,$runend,$runtype' \
225 -e ' if defined $runstart and $runtype ne "ON";' \
226 -e '$runstart=$runend=$num; $runtype=$type;}' \
227 UnicodeData.txt
228
229 */
230 unsigned char getType(int ch)
231 {
232 static const struct {
233 int first, last, type;
234 } lookup[] = {
235 {0x0000, 0x0008, BN},
236 {0x0009, 0x0009, S},
237 {0x000a, 0x000a, B},
238 {0x000b, 0x000b, S},
239 {0x000c, 0x000c, WS},
240 {0x000d, 0x000d, B},
241 {0x000e, 0x001b, BN},
242 {0x001c, 0x001e, B},
243 {0x001f, 0x001f, S},
244 {0x0020, 0x0020, WS},
245 {0x0023, 0x0025, ET},
246 {0x002b, 0x002b, ES},
247 {0x002c, 0x002c, CS},
248 {0x002d, 0x002d, ES},
249 {0x002e, 0x002f, CS},
250 {0x0030, 0x0039, EN},
251 {0x003a, 0x003a, CS},
252 {0x0041, 0x005a, L},
253 {0x0061, 0x007a, L},
254 {0x007f, 0x0084, BN},
255 {0x0085, 0x0085, B},
256 {0x0086, 0x009f, BN},
257 {0x00a0, 0x00a0, CS},
258 {0x00a2, 0x00a5, ET},
259 {0x00aa, 0x00aa, L},
260 {0x00ad, 0x00ad, BN},
261 {0x00b0, 0x00b1, ET},
262 {0x00b2, 0x00b3, EN},
263 {0x00b5, 0x00b5, L},
264 {0x00b9, 0x00b9, EN},
265 {0x00ba, 0x00ba, L},
266 {0x00c0, 0x00d6, L},
267 {0x00d8, 0x00f6, L},
268 {0x00f8, 0x0236, L},
269 {0x0250, 0x02b8, L},
270 {0x02bb, 0x02c1, L},
271 {0x02d0, 0x02d1, L},
272 {0x02e0, 0x02e4, L},
273 {0x02ee, 0x02ee, L},
274 {0x0300, 0x0357, NSM},
275 {0x035d, 0x036f, NSM},
276 {0x037a, 0x037a, L},
277 {0x0386, 0x0386, L},
278 {0x0388, 0x038a, L},
279 {0x038c, 0x038c, L},
280 {0x038e, 0x03a1, L},
281 {0x03a3, 0x03ce, L},
282 {0x03d0, 0x03f5, L},
283 {0x03f7, 0x03fb, L},
284 {0x0400, 0x0482, L},
285 {0x0483, 0x0486, NSM},
286 {0x0488, 0x0489, NSM},
287 {0x048a, 0x04ce, L},
288 {0x04d0, 0x04f5, L},
289 {0x04f8, 0x04f9, L},
290 {0x0500, 0x050f, L},
291 {0x0531, 0x0556, L},
292 {0x0559, 0x055f, L},
293 {0x0561, 0x0587, L},
294 {0x0589, 0x0589, L},
295 {0x0591, 0x05a1, NSM},
296 {0x05a3, 0x05b9, NSM},
297 {0x05bb, 0x05bd, NSM},
298 {0x05be, 0x05be, R},
299 {0x05bf, 0x05bf, NSM},
300 {0x05c0, 0x05c0, R},
301 {0x05c1, 0x05c2, NSM},
302 {0x05c3, 0x05c3, R},
303 {0x05c4, 0x05c4, NSM},
304 {0x05d0, 0x05ea, R},
305 {0x05f0, 0x05f4, R},
306 {0x0600, 0x0603, AL},
307 {0x060c, 0x060c, CS},
308 {0x060d, 0x060d, AL},
309 {0x0610, 0x0615, NSM},
310 {0x061b, 0x061b, AL},
311 {0x061f, 0x061f, AL},
312 {0x0621, 0x063a, AL},
313 {0x0640, 0x064a, AL},
314 {0x064b, 0x0658, NSM},
315 {0x0660, 0x0669, AN},
316 {0x066a, 0x066a, ET},
317 {0x066b, 0x066c, AN},
318 {0x066d, 0x066f, AL},
319 {0x0670, 0x0670, NSM},
320 {0x0671, 0x06d5, AL},
321 {0x06d6, 0x06dc, NSM},
322 {0x06dd, 0x06dd, AL},
323 {0x06de, 0x06e4, NSM},
324 {0x06e5, 0x06e6, AL},
325 {0x06e7, 0x06e8, NSM},
326 {0x06ea, 0x06ed, NSM},
327 {0x06ee, 0x06ef, AL},
328 {0x06f0, 0x06f9, EN},
329 {0x06fa, 0x070d, AL},
330 {0x070f, 0x070f, BN},
331 {0x0710, 0x0710, AL},
332 {0x0711, 0x0711, NSM},
333 {0x0712, 0x072f, AL},
334 {0x0730, 0x074a, NSM},
335 {0x074d, 0x074f, AL},
336 {0x0780, 0x07a5, AL},
337 {0x07a6, 0x07b0, NSM},
338 {0x07b1, 0x07b1, AL},
339 {0x0901, 0x0902, NSM},
340 {0x0903, 0x0939, L},
341 {0x093c, 0x093c, NSM},
342 {0x093d, 0x0940, L},
343 {0x0941, 0x0948, NSM},
344 {0x0949, 0x094c, L},
345 {0x094d, 0x094d, NSM},
346 {0x0950, 0x0950, L},
347 {0x0951, 0x0954, NSM},
348 {0x0958, 0x0961, L},
349 {0x0962, 0x0963, NSM},
350 {0x0964, 0x0970, L},
351 {0x0981, 0x0981, NSM},
352 {0x0982, 0x0983, L},
353 {0x0985, 0x098c, L},
354 {0x098f, 0x0990, L},
355 {0x0993, 0x09a8, L},
356 {0x09aa, 0x09b0, L},
357 {0x09b2, 0x09b2, L},
358 {0x09b6, 0x09b9, L},
359 {0x09bc, 0x09bc, NSM},
360 {0x09bd, 0x09c0, L},
361 {0x09c1, 0x09c4, NSM},
362 {0x09c7, 0x09c8, L},
363 {0x09cb, 0x09cc, L},
364 {0x09cd, 0x09cd, NSM},
365 {0x09d7, 0x09d7, L},
366 {0x09dc, 0x09dd, L},
367 {0x09df, 0x09e1, L},
368 {0x09e2, 0x09e3, NSM},
369 {0x09e6, 0x09f1, L},
370 {0x09f2, 0x09f3, ET},
371 {0x09f4, 0x09fa, L},
372 {0x0a01, 0x0a02, NSM},
373 {0x0a03, 0x0a03, L},
374 {0x0a05, 0x0a0a, L},
375 {0x0a0f, 0x0a10, L},
376 {0x0a13, 0x0a28, L},
377 {0x0a2a, 0x0a30, L},
378 {0x0a32, 0x0a33, L},
379 {0x0a35, 0x0a36, L},
380 {0x0a38, 0x0a39, L},
381 {0x0a3c, 0x0a3c, NSM},
382 {0x0a3e, 0x0a40, L},
383 {0x0a41, 0x0a42, NSM},
384 {0x0a47, 0x0a48, NSM},
385 {0x0a4b, 0x0a4d, NSM},
386 {0x0a59, 0x0a5c, L},
387 {0x0a5e, 0x0a5e, L},
388 {0x0a66, 0x0a6f, L},
389 {0x0a70, 0x0a71, NSM},
390 {0x0a72, 0x0a74, L},
391 {0x0a81, 0x0a82, NSM},
392 {0x0a83, 0x0a83, L},
393 {0x0a85, 0x0a8d, L},
394 {0x0a8f, 0x0a91, L},
395 {0x0a93, 0x0aa8, L},
396 {0x0aaa, 0x0ab0, L},
397 {0x0ab2, 0x0ab3, L},
398 {0x0ab5, 0x0ab9, L},
399 {0x0abc, 0x0abc, NSM},
400 {0x0abd, 0x0ac0, L},
401 {0x0ac1, 0x0ac5, NSM},
402 {0x0ac7, 0x0ac8, NSM},
403 {0x0ac9, 0x0ac9, L},
404 {0x0acb, 0x0acc, L},
405 {0x0acd, 0x0acd, NSM},
406 {0x0ad0, 0x0ad0, L},
407 {0x0ae0, 0x0ae1, L},
408 {0x0ae2, 0x0ae3, NSM},
409 {0x0ae6, 0x0aef, L},
410 {0x0af1, 0x0af1, ET},
411 {0x0b01, 0x0b01, NSM},
412 {0x0b02, 0x0b03, L},
413 {0x0b05, 0x0b0c, L},
414 {0x0b0f, 0x0b10, L},
415 {0x0b13, 0x0b28, L},
416 {0x0b2a, 0x0b30, L},
417 {0x0b32, 0x0b33, L},
418 {0x0b35, 0x0b39, L},
419 {0x0b3c, 0x0b3c, NSM},
420 {0x0b3d, 0x0b3e, L},
421 {0x0b3f, 0x0b3f, NSM},
422 {0x0b40, 0x0b40, L},
423 {0x0b41, 0x0b43, NSM},
424 {0x0b47, 0x0b48, L},
425 {0x0b4b, 0x0b4c, L},
426 {0x0b4d, 0x0b4d, NSM},
427 {0x0b56, 0x0b56, NSM},
428 {0x0b57, 0x0b57, L},
429 {0x0b5c, 0x0b5d, L},
430 {0x0b5f, 0x0b61, L},
431 {0x0b66, 0x0b71, L},
432 {0x0b82, 0x0b82, NSM},
433 {0x0b83, 0x0b83, L},
434 {0x0b85, 0x0b8a, L},
435 {0x0b8e, 0x0b90, L},
436 {0x0b92, 0x0b95, L},
437 {0x0b99, 0x0b9a, L},
438 {0x0b9c, 0x0b9c, L},
439 {0x0b9e, 0x0b9f, L},
440 {0x0ba3, 0x0ba4, L},
441 {0x0ba8, 0x0baa, L},
442 {0x0bae, 0x0bb5, L},
443 {0x0bb7, 0x0bb9, L},
444 {0x0bbe, 0x0bbf, L},
445 {0x0bc0, 0x0bc0, NSM},
446 {0x0bc1, 0x0bc2, L},
447 {0x0bc6, 0x0bc8, L},
448 {0x0bca, 0x0bcc, L},
449 {0x0bcd, 0x0bcd, NSM},
450 {0x0bd7, 0x0bd7, L},
451 {0x0be7, 0x0bf2, L},
452 {0x0bf9, 0x0bf9, ET},
453 {0x0c01, 0x0c03, L},
454 {0x0c05, 0x0c0c, L},
455 {0x0c0e, 0x0c10, L},
456 {0x0c12, 0x0c28, L},
457 {0x0c2a, 0x0c33, L},
458 {0x0c35, 0x0c39, L},
459 {0x0c3e, 0x0c40, NSM},
460 {0x0c41, 0x0c44, L},
461 {0x0c46, 0x0c48, NSM},
462 {0x0c4a, 0x0c4d, NSM},
463 {0x0c55, 0x0c56, NSM},
464 {0x0c60, 0x0c61, L},
465 {0x0c66, 0x0c6f, L},
466 {0x0c82, 0x0c83, L},
467 {0x0c85, 0x0c8c, L},
468 {0x0c8e, 0x0c90, L},
469 {0x0c92, 0x0ca8, L},
470 {0x0caa, 0x0cb3, L},
471 {0x0cb5, 0x0cb9, L},
472 {0x0cbc, 0x0cbc, NSM},
473 {0x0cbd, 0x0cc4, L},
474 {0x0cc6, 0x0cc8, L},
475 {0x0cca, 0x0ccb, L},
476 {0x0ccc, 0x0ccd, NSM},
477 {0x0cd5, 0x0cd6, L},
478 {0x0cde, 0x0cde, L},
479 {0x0ce0, 0x0ce1, L},
480 {0x0ce6, 0x0cef, L},
481 {0x0d02, 0x0d03, L},
482 {0x0d05, 0x0d0c, L},
483 {0x0d0e, 0x0d10, L},
484 {0x0d12, 0x0d28, L},
485 {0x0d2a, 0x0d39, L},
486 {0x0d3e, 0x0d40, L},
487 {0x0d41, 0x0d43, NSM},
488 {0x0d46, 0x0d48, L},
489 {0x0d4a, 0x0d4c, L},
490 {0x0d4d, 0x0d4d, NSM},
491 {0x0d57, 0x0d57, L},
492 {0x0d60, 0x0d61, L},
493 {0x0d66, 0x0d6f, L},
494 {0x0d82, 0x0d83, L},
495 {0x0d85, 0x0d96, L},
496 {0x0d9a, 0x0db1, L},
497 {0x0db3, 0x0dbb, L},
498 {0x0dbd, 0x0dbd, L},
499 {0x0dc0, 0x0dc6, L},
500 {0x0dca, 0x0dca, NSM},
501 {0x0dcf, 0x0dd1, L},
502 {0x0dd2, 0x0dd4, NSM},
503 {0x0dd6, 0x0dd6, NSM},
504 {0x0dd8, 0x0ddf, L},
505 {0x0df2, 0x0df4, L},
506 {0x0e01, 0x0e30, L},
507 {0x0e31, 0x0e31, NSM},
508 {0x0e32, 0x0e33, L},
509 {0x0e34, 0x0e3a, NSM},
510 {0x0e3f, 0x0e3f, ET},
511 {0x0e40, 0x0e46, L},
512 {0x0e47, 0x0e4e, NSM},
513 {0x0e4f, 0x0e5b, L},
514 {0x0e81, 0x0e82, L},
515 {0x0e84, 0x0e84, L},
516 {0x0e87, 0x0e88, L},
517 {0x0e8a, 0x0e8a, L},
518 {0x0e8d, 0x0e8d, L},
519 {0x0e94, 0x0e97, L},
520 {0x0e99, 0x0e9f, L},
521 {0x0ea1, 0x0ea3, L},
522 {0x0ea5, 0x0ea5, L},
523 {0x0ea7, 0x0ea7, L},
524 {0x0eaa, 0x0eab, L},
525 {0x0ead, 0x0eb0, L},
526 {0x0eb1, 0x0eb1, NSM},
527 {0x0eb2, 0x0eb3, L},
528 {0x0eb4, 0x0eb9, NSM},
529 {0x0ebb, 0x0ebc, NSM},
530 {0x0ebd, 0x0ebd, L},
531 {0x0ec0, 0x0ec4, L},
532 {0x0ec6, 0x0ec6, L},
533 {0x0ec8, 0x0ecd, NSM},
534 {0x0ed0, 0x0ed9, L},
535 {0x0edc, 0x0edd, L},
536 {0x0f00, 0x0f17, L},
537 {0x0f18, 0x0f19, NSM},
538 {0x0f1a, 0x0f34, L},
539 {0x0f35, 0x0f35, NSM},
540 {0x0f36, 0x0f36, L},
541 {0x0f37, 0x0f37, NSM},
542 {0x0f38, 0x0f38, L},
543 {0x0f39, 0x0f39, NSM},
544 {0x0f3e, 0x0f47, L},
545 {0x0f49, 0x0f6a, L},
546 {0x0f71, 0x0f7e, NSM},
547 {0x0f7f, 0x0f7f, L},
548 {0x0f80, 0x0f84, NSM},
549 {0x0f85, 0x0f85, L},
550 {0x0f86, 0x0f87, NSM},
551 {0x0f88, 0x0f8b, L},
552 {0x0f90, 0x0f97, NSM},
553 {0x0f99, 0x0fbc, NSM},
554 {0x0fbe, 0x0fc5, L},
555 {0x0fc6, 0x0fc6, NSM},
556 {0x0fc7, 0x0fcc, L},
557 {0x0fcf, 0x0fcf, L},
558 {0x1000, 0x1021, L},
559 {0x1023, 0x1027, L},
560 {0x1029, 0x102a, L},
561 {0x102c, 0x102c, L},
562 {0x102d, 0x1030, NSM},
563 {0x1031, 0x1031, L},
564 {0x1032, 0x1032, NSM},
565 {0x1036, 0x1037, NSM},
566 {0x1038, 0x1038, L},
567 {0x1039, 0x1039, NSM},
568 {0x1040, 0x1057, L},
569 {0x1058, 0x1059, NSM},
570 {0x10a0, 0x10c5, L},
571 {0x10d0, 0x10f8, L},
572 {0x10fb, 0x10fb, L},
573 {0x1100, 0x1159, L},
574 {0x115f, 0x11a2, L},
575 {0x11a8, 0x11f9, L},
576 {0x1200, 0x1206, L},
577 {0x1208, 0x1246, L},
578 {0x1248, 0x1248, L},
579 {0x124a, 0x124d, L},
580 {0x1250, 0x1256, L},
581 {0x1258, 0x1258, L},
582 {0x125a, 0x125d, L},
583 {0x1260, 0x1286, L},
584 {0x1288, 0x1288, L},
585 {0x128a, 0x128d, L},
586 {0x1290, 0x12ae, L},
587 {0x12b0, 0x12b0, L},
588 {0x12b2, 0x12b5, L},
589 {0x12b8, 0x12be, L},
590 {0x12c0, 0x12c0, L},
591 {0x12c2, 0x12c5, L},
592 {0x12c8, 0x12ce, L},
593 {0x12d0, 0x12d6, L},
594 {0x12d8, 0x12ee, L},
595 {0x12f0, 0x130e, L},
596 {0x1310, 0x1310, L},
597 {0x1312, 0x1315, L},
598 {0x1318, 0x131e, L},
599 {0x1320, 0x1346, L},
600 {0x1348, 0x135a, L},
601 {0x1361, 0x137c, L},
602 {0x13a0, 0x13f4, L},
603 {0x1401, 0x1676, L},
604 {0x1680, 0x1680, WS},
605 {0x1681, 0x169a, L},
606 {0x16a0, 0x16f0, L},
607 {0x1700, 0x170c, L},
608 {0x170e, 0x1711, L},
609 {0x1712, 0x1714, NSM},
610 {0x1720, 0x1731, L},
611 {0x1732, 0x1734, NSM},
612 {0x1735, 0x1736, L},
613 {0x1740, 0x1751, L},
614 {0x1752, 0x1753, NSM},
615 {0x1760, 0x176c, L},
616 {0x176e, 0x1770, L},
617 {0x1772, 0x1773, NSM},
618 {0x1780, 0x17b6, L},
619 {0x17b7, 0x17bd, NSM},
620 {0x17be, 0x17c5, L},
621 {0x17c6, 0x17c6, NSM},
622 {0x17c7, 0x17c8, L},
623 {0x17c9, 0x17d3, NSM},
624 {0x17d4, 0x17da, L},
625 {0x17db, 0x17db, ET},
626 {0x17dc, 0x17dc, L},
627 {0x17dd, 0x17dd, NSM},
628 {0x17e0, 0x17e9, L},
629 {0x180b, 0x180d, NSM},
630 {0x180e, 0x180e, WS},
631 {0x1810, 0x1819, L},
632 {0x1820, 0x1877, L},
633 {0x1880, 0x18a8, L},
634 {0x18a9, 0x18a9, NSM},
635 {0x1900, 0x191c, L},
636 {0x1920, 0x1922, NSM},
637 {0x1923, 0x1926, L},
638 {0x1927, 0x192b, NSM},
639 {0x1930, 0x1931, L},
640 {0x1932, 0x1932, NSM},
641 {0x1933, 0x1938, L},
642 {0x1939, 0x193b, NSM},
643 {0x1946, 0x196d, L},
644 {0x1970, 0x1974, L},
645 {0x1d00, 0x1d6b, L},
646 {0x1e00, 0x1e9b, L},
647 {0x1ea0, 0x1ef9, L},
648 {0x1f00, 0x1f15, L},
649 {0x1f18, 0x1f1d, L},
650 {0x1f20, 0x1f45, L},
651 {0x1f48, 0x1f4d, L},
652 {0x1f50, 0x1f57, L},
653 {0x1f59, 0x1f59, L},
654 {0x1f5b, 0x1f5b, L},
655 {0x1f5d, 0x1f5d, L},
656 {0x1f5f, 0x1f7d, L},
657 {0x1f80, 0x1fb4, L},
658 {0x1fb6, 0x1fbc, L},
659 {0x1fbe, 0x1fbe, L},
660 {0x1fc2, 0x1fc4, L},
661 {0x1fc6, 0x1fcc, L},
662 {0x1fd0, 0x1fd3, L},
663 {0x1fd6, 0x1fdb, L},
664 {0x1fe0, 0x1fec, L},
665 {0x1ff2, 0x1ff4, L},
666 {0x1ff6, 0x1ffc, L},
667 {0x2000, 0x200a, WS},
668 {0x200b, 0x200d, BN},
669 {0x200e, 0x200e, L},
670 {0x200f, 0x200f, R},
671 {0x2028, 0x2028, WS},
672 {0x2029, 0x2029, B},
673 {0x202a, 0x202a, LRE},
674 {0x202b, 0x202b, RLE},
675 {0x202c, 0x202c, PDF},
676 {0x202d, 0x202d, LRO},
677 {0x202e, 0x202e, RLO},
678 {0x202f, 0x202f, WS},
679 {0x2030, 0x2034, ET},
680 {0x2044, 0x2044, CS},
681 {0x205f, 0x205f, WS},
682 {0x2060, 0x2063, BN},
683 {0x206a, 0x206f, BN},
684 {0x2070, 0x2070, EN},
685 {0x2071, 0x2071, L},
686 {0x2074, 0x2079, EN},
687 {0x207a, 0x207b, ET},
688 {0x207f, 0x207f, L},
689 {0x2080, 0x2089, EN},
690 {0x208a, 0x208b, ET},
691 {0x20a0, 0x20b1, ET},
692 {0x20d0, 0x20ea, NSM},
693 {0x2102, 0x2102, L},
694 {0x2107, 0x2107, L},
695 {0x210a, 0x2113, L},
696 {0x2115, 0x2115, L},
697 {0x2119, 0x211d, L},
698 {0x2124, 0x2124, L},
699 {0x2126, 0x2126, L},
700 {0x2128, 0x2128, L},
701 {0x212a, 0x212d, L},
702 {0x212e, 0x212e, ET},
703 {0x212f, 0x2131, L},
704 {0x2133, 0x2139, L},
705 {0x213d, 0x213f, L},
706 {0x2145, 0x2149, L},
707 {0x2160, 0x2183, L},
708 {0x2212, 0x2213, ET},
709 {0x2336, 0x237a, L},
710 {0x2395, 0x2395, L},
711 {0x2488, 0x249b, EN},
712 {0x249c, 0x24e9, L},
713 {0x2800, 0x28ff, L},
714 {0x3000, 0x3000, WS},
715 {0x3005, 0x3007, L},
716 {0x3021, 0x3029, L},
717 {0x302a, 0x302f, NSM},
718 {0x3031, 0x3035, L},
719 {0x3038, 0x303c, L},
720 {0x3041, 0x3096, L},
721 {0x3099, 0x309a, NSM},
722 {0x309d, 0x309f, L},
723 {0x30a1, 0x30fa, L},
724 {0x30fc, 0x30ff, L},
725 {0x3105, 0x312c, L},
726 {0x3131, 0x318e, L},
727 {0x3190, 0x31b7, L},
728 {0x31f0, 0x321c, L},
729 {0x3220, 0x3243, L},
730 {0x3260, 0x327b, L},
731 {0x327f, 0x32b0, L},
732 {0x32c0, 0x32cb, L},
733 {0x32d0, 0x32fe, L},
734 {0x3300, 0x3376, L},
735 {0x337b, 0x33dd, L},
736 {0x33e0, 0x33fe, L},
737 {0x3400, 0x4db5, L},
738 {0x4e00, 0x9fa5, L},
739 {0xa000, 0xa48c, L},
740 {0xac00, 0xd7a3, L},
741 {0xd800, 0xfa2d, L},
742 {0xfa30, 0xfa6a, L},
743 {0xfb00, 0xfb06, L},
744 {0xfb13, 0xfb17, L},
745 {0xfb1d, 0xfb1d, R},
746 {0xfb1e, 0xfb1e, NSM},
747 {0xfb1f, 0xfb28, R},
748 {0xfb29, 0xfb29, ET},
749 {0xfb2a, 0xfb36, R},
750 {0xfb38, 0xfb3c, R},
751 {0xfb3e, 0xfb3e, R},
752 {0xfb40, 0xfb41, R},
753 {0xfb43, 0xfb44, R},
754 {0xfb46, 0xfb4f, R},
755 {0xfb50, 0xfbb1, AL},
756 {0xfbd3, 0xfd3d, AL},
757 {0xfd50, 0xfd8f, AL},
758 {0xfd92, 0xfdc7, AL},
759 {0xfdf0, 0xfdfc, AL},
760 {0xfe00, 0xfe0f, NSM},
761 {0xfe20, 0xfe23, NSM},
762 {0xfe50, 0xfe50, CS},
763 {0xfe52, 0xfe52, CS},
764 {0xfe55, 0xfe55, CS},
765 {0xfe5f, 0xfe5f, ET},
766 {0xfe62, 0xfe63, ET},
767 {0xfe69, 0xfe6a, ET},
768 {0xfe70, 0xfe74, AL},
769 {0xfe76, 0xfefc, AL},
770 {0xfeff, 0xfeff, BN},
771 {0xff03, 0xff05, ET},
772 {0xff0b, 0xff0b, ET},
773 {0xff0c, 0xff0c, CS},
774 {0xff0d, 0xff0d, ET},
775 {0xff0e, 0xff0e, CS},
776 {0xff0f, 0xff0f, ES},
777 {0xff10, 0xff19, EN},
778 {0xff1a, 0xff1a, CS},
779 {0xff21, 0xff3a, L},
780 {0xff41, 0xff5a, L},
781 {0xff66, 0xffbe, L},
782 {0xffc2, 0xffc7, L},
783 {0xffca, 0xffcf, L},
784 {0xffd2, 0xffd7, L},
785 {0xffda, 0xffdc, L},
786 {0xffe0, 0xffe1, ET},
787 {0xffe5, 0xffe6, ET},
788 {0x10000, 0x1000b, L},
789 {0x1000d, 0x10026, L},
790 {0x10028, 0x1003a, L},
791 {0x1003c, 0x1003d, L},
792 {0x1003f, 0x1004d, L},
793 {0x10050, 0x1005d, L},
794 {0x10080, 0x100fa, L},
795 {0x10100, 0x10100, L},
796 {0x10102, 0x10102, L},
797 {0x10107, 0x10133, L},
798 {0x10137, 0x1013f, L},
799 {0x10300, 0x1031e, L},
800 {0x10320, 0x10323, L},
801 {0x10330, 0x1034a, L},
802 {0x10380, 0x1039d, L},
803 {0x1039f, 0x1039f, L},
804 {0x10400, 0x1049d, L},
805 {0x104a0, 0x104a9, L},
806 {0x10800, 0x10805, R},
807 {0x10808, 0x10808, R},
808 {0x1080a, 0x10835, R},
809 {0x10837, 0x10838, R},
810 {0x1083c, 0x1083c, R},
811 {0x1083f, 0x1083f, R},
812 {0x1d000, 0x1d0f5, L},
813 {0x1d100, 0x1d126, L},
814 {0x1d12a, 0x1d166, L},
815 {0x1d167, 0x1d169, NSM},
816 {0x1d16a, 0x1d172, L},
817 {0x1d173, 0x1d17a, BN},
818 {0x1d17b, 0x1d182, NSM},
819 {0x1d183, 0x1d184, L},
820 {0x1d185, 0x1d18b, NSM},
821 {0x1d18c, 0x1d1a9, L},
822 {0x1d1aa, 0x1d1ad, NSM},
823 {0x1d1ae, 0x1d1dd, L},
824 {0x1d400, 0x1d454, L},
825 {0x1d456, 0x1d49c, L},
826 {0x1d49e, 0x1d49f, L},
827 {0x1d4a2, 0x1d4a2, L},
828 {0x1d4a5, 0x1d4a6, L},
829 {0x1d4a9, 0x1d4ac, L},
830 {0x1d4ae, 0x1d4b9, L},
831 {0x1d4bb, 0x1d4bb, L},
832 {0x1d4bd, 0x1d4c3, L},
833 {0x1d4c5, 0x1d505, L},
834 {0x1d507, 0x1d50a, L},
835 {0x1d50d, 0x1d514, L},
836 {0x1d516, 0x1d51c, L},
837 {0x1d51e, 0x1d539, L},
838 {0x1d53b, 0x1d53e, L},
839 {0x1d540, 0x1d544, L},
840 {0x1d546, 0x1d546, L},
841 {0x1d54a, 0x1d550, L},
842 {0x1d552, 0x1d6a3, L},
843 {0x1d6a8, 0x1d7c9, L},
844 {0x1d7ce, 0x1d7ff, EN},
845 {0x20000, 0x2a6d6, L},
846 {0x2f800, 0x2fa1d, L},
847 {0xe0001, 0xe0001, BN},
848 {0xe0020, 0xe007f, BN},
849 {0xe0100, 0xe01ef, NSM},
850 {0xf0000, 0xffffd, L},
851 {0x100000, 0x10fffd, L},
852 };
853
854 int i, j, k;
855
856 i = -1;
857 j = lenof(lookup);
858
859 while (j - i > 1) {
860 k = (i + j) / 2;
861 if (ch < lookup[k].first)
862 j = k;
863 else if (ch > lookup[k].last)
864 i = k;
865 else
866 return lookup[k].type;
867 }
868
869 /*
870 * If we reach here, the character was not in any of the
871 * intervals listed in the lookup table. This means we return
872 * ON (`Other Neutrals'). This is the appropriate code for any
873 * character genuinely not listed in the Unicode table, and
874 * also the table above has deliberately left out any
875 * characters _explicitly_ listed as ON (to save space!).
876 */
877 return ON;
878 }
879
880 /*
881 * The most significant 2 bits of each level are used to store
882 * Override status of each character
883 * This function sets the override bits of level according
884 * to the value in override, and reurns the new byte.
885 */
886 unsigned char setOverrideBits(unsigned char level, unsigned char override)
887 {
888 if (override == ON)
889 return level;
890 else if (override == R)
891 return level | OISR;
892 else if (override == L)
893 return level | OISL;
894 return level;
895 }
896
/*
 * Looks at the run of equal values in `level' that ends at index
 * from-1 and returns the value stored immediately before that run.
 * Returns -1 if `from' is not positive or if the run reaches back to
 * index 0.  Used to process U+202C POP DIRECTIONAL FORMATTING.
 */
int getPreviousLevel(unsigned char* level, int from)
{
    int pos;
    unsigned char runLevel;

    if (from <= 0)
        return -1;

    /* value of the run we are stepping back over */
    runLevel = level[from - 1];

    for (pos = from - 2; pos >= 0; pos--) {
        if (level[pos] != runLevel)
            return level[pos];
    }

    /* the run extends to the start of the buffer */
    return -1;
}
917
918 /* The Main shaping function, and the only one to be used
919 * by the outside world.
920 *
921 * line: buffer to apply shaping to. this must be passed by doBidi() first
922 * to: output buffer for the shaped data
923 * count: number of characters in line
924 */
925 int do_shape(bidi_char *line, bidi_char *to, int count)
926 {
927 int i, tempShape, ligFlag;
928
929 for (ligFlag=i=0; i<count; i++) {
930 to[i] = line[i];
931 tempShape = STYPE(line[i].wc);
932 switch (tempShape) {
933 case SC:
934 break;
935
936 case SU:
937 break;
938
939 case SR:
940 tempShape = (i+1 < count ? STYPE(line[i+1].wc) : SU);
941 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
942 to[i].wc = SFINAL((SISOLATED(line[i].wc)));
943 else
944 to[i].wc = SISOLATED(line[i].wc);
945 break;
946
947
948 case SD:
949 /* Make Ligatures */
950 tempShape = (i+1 < count ? STYPE(line[i+1].wc) : SU);
951 if (line[i].wc == 0x644) {
952 if (i > 0) switch (line[i-1].wc) {
953 case 0x622:
954 ligFlag = 1;
955 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
956 to[i].wc = 0xFEF6;
957 else
958 to[i].wc = 0xFEF5;
959 break;
960 case 0x623:
961 ligFlag = 1;
962 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
963 to[i].wc = 0xFEF8;
964 else
965 to[i].wc = 0xFEF7;
966 break;
967 case 0x625:
968 ligFlag = 1;
969 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
970 to[i].wc = 0xFEFA;
971 else
972 to[i].wc = 0xFEF9;
973 break;
974 case 0x627:
975 ligFlag = 1;
976 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
977 to[i].wc = 0xFEFC;
978 else
979 to[i].wc = 0xFEFB;
980 break;
981 }
982 if (ligFlag) {
983 to[i-1].wc = 0x20;
984 ligFlag = 0;
985 break;
986 }
987 }
988
989 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) {
990 tempShape = (i > 0 ? STYPE(line[i-1].wc) : SU);
991 if ((tempShape == SR) || (tempShape == SD) || (tempShape == SC))
992 to[i].wc = SMEDIAL((SISOLATED(line[i].wc)));
993 else
994 to[i].wc = SFINAL((SISOLATED(line[i].wc)));
995 break;
996 }
997
998 tempShape = (i > 0 ? STYPE(line[i-1].wc) : SU);
999 if ((tempShape == SR) || (tempShape == SD) || (tempShape == SC))
1000 to[i].wc = SINITIAL((SISOLATED(line[i].wc)));
1001 else
1002 to[i].wc = SISOLATED(line[i].wc);
1003 break;
1004
1005
1006 }
1007 }
1008 return 1;
1009 }
1010
1011 /*
1012 * The Main Bidi Function, and the only function that should
1013 * be used by the outside world.
1014 *
1015 * line: a buffer of size count containing text to apply
1016 * the Bidirectional algorithm to.
1017 */
1018
1019 int do_bidi(bidi_char *line, int count)
1020 {
1021 unsigned char* types;
1022 unsigned char* levels;
1023 unsigned char paragraphLevel;
1024 unsigned char currentEmbedding;
1025 unsigned char currentOverride;
1026 unsigned char tempType;
1027 int i, j, imax, yes, bover;
1028
1029 /* Check the presence of R or AL types as optimization */
1030 yes = 0;
1031 for (i=0; i<count; i++) {
1032 int type = getType(line[i].wc);
1033 if (type == R || type == AL) {
1034 yes = 1;
1035 break;
1036 }
1037 }
1038 if (yes == 0)
1039 return L;
1040
1041 /* Initialize types, levels */
1042 types = snewn(count, unsigned char);
1043 levels = snewn(count, unsigned char);
1044
1045 /* Rule (P1) NOT IMPLEMENTED
1046 * P1. Split the text into separate paragraphs. A paragraph separator is
1047 * kept with the previous paragraph. Within each paragraph, apply all the
1048 * other rules of this algorithm.
1049 */
1050
1051 /* Rule (P2), (P3)
1052 * P2. In each paragraph, find the first character of type L, AL, or R.
1053 * P3. If a character is found in P2 and it is of type AL or R, then set
1054 * the paragraph embedding level to one; otherwise, set it to zero.
1055 */
1056 paragraphLevel = 0;
1057 for (i=0; i<count ; i++) {
1058 int type = getType(line[i].wc);
1059 if (type == R || type == AL) {
1060 paragraphLevel = 1;
1061 break;
1062 } else if (type == L)
1063 break;
1064 }
1065
1066 /* Rule (X1)
1067 * X1. Begin by setting the current embedding level to the paragraph
1068 * embedding level. Set the directional override status to neutral.
1069 */
1070 currentEmbedding = paragraphLevel;
1071 currentOverride = ON;
1072
1073 /* Rule (X2), (X3), (X4), (X5), (X6), (X7), (X8)
1074 * X2. With each RLE, compute the least greater odd embedding level.
1075 * X3. With each LRE, compute the least greater even embedding level.
1076 * X4. With each RLO, compute the least greater odd embedding level.
1077 * X5. With each LRO, compute the least greater even embedding level.
1078 * X6. For all types besides RLE, LRE, RLO, LRO, and PDF:
1079 * a. Set the level of the current character to the current
1080 * embedding level.
1081 * b. Whenever the directional override status is not neutral,
1082 * reset the current character type to the directional
1083 * override status.
1084 * X7. With each PDF, determine the matching embedding or override code.
1085 * If there was a valid matching code, restore (pop) the last
1086 * remembered (pushed) embedding level and directional override.
1087 * X8. All explicit directional embeddings and overrides are completely
1088 * terminated at the end of each paragraph. Paragraph separators are not
1089 * included in the embedding. (Useless here) NOT IMPLEMENTED
1090 */
1091 bover = 0;
1092 for (i=0; i<count; i++) {
1093 tempType = getType(line[i].wc);
1094 switch (tempType) {
1095 case RLE:
1096 currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding);
1097 levels[i] = setOverrideBits(levels[i], currentOverride);
1098 currentOverride = ON;
1099 break;
1100
1101 case LRE:
1102 currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding);
1103 levels[i] = setOverrideBits(levels[i], currentOverride);
1104 currentOverride = ON;
1105 break;
1106
1107 case RLO:
1108 currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding);
1109 tempType = currentOverride = R;
1110 bover = 1;
1111 break;
1112
1113 case LRO:
1114 currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding);
1115 tempType = currentOverride = L;
1116 bover = 1;
1117 break;
1118
1119 case PDF:
1120 {
1121 int prevlevel = getPreviousLevel(levels, i);
1122
1123 if (prevlevel == -1) {
1124 currentEmbedding = paragraphLevel;
1125 currentOverride = ON;
1126 } else {
1127 currentOverride = currentEmbedding & OMASK;
1128 currentEmbedding = currentEmbedding & ~OMASK;
1129 }
1130 }
1131 levels[i] = currentEmbedding;
1132 break;
1133
1134 /* Whitespace is treated as neutral for now */
1135 case WS:
1136 case S:
1137 levels[i] = currentEmbedding;
1138 tempType = ON;
1139 if (currentOverride != ON)
1140 tempType = currentOverride;
1141 break;
1142
1143 default:
1144 levels[i] = currentEmbedding;
1145 if (currentOverride != ON)
1146 tempType = currentOverride;
1147 break;
1148
1149 }
1150 types[i] = tempType;
1151 }
1152 /* this clears out all overrides, so we can use levels safely... */
1153 /* checks bover first */
1154 if (bover)
1155 for (i=0; i<count; i++)
1156 levels[i] = levels[i] & LMASK;
1157
1158 /* Rule (X9)
1159 * X9. Remove all RLE, LRE, RLO, LRO, PDF, and BN codes.
1160 * Here, they're converted to BN.
1161 */
1162 for (i=0; i<count; i++) {
1163 switch (types[i]) {
1164 case RLE:
1165 case LRE:
1166 case RLO:
1167 case LRO:
1168 case PDF:
1169 types[i] = BN;
1170 break;
1171 }
1172 }
1173
1174 /* Rule (W1)
1175 * W1. Examine each non-spacing mark (NSM) in the level run, and change
1176 * the type of the NSM to the type of the previous character. If the NSM
1177 * is at the start of the level run, it will get the type of sor.
1178 */
1179 if (types[0] == NSM)
1180 types[0] = paragraphLevel;
1181
1182 for (i=1; i<count; i++) {
1183 if (types[i] == NSM)
1184 types[i] = types[i-1];
1185 /* Is this a safe assumption?
1186 * I assumed the previous, IS a character.
1187 */
1188 }
1189
1190 /* Rule (W2)
1191 * W2. Search backwards from each instance of a European number until the
1192 * first strong type (R, L, AL, or sor) is found. If an AL is found,
1193 * change the type of the European number to Arabic number.
1194 */
1195 for (i=0; i<count; i++) {
1196 if (types[i] == EN) {
1197 j=i;
1198 while (j >= 0) {
1199 if (types[j] == AL) {
1200 types[i] = AN;
1201 break;
1202 } else if (types[j] == R || types[j] == L) {
1203 break;
1204 }
1205 j--;
1206 }
1207 }
1208 }
1209
1210 /* Rule (W3)
1211 * W3. Change all ALs to R.
1212 *
1213 * Optimization: on Rule Xn, we might set a flag on AL type
1214 * to prevent this loop in L R lines only...
1215 */
1216 for (i=0; i<count; i++) {
1217 if (types[i] == AL)
1218 types[i] = R;
1219 }
1220
1221 /* Rule (W4)
1222 * W4. A single European separator between two European numbers changes
1223 * to a European number. A single common separator between two numbers
1224 * of the same type changes to that type.
1225 */
1226 for (i=1; i<(count-1); i++) {
1227 if (types[i] == ES) {
1228 if (types[i-1] == EN && types[i+1] == EN)
1229 types[i] = EN;
1230 } else if (types[i] == CS) {
1231 if (types[i-1] == EN && types[i+1] == EN)
1232 types[i] = EN;
1233 else if (types[i-1] == AN && types[i+1] == AN)
1234 types[i] = AN;
1235 }
1236 }
1237
1238 /* Rule (W5)
1239 * W5. A sequence of European terminators adjacent to European numbers
1240 * changes to all European numbers.
1241 *
1242 * Optimization: lots here... else ifs need rearrangement
1243 */
1244 for (i=0; i<count; i++) {
1245 if (types[i] == ET) {
1246 if (i > 0 && types[i-1] == EN) {
1247 types[i] = EN;
1248 continue;
1249 } else if (i < count-1 && types[i+1] == EN) {
1250 types[i] = EN;
1251 continue;
1252 } else if (i < count-1 && types[i+1] == ET) {
1253 j=i;
1254 while (j <count && types[j] == ET) {
1255 j++;
1256 }
1257 if (types[j] == EN)
1258 types[i] = EN;
1259 }
1260 }
1261 }
1262
1263 /* Rule (W6)
1264 * W6. Otherwise, separators and terminators change to Other Neutral:
1265 */
1266 for (i=0; i<count; i++) {
1267 switch (types[i]) {
1268 case ES:
1269 case ET:
1270 case CS:
1271 types[i] = ON;
1272 break;
1273 }
1274 }
1275
1276 /* Rule (W7)
1277 * W7. Search backwards from each instance of a European number until
1278 * the first strong type (R, L, or sor) is found. If an L is found,
1279 * then change the type of the European number to L.
1280 */
1281 for (i=0; i<count; i++) {
1282 if (types[i] == EN) {
1283 j=i;
1284 while (j >= 0) {
1285 if (types[j] == L) {
1286 types[i] = L;
1287 break;
1288 } else if (types[j] == R || types[j] == AL) {
1289 break;
1290 }
1291 j--;
1292 }
1293 }
1294 }
1295
1296 /* Rule (N1)
1297 * N1. A sequence of neutrals takes the direction of the surrounding
1298 * strong text if the text on both sides has the same direction. European
1299 * and Arabic numbers are treated as though they were R.
1300 */
1301 if (count >= 2 && types[0] == ON) {
1302 if ((types[1] == R) || (types[1] == EN) || (types[1] == AN))
1303 types[0] = R;
1304 else if (types[1] == L)
1305 types[0] = L;
1306 }
1307 for (i=1; i<(count-1); i++) {
1308 if (types[i] == ON) {
1309 if (types[i-1] == L) {
1310 j=i;
1311 while (j<(count-1) && types[j] == ON) {
1312 j++;
1313 }
1314 if (types[j] == L) {
1315 while (i<j) {
1316 types[i] = L;
1317 i++;
1318 }
1319 }
1320
1321 } else if ((types[i-1] == R) ||
1322 (types[i-1] == EN) ||
1323 (types[i-1] == AN)) {
1324 j=i;
1325 while (j<(count-1) && types[j] == ON) {
1326 j++;
1327 }
1328 if ((types[j] == R) ||
1329 (types[j] == EN) ||
1330 (types[j] == AN)) {
1331 while (i<j) {
1332 types[i] = R;
1333 i++;
1334 }
1335 }
1336 }
1337 }
1338 }
1339 if (count >= 2 && types[count-1] == ON) {
1340 if (types[count-2] == R || types[count-2] == EN || types[count-2] == AN)
1341 types[count-1] = R;
1342 else if (types[count-2] == L)
1343 types[count-1] = L;
1344 }
1345
1346 /* Rule (N2)
1347 * N2. Any remaining neutrals take the embedding direction.
1348 */
1349 for (i=0; i<count; i++) {
1350 if (types[i] == ON) {
1351 if ((levels[i] % 2) == 0)
1352 types[i] = L;
1353 else
1354 types[i] = R;
1355 }
1356 }
1357
1358 /* Rule (I1)
1359 * I1. For all characters with an even (left-to-right) embedding
1360 * direction, those of type R go up one level and those of type AN or
1361 * EN go up two levels.
1362 */
1363 for (i=0; i<count; i++) {
1364 if ((levels[i] % 2) == 0) {
1365 if (types[i] == R)
1366 levels[i] += 1;
1367 else if (types[i] == AN || types[i] == EN)
1368 levels[i] += 2;
1369 }
1370 }
1371
1372 /* Rule (I2)
1373 * I2. For all characters with an odd (right-to-left) embedding direction,
1374 * those of type L, EN or AN go up one level.
1375 */
1376 for (i=0; i<count; i++) {
1377 if ((levels[i] % 2) == 1) {
1378 if (types[i] == L || types[i] == EN || types[i] == AN)
1379 levels[i] += 1;
1380 }
1381 }
1382
1383 /* Rule (L1)
1384 * L1. On each line, reset the embedding level of the following characters
1385 * to the paragraph embedding level:
1386 * (1)segment separators, (2)paragraph separators,
1387 * (3)any sequence of whitespace characters preceding
1388 * a segment separator or paragraph separator,
1389 * (4)and any sequence of white space characters
1390 * at the end of the line.
1391 * The types of characters used here are the original types, not those
1392 * modified by the previous phase.
1393 */
1394 j=count-1;
1395 while (j>0 && (getType(line[j].wc) == WS)) {
1396 j--;
1397 }
1398 if (j < (count-1)) {
1399 for (j++; j<count; j++)
1400 levels[j] = paragraphLevel;
1401 }
1402 for (i=0; i<count; i++) {
1403 tempType = getType(line[i].wc);
1404 if (tempType == WS) {
1405 j=i;
1406 while (j<count && (getType(line[j].wc) == WS)) {
1407 j++;
1408 }
1409 if (j==count || getType(line[j].wc) == B ||
1410 getType(line[j].wc) == S) {
1411 for (j--; j>=i ; j--) {
1412 levels[j] = paragraphLevel;
1413 }
1414 }
1415 } else if (tempType == B || tempType == S) {
1416 levels[i] = paragraphLevel;
1417 }
1418 }
1419
1420 /* Rule (L4) NOT IMPLEMENTED
1421 * L4. A character that possesses the mirrored property as specified by
1422 * Section 4.7, Mirrored, must be depicted by a mirrored glyph if the
1423 * resolved directionality of that character is R.
1424 */
1425 /* Note: this is implemented before L2 for efficiency */
1426 for (i=0; i<count; i++)
1427 if ((levels[i] % 2) == 1)
1428 doMirror(&line[i].wc);
1429
1430 /* Rule (L2)
1431 * L2. From the highest level found in the text to the lowest odd level on
1432 * each line, including intermediate levels not actually present in the
1433 * text, reverse any contiguous sequence of characters that are at that
1434 * level or higher
1435 */
1436 /* we flip the character string and leave the level array */
1437 imax = 0;
1438 i=0;
1439 tempType = levels[0];
1440 while (i < count) {
1441 if (levels[i] > tempType) {
1442 tempType = levels[i];
1443 imax=i;
1444 }
1445 i++;
1446 }
1447 /* maximum level in tempType, its index in imax. */
1448 while (tempType > 0) { /* loop from highest level to the least odd, */
1449 /* which i assume is 1 */
1450 flipThisRun(line, levels, tempType, count);
1451 tempType--;
1452 }
1453
1454 /* Rule (L3) NOT IMPLEMENTED
1455 * L3. Combining marks applied to a right-to-left base character will at
1456 * this point precede their base character. If the rendering engine
1457 * expects them to follow the base characters in the final display
1458 * process, then the ordering of the marks and the base character must
1459 * be reversed.
1460 */
1461 sfree(types);
1462 sfree(levels);
1463 return R;
1464 }
1465
1466
/*
 * doMirror: replace a character by its mirrored counterpart.
 *
 * ch points to a single character.  If that character appears in the
 * table below, *ch is overwritten with the paired (mirrored) code
 * point; otherwise *ch is left untouched.  Nothing is returned.
 *
 * The table is split by the high byte of the code point so that only
 * one (small) switch has to be examined per call.  The relation is
 * meant to be symmetric: mirroring twice yields the original
 * character.
 */
void doMirror(wchar_t* ch)
{
    if ((*ch & 0xFF00) == 0) {
        /* Basic Latin / Latin-1: brackets and guillemets */
        switch (*ch) {
          case 0x0028: *ch = 0x0029; break;
          case 0x0029: *ch = 0x0028; break;
          case 0x003C: *ch = 0x003E; break;
          case 0x003E: *ch = 0x003C; break;
          case 0x005B: *ch = 0x005D; break;
          case 0x005D: *ch = 0x005B; break;
          case 0x007B: *ch = 0x007D; break;
          case 0x007D: *ch = 0x007B; break;
          case 0x00AB: *ch = 0x00BB; break;
          case 0x00BB: *ch = 0x00AB; break;
        }
    } else if ((*ch & 0xFF00) == 0x2000) {
        switch (*ch) {
          case 0x2039: *ch = 0x203A; break;
          case 0x203A: *ch = 0x2039; break;
          case 0x2045: *ch = 0x2046; break;
          case 0x2046: *ch = 0x2045; break;
          case 0x207D: *ch = 0x207E; break;
          case 0x207E: *ch = 0x207D; break;
          case 0x208D: *ch = 0x208E; break;
          case 0x208E: *ch = 0x208D; break;
        }
    } else if ((*ch & 0xFF00) == 0x2200) {
        switch (*ch) {
          case 0x2208: *ch = 0x220B; break;
          case 0x2209: *ch = 0x220C; break;
          case 0x220A: *ch = 0x220D; break;
          case 0x220B: *ch = 0x2208; break;
          case 0x220C: *ch = 0x2209; break;
          case 0x220D: *ch = 0x220A; break;
          case 0x2215: *ch = 0x29F5; break;
          case 0x223C: *ch = 0x223D; break;
          case 0x223D: *ch = 0x223C; break;
          case 0x2243: *ch = 0x22CD; break;
          case 0x2252: *ch = 0x2253; break;
          case 0x2253: *ch = 0x2252; break;
          case 0x2254: *ch = 0x2255; break;
          case 0x2255: *ch = 0x2254; break;
          case 0x2264: *ch = 0x2265; break;
          case 0x2265: *ch = 0x2264; break;
          case 0x2266: *ch = 0x2267; break;
          case 0x2267: *ch = 0x2266; break;
          case 0x2268: *ch = 0x2269; break;
          case 0x2269: *ch = 0x2268; break;
          case 0x226A: *ch = 0x226B; break;
          case 0x226B: *ch = 0x226A; break;
          case 0x226E: *ch = 0x226F; break;
          case 0x226F: *ch = 0x226E; break;
          case 0x2270: *ch = 0x2271; break;
          case 0x2271: *ch = 0x2270; break;
          case 0x2272: *ch = 0x2273; break;
          case 0x2273: *ch = 0x2272; break;
          case 0x2274: *ch = 0x2275; break;
          case 0x2275: *ch = 0x2274; break;
          case 0x2276: *ch = 0x2277; break;
          case 0x2277: *ch = 0x2276; break;
          case 0x2278: *ch = 0x2279; break;
          case 0x2279: *ch = 0x2278; break;
          case 0x227A: *ch = 0x227B; break;
          case 0x227B: *ch = 0x227A; break;
          case 0x227C: *ch = 0x227D; break;
          case 0x227D: *ch = 0x227C; break;
          case 0x227E: *ch = 0x227F; break;
          case 0x227F: *ch = 0x227E; break;
          case 0x2280: *ch = 0x2281; break;
          case 0x2281: *ch = 0x2280; break;
          case 0x2282: *ch = 0x2283; break;
          case 0x2283: *ch = 0x2282; break;
          case 0x2284: *ch = 0x2285; break;
          case 0x2285: *ch = 0x2284; break;
          case 0x2286: *ch = 0x2287; break;
          case 0x2287: *ch = 0x2286; break;
          case 0x2288: *ch = 0x2289; break;
          case 0x2289: *ch = 0x2288; break;
          case 0x228A: *ch = 0x228B; break;
          case 0x228B: *ch = 0x228A; break;
          case 0x228F: *ch = 0x2290; break;
          case 0x2290: *ch = 0x228F; break;
          case 0x2291: *ch = 0x2292; break;
          case 0x2292: *ch = 0x2291; break;
          case 0x2298: *ch = 0x29B8; break;
          case 0x22A2: *ch = 0x22A3; break;
          case 0x22A3: *ch = 0x22A2; break;
          case 0x22A6: *ch = 0x2ADE; break;
          case 0x22A8: *ch = 0x2AE4; break;
          case 0x22A9: *ch = 0x2AE3; break;
          case 0x22AB: *ch = 0x2AE5; break;
          case 0x22B0: *ch = 0x22B1; break;
          case 0x22B1: *ch = 0x22B0; break;
          case 0x22B2: *ch = 0x22B3; break;
          case 0x22B3: *ch = 0x22B2; break;
          case 0x22B4: *ch = 0x22B5; break;
          case 0x22B5: *ch = 0x22B4; break;
          case 0x22B6: *ch = 0x22B7; break;
          case 0x22B7: *ch = 0x22B6; break;
          case 0x22C9: *ch = 0x22CA; break;
          case 0x22CA: *ch = 0x22C9; break;
          case 0x22CB: *ch = 0x22CC; break;
          case 0x22CC: *ch = 0x22CB; break;
          case 0x22CD: *ch = 0x2243; break;
          case 0x22D0: *ch = 0x22D1; break;
          case 0x22D1: *ch = 0x22D0; break;
          case 0x22D6: *ch = 0x22D7; break;
          case 0x22D7: *ch = 0x22D6; break;
          case 0x22D8: *ch = 0x22D9; break;
          case 0x22D9: *ch = 0x22D8; break;
          case 0x22DA: *ch = 0x22DB; break;
          case 0x22DB: *ch = 0x22DA; break;
          case 0x22DC: *ch = 0x22DD; break;
          case 0x22DD: *ch = 0x22DC; break;
          case 0x22DE: *ch = 0x22DF; break;
          case 0x22DF: *ch = 0x22DE; break;
          case 0x22E0: *ch = 0x22E1; break;
          case 0x22E1: *ch = 0x22E0; break;
          case 0x22E2: *ch = 0x22E3; break;
          case 0x22E3: *ch = 0x22E2; break;
          case 0x22E4: *ch = 0x22E5; break;
          case 0x22E5: *ch = 0x22E4; break;
          case 0x22E6: *ch = 0x22E7; break;
          case 0x22E7: *ch = 0x22E6; break;
          case 0x22E8: *ch = 0x22E9; break;
          case 0x22E9: *ch = 0x22E8; break;
          case 0x22EA: *ch = 0x22EB; break;
          case 0x22EB: *ch = 0x22EA; break;
          case 0x22EC: *ch = 0x22ED; break;
          case 0x22ED: *ch = 0x22EC; break;
          case 0x22F0: *ch = 0x22F1; break;
          case 0x22F1: *ch = 0x22F0; break;
          case 0x22F2: *ch = 0x22FA; break;
          case 0x22F3: *ch = 0x22FB; break;
          case 0x22F4: *ch = 0x22FC; break;
          case 0x22F6: *ch = 0x22FD; break;
          case 0x22F7: *ch = 0x22FE; break;
          case 0x22FA: *ch = 0x22F2; break;
          case 0x22FB: *ch = 0x22F3; break;
          case 0x22FC: *ch = 0x22F4; break;
          case 0x22FD: *ch = 0x22F6; break;
          case 0x22FE: *ch = 0x22F7; break;
        }
    } else if ((*ch & 0xFF00) == 0x2300) {
        switch (*ch) {
          case 0x2308: *ch = 0x2309; break;
          case 0x2309: *ch = 0x2308; break;
          case 0x230A: *ch = 0x230B; break;
          case 0x230B: *ch = 0x230A; break;
          case 0x2329: *ch = 0x232A; break;
          case 0x232A: *ch = 0x2329; break;
        }
    } else if ((*ch & 0xFF00) == 0x2700) {
        switch (*ch) {
          case 0x2768: *ch = 0x2769; break;
          case 0x2769: *ch = 0x2768; break;
          case 0x276A: *ch = 0x276B; break;
          case 0x276B: *ch = 0x276A; break;
          case 0x276C: *ch = 0x276D; break;
          case 0x276D: *ch = 0x276C; break;
          case 0x276E: *ch = 0x276F; break;
          case 0x276F: *ch = 0x276E; break;
          case 0x2770: *ch = 0x2771; break;
          case 0x2771: *ch = 0x2770; break;
          case 0x2772: *ch = 0x2773; break;
          case 0x2773: *ch = 0x2772; break;
          case 0x2774: *ch = 0x2775; break;
          case 0x2775: *ch = 0x2774; break;
          case 0x27D5: *ch = 0x27D6; break;
          case 0x27D6: *ch = 0x27D5; break;
          case 0x27DD: *ch = 0x27DE; break;
          case 0x27DE: *ch = 0x27DD; break;
          case 0x27E2: *ch = 0x27E3; break;
          case 0x27E3: *ch = 0x27E2; break;
          case 0x27E4: *ch = 0x27E5; break;
          case 0x27E5: *ch = 0x27E4; break;
          case 0x27E6: *ch = 0x27E7; break;
          case 0x27E7: *ch = 0x27E6; break;
          case 0x27E8: *ch = 0x27E9; break;
          case 0x27E9: *ch = 0x27E8; break;
          case 0x27EA: *ch = 0x27EB; break;
          case 0x27EB: *ch = 0x27EA; break;
        }
    } else if ((*ch & 0xFF00) == 0x2900) {
        switch (*ch) {
          case 0x2983: *ch = 0x2984; break;
          case 0x2984: *ch = 0x2983; break;
          case 0x2985: *ch = 0x2986; break;
          case 0x2986: *ch = 0x2985; break;
          case 0x2987: *ch = 0x2988; break;
          case 0x2988: *ch = 0x2987; break;
          case 0x2989: *ch = 0x298A; break;
          case 0x298A: *ch = 0x2989; break;
          case 0x298B: *ch = 0x298C; break;
          case 0x298C: *ch = 0x298B; break;
          case 0x298D: *ch = 0x2990; break;
          case 0x298E: *ch = 0x298F; break;
          case 0x298F: *ch = 0x298E; break;
          case 0x2990: *ch = 0x298D; break;
          case 0x2991: *ch = 0x2992; break;
          case 0x2992: *ch = 0x2991; break;
          case 0x2993: *ch = 0x2994; break;
          case 0x2994: *ch = 0x2993; break;
          case 0x2995: *ch = 0x2996; break;
          case 0x2996: *ch = 0x2995; break;
          case 0x2997: *ch = 0x2998; break;
          case 0x2998: *ch = 0x2997; break;
          case 0x29B8: *ch = 0x2298; break;
          case 0x29C0: *ch = 0x29C1; break;
          case 0x29C1: *ch = 0x29C0; break;
          case 0x29C4: *ch = 0x29C5; break;
          case 0x29C5: *ch = 0x29C4; break;
          case 0x29CF: *ch = 0x29D0; break;
          case 0x29D0: *ch = 0x29CF; break;
          case 0x29D1: *ch = 0x29D2; break;
          case 0x29D2: *ch = 0x29D1; break;
          case 0x29D4: *ch = 0x29D5; break;
          case 0x29D5: *ch = 0x29D4; break;
          case 0x29D8: *ch = 0x29D9; break;
          case 0x29D9: *ch = 0x29D8; break;
          case 0x29DA: *ch = 0x29DB; break;
          case 0x29DB: *ch = 0x29DA; break;
          case 0x29F5: *ch = 0x2215; break;
          case 0x29F8: *ch = 0x29F9; break;
          case 0x29F9: *ch = 0x29F8; break;
          case 0x29FC: *ch = 0x29FD; break;
          case 0x29FD: *ch = 0x29FC; break;
        }
    } else if ((*ch & 0xFF00) == 0x2A00) {
        switch (*ch) {
          case 0x2A2B: *ch = 0x2A2C; break;
          case 0x2A2C: *ch = 0x2A2B; break;
          /* 2A2D pairs with 2A2E (previously mis-mapped to 2A2C, which
           * broke the symmetry with the 2A2E entry just below). */
          case 0x2A2D: *ch = 0x2A2E; break;
          case 0x2A2E: *ch = 0x2A2D; break;
          case 0x2A34: *ch = 0x2A35; break;
          case 0x2A35: *ch = 0x2A34; break;
          case 0x2A3C: *ch = 0x2A3D; break;
          case 0x2A3D: *ch = 0x2A3C; break;
          case 0x2A64: *ch = 0x2A65; break;
          case 0x2A65: *ch = 0x2A64; break;
          case 0x2A79: *ch = 0x2A7A; break;
          case 0x2A7A: *ch = 0x2A79; break;
          case 0x2A7D: *ch = 0x2A7E; break;
          case 0x2A7E: *ch = 0x2A7D; break;
          case 0x2A7F: *ch = 0x2A80; break;
          case 0x2A80: *ch = 0x2A7F; break;
          case 0x2A81: *ch = 0x2A82; break;
          case 0x2A82: *ch = 0x2A81; break;
          case 0x2A83: *ch = 0x2A84; break;
          case 0x2A84: *ch = 0x2A83; break;
          case 0x2A8B: *ch = 0x2A8C; break;
          case 0x2A8C: *ch = 0x2A8B; break;
          case 0x2A91: *ch = 0x2A92; break;
          case 0x2A92: *ch = 0x2A91; break;
          case 0x2A93: *ch = 0x2A94; break;
          case 0x2A94: *ch = 0x2A93; break;
          case 0x2A95: *ch = 0x2A96; break;
          case 0x2A96: *ch = 0x2A95; break;
          case 0x2A97: *ch = 0x2A98; break;
          case 0x2A98: *ch = 0x2A97; break;
          case 0x2A99: *ch = 0x2A9A; break;
          case 0x2A9A: *ch = 0x2A99; break;
          case 0x2A9B: *ch = 0x2A9C; break;
          case 0x2A9C: *ch = 0x2A9B; break;
          case 0x2AA1: *ch = 0x2AA2; break;
          case 0x2AA2: *ch = 0x2AA1; break;
          case 0x2AA6: *ch = 0x2AA7; break;
          case 0x2AA7: *ch = 0x2AA6; break;
          case 0x2AA8: *ch = 0x2AA9; break;
          case 0x2AA9: *ch = 0x2AA8; break;
          case 0x2AAA: *ch = 0x2AAB; break;
          case 0x2AAB: *ch = 0x2AAA; break;
          case 0x2AAC: *ch = 0x2AAD; break;
          case 0x2AAD: *ch = 0x2AAC; break;
          case 0x2AAF: *ch = 0x2AB0; break;
          case 0x2AB0: *ch = 0x2AAF; break;
          case 0x2AB3: *ch = 0x2AB4; break;
          case 0x2AB4: *ch = 0x2AB3; break;
          case 0x2ABB: *ch = 0x2ABC; break;
          case 0x2ABC: *ch = 0x2ABB; break;
          case 0x2ABD: *ch = 0x2ABE; break;
          case 0x2ABE: *ch = 0x2ABD; break;
          case 0x2ABF: *ch = 0x2AC0; break;
          case 0x2AC0: *ch = 0x2ABF; break;
          case 0x2AC1: *ch = 0x2AC2; break;
          case 0x2AC2: *ch = 0x2AC1; break;
          case 0x2AC3: *ch = 0x2AC4; break;
          case 0x2AC4: *ch = 0x2AC3; break;
          case 0x2AC5: *ch = 0x2AC6; break;
          case 0x2AC6: *ch = 0x2AC5; break;
          case 0x2ACD: *ch = 0x2ACE; break;
          case 0x2ACE: *ch = 0x2ACD; break;
          case 0x2ACF: *ch = 0x2AD0; break;
          case 0x2AD0: *ch = 0x2ACF; break;
          case 0x2AD1: *ch = 0x2AD2; break;
          case 0x2AD2: *ch = 0x2AD1; break;
          case 0x2AD3: *ch = 0x2AD4; break;
          case 0x2AD4: *ch = 0x2AD3; break;
          case 0x2AD5: *ch = 0x2AD6; break;
          case 0x2AD6: *ch = 0x2AD5; break;
          case 0x2ADE: *ch = 0x22A6; break;
          case 0x2AE3: *ch = 0x22A9; break;
          case 0x2AE4: *ch = 0x22A8; break;
          case 0x2AE5: *ch = 0x22AB; break;
          case 0x2AEC: *ch = 0x2AED; break;
          case 0x2AED: *ch = 0x2AEC; break;
          case 0x2AF7: *ch = 0x2AF8; break;
          case 0x2AF8: *ch = 0x2AF7; break;
          case 0x2AF9: *ch = 0x2AFA; break;
          case 0x2AFA: *ch = 0x2AF9; break;
        }
    } else if ((*ch & 0xFF00) == 0x3000) {
        switch (*ch) {
          case 0x3008: *ch = 0x3009; break;
          case 0x3009: *ch = 0x3008; break;
          case 0x300A: *ch = 0x300B; break;
          case 0x300B: *ch = 0x300A; break;
          case 0x300C: *ch = 0x300D; break;
          case 0x300D: *ch = 0x300C; break;
          case 0x300E: *ch = 0x300F; break;
          case 0x300F: *ch = 0x300E; break;
          case 0x3010: *ch = 0x3011; break;
          case 0x3011: *ch = 0x3010; break;
          case 0x3014: *ch = 0x3015; break;
          case 0x3015: *ch = 0x3014; break;
          case 0x3016: *ch = 0x3017; break;
          case 0x3017: *ch = 0x3016; break;
          case 0x3018: *ch = 0x3019; break;
          case 0x3019: *ch = 0x3018; break;
          case 0x301A: *ch = 0x301B; break;
          case 0x301B: *ch = 0x301A; break;
        }
    } else if ((*ch & 0xFF00) == 0xFF00) {
        /* Fullwidth / halfwidth forms */
        switch (*ch) {
          case 0xFF08: *ch = 0xFF09; break;
          case 0xFF09: *ch = 0xFF08; break;
          case 0xFF1C: *ch = 0xFF1E; break;
          case 0xFF1E: *ch = 0xFF1C; break;
          case 0xFF3B: *ch = 0xFF3D; break;
          case 0xFF3D: *ch = 0xFF3B; break;
          case 0xFF5B: *ch = 0xFF5D; break;
          case 0xFF5D: *ch = 0xFF5B; break;
          case 0xFF5F: *ch = 0xFF60; break;
          case 0xFF60: *ch = 0xFF5F; break;
          case 0xFF62: *ch = 0xFF63; break;
          case 0xFF63: *ch = 0xFF62; break;
        }
    }
}
1821
1822 #ifdef TEST_GETTYPE
1823
1824 #include <stdio.h>
1825 #include <assert.h>
1826
1827 int main(int argc, char **argv)
1828 {
1829 static const struct { int type; char *name; } typetoname[] = {
1830 #define TYPETONAME(X) { X , #X }
1831 TYPETONAME(L),
1832 TYPETONAME(LRE),
1833 TYPETONAME(LRO),
1834 TYPETONAME(R),
1835 TYPETONAME(AL),
1836 TYPETONAME(RLE),
1837 TYPETONAME(RLO),
1838 TYPETONAME(PDF),
1839 TYPETONAME(EN),
1840 TYPETONAME(ES),
1841 TYPETONAME(ET),
1842 TYPETONAME(AN),
1843 TYPETONAME(CS),
1844 TYPETONAME(NSM),
1845 TYPETONAME(BN),
1846 TYPETONAME(B),
1847 TYPETONAME(S),
1848 TYPETONAME(WS),
1849 TYPETONAME(ON),
1850 #undef TYPETONAME
1851 };
1852 int i;
1853
1854 for (i = 1; i < argc; i++) {
1855 unsigned long chr = strtoul(argv[i], NULL, 0);
1856 int type = getType(chr);
1857 assert(typetoname[type].type == type);
1858 printf("U+%04x: %s\n", chr, typetoname[type].name);
1859 }
1860
1861 return 0;
1862 }
1863
1864 #endif