The documentation was still claiming that we don't support x11-auth.
[u/mdw/putty] / minibidi.c
CommitLineData
/************************************************************************
 * $Id$
 *
 * ------------
 * Description:
 * ------------
 * This is an implementation of Unicode's Bidirectional Algorithm
 * (known as UAX #9).
 *
 * http://www.unicode.org/reports/tr9/
 *
 * Author: Ahmad Khalifa
 *
 * -----------------
 * Revision Details: (Updated by Revision Control System)
 * -----------------
 * $Date$
 * $Author$
 * $Revision$
 *
 * (www.arabeyes.org - under MIT license)
 *
 ************************************************************************/
24
25/*
26 * TODO:
27 * =====
28 * - Explicit marks need to be handled (they are not 100% now)
29 * - Ligatures
30 */
31
31626f30 32#include <stdlib.h> /* definition of wchar_t*/
f0fccd51 33
31626f30 34#include "misc.h"
35
/*
 * Bit layout of a per-character level byte: LMASK selects the low six
 * bits (the embedding level) and OMASK selects the top two bits (the
 * override status, as set by setOverrideBits()).
 */
#define LMASK 0x3F /* Embedding Level mask */
#define OMASK 0xC0 /* Override mask */
#define OISL 0x80 /* Override is L */
#define OISR 0x40 /* Override is R */
40
/* For standalone compilation in a testing mode.
 * Still depends on the PuTTY headers for snewn and sfree, but can avoid
 * _linking_ with any other PuTTY code.
 *
 * When TEST_GETTYPE is defined, safemalloc and safefree are mapped
 * directly onto the C library's malloc and free. (Presumably these are
 * the names snewn and sfree expand to -- confirm against the PuTTY
 * headers.) */
#ifdef TEST_GETTYPE
#define safemalloc malloc
#define safefree free
#endif
48
/*
 * Shaping Helpers
 *
 * STYPE(xh):     joining type of code point xh, read from shapetypes[];
 *                yields SU for anything outside SHAPE_FIRST..SHAPE_LAST.
 *                Note that it evaluates its argument more than once.
 * SISOLATED(xh): the form_b entry of shapetypes[] for xh. There is no
 *                range check here, so xh must already be known to lie
 *                within SHAPE_FIRST..SHAPE_LAST.
 * SFINAL(xh), SINITIAL(xh), SMEDIAL(ch):
 *                the value given plus 1, 2 and 3 respectively; do_shape()
 *                applies them to the result of SISOLATED().
 */
#define STYPE(xh) ((((xh) >= SHAPE_FIRST) && ((xh) <= SHAPE_LAST)) ? \
    shapetypes[(xh)-SHAPE_FIRST].type : SU) /*))*/
#define SISOLATED(xh) (shapetypes[(xh)-SHAPE_FIRST].form_b)
#define SFINAL(xh) ((xh)+1)
#define SINITIAL(xh) ((xh)+2)
#define SMEDIAL(ch) ((ch)+3)
56
/* Smallest odd value strictly greater than x (x taken as non-negative). */
#define leastGreaterOdd(x) ( ((x)+1) | 1 )
/* Smallest even value strictly greater than x (x taken as non-negative). */
#define leastGreaterEven(x) ( ((x)+2) &~ 1 )
59
/*
 * One character cell as passed to do_bidi() and do_shape().
 */
typedef struct bidi_char {
    /*
     * wc is the code point that getType() classifies and do_shape()
     * rewrites. origwc is not touched by the code in this part of the
     * file; presumably it keeps the untransformed character -- TODO
     * confirm against the callers.
     */
    wchar_t origwc, wc;
    /* Presumably the cell's original position in the line -- TODO confirm. */
    unsigned short index;
} bidi_char;
64
/* function declarations */

/* Reverse, in place, the runs of `from' selected by level `max'. */
void flipThisRun(bidi_char *from, unsigned char* level, int max, int count);
/* Index of the first entry at or after `start' equal to tlevel, else count. */
int findIndexOfRun(unsigned char* level , int start, int count, int tlevel);
/* Bidi character type (one of the enum values below) of code point ch. */
unsigned char getType(int ch);
/* Return `level' with the override bits for `override' OR-ed in. */
unsigned char setOverrideBits(unsigned char level, unsigned char override);
/* Level preceding the run that ends just before index `from', or -1. */
int getPreviousLevel(unsigned char* level, int from);
/* Arabic shaping of `line' into `to'. */
int do_shape(bidi_char *line, bidi_char *to, int count);
/* Apply the bidirectional algorithm to `line' in place. */
int do_bidi(bidi_char *line, int count);
/* Defined outside this part of the file; presumably mirrors *ch -- TODO confirm. */
void doMirror(wchar_t* ch);
74
/*
 * character types
 *
 * These are the bidirectional character classes returned by getType().
 * The names follow the class abbreviations used by UAX #9. The numeric
 * values matter: is_rtl() uses them as bit positions (1 << type), so
 * they must stay small enough to fit in an int bit mask.
 */
enum {
    L,      /* Left-to-Right */
    LRE,    /* Left-to-Right Embedding (U+202A) */
    LRO,    /* Left-to-Right Override (U+202D) */
    R,      /* Right-to-Left */
    AL,     /* Right-to-Left Arabic */
    RLE,    /* Right-to-Left Embedding (U+202B) */
    RLO,    /* Right-to-Left Override (U+202E) */
    PDF,    /* Pop Directional Format (U+202C) */
    EN,     /* European Number */
    ES,     /* European Number Separator */
    ET,     /* European Number Terminator */
    AN,     /* Arabic Number */
    CS,     /* Common Number Separator */
    NSM,    /* Non-Spacing Mark */
    BN,     /* Boundary Neutral */
    B,      /* Paragraph Separator */
    S,      /* Segment Separator */
    WS,     /* Whitespace */
    ON      /* Other Neutrals; also getType()'s answer for unlisted code points */
};
97
/*
 * Shaping Types
 *
 * Joining class of an Arabic letter, stored in shape_node.type and
 * returned by the STYPE() macro.
 */
enum {
    SL, /* Left-Joining, doesn't exist in U+0600 - U+06FF */
    SR, /* Right-Joining, ie has Isolated, Final */
    SD, /* Dual-Joining, ie has Isolated, Final, Initial, Medial */
    SU, /* Non-Joining */
    SC /* Join-Causing, like U+0640 (TATWEEL) */
};
106
/*
 * One entry of the shapetypes[] table.
 */
typedef struct {
    char type;      /* one of the shaping types SL, SR, SD, SU, SC */
    wchar_t form_b; /* value returned by SISOLATED(): the isolated form, or 0x0 */
} shape_node;
111
/* Kept near the actual table, for verification.
 *
 * SHAPE_FIRST is the code point described by shapetypes[0]. SHAPE_LAST
 * is derived from the table length via lenof(), so the range check in
 * STYPE() follows the table automatically when entries are appended. */
#define SHAPE_FIRST 0x621
#define SHAPE_LAST (SHAPE_FIRST + lenof(shapetypes) - 1)
31626f30 115
/*
 * Shaping table, indexed by (code point - SHAPE_FIRST). Each entry
 * gives the letter's joining type and the code point that SISOLATED()
 * returns for it (0x0 where the entry has none). do_shape() derives
 * the final, initial and medial forms by adding 1, 2 and 3 to that
 * value.
 */
const shape_node shapetypes[] = {
    /* code point, {Type, Isolated form} */
    /* 621 */ {SU, 0xFE80},
    /* 622 */ {SR, 0xFE81},
    /* 623 */ {SR, 0xFE83},
    /* 624 */ {SR, 0xFE85},
    /* 625 */ {SR, 0xFE87},
    /* 626 */ {SD, 0xFE89},
    /* 627 */ {SR, 0xFE8D},
    /* 628 */ {SD, 0xFE8F},
    /* 629 */ {SR, 0xFE93},
    /* 62A */ {SD, 0xFE95},
    /* 62B */ {SD, 0xFE99},
    /* 62C */ {SD, 0xFE9D},
    /* 62D */ {SD, 0xFEA1},
    /* 62E */ {SD, 0xFEA5},
    /* 62F */ {SR, 0xFEA9},
    /* 630 */ {SR, 0xFEAB},
    /* 631 */ {SR, 0xFEAD},
    /* 632 */ {SR, 0xFEAF},
    /* 633 */ {SD, 0xFEB1},
    /* 634 */ {SD, 0xFEB5},
    /* 635 */ {SD, 0xFEB9},
    /* 636 */ {SD, 0xFEBD},
    /* 637 */ {SD, 0xFEC1},
    /* 638 */ {SD, 0xFEC5},
    /* 639 */ {SD, 0xFEC9},
    /* 63A */ {SD, 0xFECD},
    /* 63B */ {SU, 0x0},
    /* 63C */ {SU, 0x0},
    /* 63D */ {SU, 0x0},
    /* 63E */ {SU, 0x0},
    /* 63F */ {SU, 0x0},
    /* 640 */ {SC, 0x0},
    /* 641 */ {SD, 0xFED1},
    /* 642 */ {SD, 0xFED5},
    /* 643 */ {SD, 0xFED9},
    /* 644 */ {SD, 0xFEDD},
    /* 645 */ {SD, 0xFEE1},
    /* 646 */ {SD, 0xFEE5},
    /* 647 */ {SD, 0xFEE9},
    /* 648 */ {SR, 0xFEED},
    /* 649 */ {SR, 0xFEEF}, /* SD */
    /* 64A */ {SD, 0xFEF1},
    /* 64B */ {SU, 0x0},
    /* 64C */ {SU, 0x0},
    /* 64D */ {SU, 0x0},
    /* 64E */ {SU, 0x0},
    /* 64F */ {SU, 0x0},
    /* 650 */ {SU, 0x0},
    /* 651 */ {SU, 0x0},
    /* 652 */ {SU, 0x0},
    /* 653 */ {SU, 0x0},
    /* 654 */ {SU, 0x0},
    /* 655 */ {SU, 0x0},
    /* 656 */ {SU, 0x0},
    /* 657 */ {SU, 0x0},
    /* 658 */ {SU, 0x0},
    /* 659 */ {SU, 0x0},
    /* 65A */ {SU, 0x0},
    /* 65B */ {SU, 0x0},
    /* 65C */ {SU, 0x0},
    /* 65D */ {SU, 0x0},
    /* 65E */ {SU, 0x0},
    /* 65F */ {SU, 0x0},
    /* 660 */ {SU, 0x0},
    /* 661 */ {SU, 0x0},
    /* 662 */ {SU, 0x0},
    /* 663 */ {SU, 0x0},
    /* 664 */ {SU, 0x0},
    /* 665 */ {SU, 0x0},
    /* 666 */ {SU, 0x0},
    /* 667 */ {SU, 0x0},
    /* 668 */ {SU, 0x0},
    /* 669 */ {SU, 0x0},
    /* 66A */ {SU, 0x0},
    /* 66B */ {SU, 0x0},
    /* 66C */ {SU, 0x0},
    /* 66D */ {SU, 0x0},
    /* 66E */ {SU, 0x0},
    /* 66F */ {SU, 0x0},
    /* 670 */ {SU, 0x0},
    /* 671 */ {SR, 0xFB50},
    /* 672 */ {SU, 0x0},
    /* 673 */ {SU, 0x0},
    /* 674 */ {SU, 0x0},
    /* 675 */ {SU, 0x0},
    /* 676 */ {SU, 0x0},
    /* 677 */ {SU, 0x0},
    /* 678 */ {SU, 0x0},
    /* 679 */ {SD, 0xFB66},
    /* 67A */ {SD, 0xFB5E},
    /* 67B */ {SD, 0xFB52},
    /* 67C */ {SU, 0x0},
    /* 67D */ {SU, 0x0},
    /* 67E */ {SD, 0xFB56},
    /* 67F */ {SD, 0xFB62},
    /* 680 */ {SD, 0xFB5A},
    /* 681 */ {SU, 0x0},
    /* 682 */ {SU, 0x0},
    /* 683 */ {SD, 0xFB76},
    /* 684 */ {SD, 0xFB72},
    /* 685 */ {SU, 0x0},
    /* 686 */ {SD, 0xFB7A},
    /* 687 */ {SD, 0xFB7E},
    /* 688 */ {SR, 0xFB88},
    /* 689 */ {SU, 0x0},
    /* 68A */ {SU, 0x0},
    /* 68B */ {SU, 0x0},
    /* 68C */ {SR, 0xFB84},
    /* 68D */ {SR, 0xFB82},
    /* 68E */ {SR, 0xFB86},
    /* 68F */ {SU, 0x0},
    /* 690 */ {SU, 0x0},
    /* 691 */ {SR, 0xFB8C},
    /* 692 */ {SU, 0x0},
    /* 693 */ {SU, 0x0},
    /* 694 */ {SU, 0x0},
    /* 695 */ {SU, 0x0},
    /* 696 */ {SU, 0x0},
    /* 697 */ {SU, 0x0},
    /* 698 */ {SR, 0xFB8A},
    /* 699 */ {SU, 0x0},
    /* 69A */ {SU, 0x0},
    /* 69B */ {SU, 0x0},
    /* 69C */ {SU, 0x0},
    /* 69D */ {SU, 0x0},
    /* 69E */ {SU, 0x0},
    /* 69F */ {SU, 0x0},
    /* 6A0 */ {SU, 0x0},
    /* 6A1 */ {SU, 0x0},
    /* 6A2 */ {SU, 0x0},
    /* 6A3 */ {SU, 0x0},
    /* 6A4 */ {SD, 0xFB6A},
    /* 6A5 */ {SU, 0x0},
    /* 6A6 */ {SD, 0xFB6E},
    /* 6A7 */ {SU, 0x0},
    /* 6A8 */ {SU, 0x0},
    /* 6A9 */ {SD, 0xFB8E},
    /* 6AA */ {SU, 0x0},
    /* 6AB */ {SU, 0x0},
    /* 6AC */ {SU, 0x0},
    /* 6AD */ {SD, 0xFBD3},
    /* 6AE */ {SU, 0x0},
    /* 6AF */ {SD, 0xFB92},
    /* 6B0 */ {SU, 0x0},
    /* 6B1 */ {SD, 0xFB9A},
    /* 6B2 */ {SU, 0x0},
    /* 6B3 */ {SD, 0xFB96},
    /* 6B4 */ {SU, 0x0},
    /* 6B5 */ {SU, 0x0},
    /* 6B6 */ {SU, 0x0},
    /* 6B7 */ {SU, 0x0},
    /* 6B8 */ {SU, 0x0},
    /* 6B9 */ {SU, 0x0},
    /* 6BA */ {SR, 0xFB9E},
    /* 6BB */ {SD, 0xFBA0},
    /* 6BC */ {SU, 0x0},
    /* 6BD */ {SU, 0x0},
    /* 6BE */ {SD, 0xFBAA},
    /* 6BF */ {SU, 0x0},
    /* 6C0 */ {SR, 0xFBA4},
    /* 6C1 */ {SD, 0xFBA6},
    /* 6C2 */ {SU, 0x0},
    /* 6C3 */ {SU, 0x0},
    /* 6C4 */ {SU, 0x0},
    /* 6C5 */ {SR, 0xFBE0},
    /* 6C6 */ {SR, 0xFBD9},
    /* 6C7 */ {SR, 0xFBD7},
    /* 6C8 */ {SR, 0xFBDB},
    /* 6C9 */ {SR, 0xFBE2},
    /* 6CA */ {SU, 0x0},
    /* 6CB */ {SR, 0xFBDE},
    /* 6CC */ {SD, 0xFBFC},
    /* 6CD */ {SU, 0x0},
    /* 6CE */ {SU, 0x0},
    /* 6CF */ {SU, 0x0},
    /* 6D0 */ {SU, 0x0},
    /* 6D1 */ {SU, 0x0},
    /* 6D2 */ {SR, 0xFBAE},
};
297
298/*
f0fccd51 299 * Flips the text buffer, according to max level, and
300 * all higher levels
cd985a32 301 *
f0fccd51 302 * Input:
303 * from: text buffer, on which to apply flipping
304 * level: resolved levels buffer
305 * max: the maximum level found in this line (should be unsigned char)
306 * count: line size in bidi_char
307 */
308void flipThisRun(bidi_char *from, unsigned char *level, int max, int count)
309{
31626f30 310 int i, j, k, tlevel;
f0fccd51 311 bidi_char temp;
312
313 j = i = 0;
cd985a32 314 while (i<count && j<count) {
f0fccd51 315
316 /* find the start of the run of level=max */
317 tlevel = max;
318 i = j = findIndexOfRun(level, i, count, max);
319 /* find the end of the run */
cd985a32 320 while (i<count && tlevel <= level[i]) {
f0fccd51 321 i++;
322 }
31626f30 323 for (k = i - 1; k > j; k--, j++) {
324 temp = from[k];
325 from[k] = from[j];
326 from[j] = temp;
f0fccd51 327 }
328 }
329}
330
/*
 * Scans level[start .. count-1] for the first entry equal to tlevel
 * and returns its index. Returns count when there is no such entry
 * (including when start is already at or beyond count).
 */
int findIndexOfRun(unsigned char* level , int start, int count, int tlevel)
{
    int pos = start;

    while (pos < count && level[pos] != tlevel)
        pos++;

    /* a start beyond count must still report count, not start */
    return (pos < count) ? pos : count;
}
344
345/*
7bc1ffdf 346 * Returns the bidi character type of ch.
347 *
348 * The data table in this function is constructed from the Unicode
349 * Character Database, downloadable from unicode.org at the URL
350 *
351 * http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
352 *
353 * by the following fragment of Perl:
354
355perl -ne 'split ";"; $num = hex $_[0]; $type = $_[4];' \
356 -e '$fl = ($_[1] =~ /First/ ? 1 : $_[1] =~ /Last/ ? 2 : 0);' \
357 -e 'if ($type eq $runtype and ($runend == $num-1 or ' \
358 -e ' ($fl==2 and $pfl==1))) {$runend = $num;} else { &reset; }' \
359 -e '$pfl=$fl; END { &reset }; sub reset {' \
360 -e 'printf" {0x%04x, 0x%04x, %s},\n",$runstart,$runend,$runtype' \
361 -e ' if defined $runstart and $runtype ne "ON";' \
362 -e '$runstart=$runend=$num; $runtype=$type;}' \
363 UnicodeData.txt
364
f0fccd51 365 */
7bc1ffdf 366unsigned char getType(int ch)
f0fccd51 367{
7bc1ffdf 368 static const struct {
369 int first, last, type;
370 } lookup[] = {
371 {0x0000, 0x0008, BN},
372 {0x0009, 0x0009, S},
373 {0x000a, 0x000a, B},
374 {0x000b, 0x000b, S},
375 {0x000c, 0x000c, WS},
376 {0x000d, 0x000d, B},
377 {0x000e, 0x001b, BN},
378 {0x001c, 0x001e, B},
379 {0x001f, 0x001f, S},
380 {0x0020, 0x0020, WS},
381 {0x0023, 0x0025, ET},
382 {0x002b, 0x002b, ES},
383 {0x002c, 0x002c, CS},
384 {0x002d, 0x002d, ES},
385 {0x002e, 0x002f, CS},
386 {0x0030, 0x0039, EN},
387 {0x003a, 0x003a, CS},
388 {0x0041, 0x005a, L},
389 {0x0061, 0x007a, L},
390 {0x007f, 0x0084, BN},
391 {0x0085, 0x0085, B},
392 {0x0086, 0x009f, BN},
393 {0x00a0, 0x00a0, CS},
394 {0x00a2, 0x00a5, ET},
395 {0x00aa, 0x00aa, L},
396 {0x00ad, 0x00ad, BN},
397 {0x00b0, 0x00b1, ET},
398 {0x00b2, 0x00b3, EN},
399 {0x00b5, 0x00b5, L},
400 {0x00b9, 0x00b9, EN},
401 {0x00ba, 0x00ba, L},
402 {0x00c0, 0x00d6, L},
403 {0x00d8, 0x00f6, L},
404 {0x00f8, 0x0236, L},
405 {0x0250, 0x02b8, L},
406 {0x02bb, 0x02c1, L},
407 {0x02d0, 0x02d1, L},
408 {0x02e0, 0x02e4, L},
409 {0x02ee, 0x02ee, L},
410 {0x0300, 0x0357, NSM},
411 {0x035d, 0x036f, NSM},
412 {0x037a, 0x037a, L},
413 {0x0386, 0x0386, L},
414 {0x0388, 0x038a, L},
415 {0x038c, 0x038c, L},
416 {0x038e, 0x03a1, L},
417 {0x03a3, 0x03ce, L},
418 {0x03d0, 0x03f5, L},
419 {0x03f7, 0x03fb, L},
420 {0x0400, 0x0482, L},
421 {0x0483, 0x0486, NSM},
422 {0x0488, 0x0489, NSM},
423 {0x048a, 0x04ce, L},
424 {0x04d0, 0x04f5, L},
425 {0x04f8, 0x04f9, L},
426 {0x0500, 0x050f, L},
427 {0x0531, 0x0556, L},
428 {0x0559, 0x055f, L},
429 {0x0561, 0x0587, L},
430 {0x0589, 0x0589, L},
431 {0x0591, 0x05a1, NSM},
432 {0x05a3, 0x05b9, NSM},
433 {0x05bb, 0x05bd, NSM},
434 {0x05be, 0x05be, R},
435 {0x05bf, 0x05bf, NSM},
436 {0x05c0, 0x05c0, R},
437 {0x05c1, 0x05c2, NSM},
438 {0x05c3, 0x05c3, R},
439 {0x05c4, 0x05c4, NSM},
440 {0x05d0, 0x05ea, R},
441 {0x05f0, 0x05f4, R},
442 {0x0600, 0x0603, AL},
443 {0x060c, 0x060c, CS},
444 {0x060d, 0x060d, AL},
445 {0x0610, 0x0615, NSM},
446 {0x061b, 0x061b, AL},
447 {0x061f, 0x061f, AL},
448 {0x0621, 0x063a, AL},
449 {0x0640, 0x064a, AL},
450 {0x064b, 0x0658, NSM},
451 {0x0660, 0x0669, AN},
452 {0x066a, 0x066a, ET},
453 {0x066b, 0x066c, AN},
454 {0x066d, 0x066f, AL},
455 {0x0670, 0x0670, NSM},
456 {0x0671, 0x06d5, AL},
457 {0x06d6, 0x06dc, NSM},
458 {0x06dd, 0x06dd, AL},
459 {0x06de, 0x06e4, NSM},
460 {0x06e5, 0x06e6, AL},
461 {0x06e7, 0x06e8, NSM},
462 {0x06ea, 0x06ed, NSM},
463 {0x06ee, 0x06ef, AL},
464 {0x06f0, 0x06f9, EN},
465 {0x06fa, 0x070d, AL},
466 {0x070f, 0x070f, BN},
467 {0x0710, 0x0710, AL},
468 {0x0711, 0x0711, NSM},
469 {0x0712, 0x072f, AL},
470 {0x0730, 0x074a, NSM},
471 {0x074d, 0x074f, AL},
472 {0x0780, 0x07a5, AL},
473 {0x07a6, 0x07b0, NSM},
474 {0x07b1, 0x07b1, AL},
475 {0x0901, 0x0902, NSM},
476 {0x0903, 0x0939, L},
477 {0x093c, 0x093c, NSM},
478 {0x093d, 0x0940, L},
479 {0x0941, 0x0948, NSM},
480 {0x0949, 0x094c, L},
481 {0x094d, 0x094d, NSM},
482 {0x0950, 0x0950, L},
483 {0x0951, 0x0954, NSM},
484 {0x0958, 0x0961, L},
485 {0x0962, 0x0963, NSM},
486 {0x0964, 0x0970, L},
487 {0x0981, 0x0981, NSM},
488 {0x0982, 0x0983, L},
489 {0x0985, 0x098c, L},
490 {0x098f, 0x0990, L},
491 {0x0993, 0x09a8, L},
492 {0x09aa, 0x09b0, L},
493 {0x09b2, 0x09b2, L},
494 {0x09b6, 0x09b9, L},
495 {0x09bc, 0x09bc, NSM},
496 {0x09bd, 0x09c0, L},
497 {0x09c1, 0x09c4, NSM},
498 {0x09c7, 0x09c8, L},
499 {0x09cb, 0x09cc, L},
500 {0x09cd, 0x09cd, NSM},
501 {0x09d7, 0x09d7, L},
502 {0x09dc, 0x09dd, L},
503 {0x09df, 0x09e1, L},
504 {0x09e2, 0x09e3, NSM},
505 {0x09e6, 0x09f1, L},
506 {0x09f2, 0x09f3, ET},
507 {0x09f4, 0x09fa, L},
508 {0x0a01, 0x0a02, NSM},
509 {0x0a03, 0x0a03, L},
510 {0x0a05, 0x0a0a, L},
511 {0x0a0f, 0x0a10, L},
512 {0x0a13, 0x0a28, L},
513 {0x0a2a, 0x0a30, L},
514 {0x0a32, 0x0a33, L},
515 {0x0a35, 0x0a36, L},
516 {0x0a38, 0x0a39, L},
517 {0x0a3c, 0x0a3c, NSM},
518 {0x0a3e, 0x0a40, L},
519 {0x0a41, 0x0a42, NSM},
520 {0x0a47, 0x0a48, NSM},
521 {0x0a4b, 0x0a4d, NSM},
522 {0x0a59, 0x0a5c, L},
523 {0x0a5e, 0x0a5e, L},
524 {0x0a66, 0x0a6f, L},
525 {0x0a70, 0x0a71, NSM},
526 {0x0a72, 0x0a74, L},
527 {0x0a81, 0x0a82, NSM},
528 {0x0a83, 0x0a83, L},
529 {0x0a85, 0x0a8d, L},
530 {0x0a8f, 0x0a91, L},
531 {0x0a93, 0x0aa8, L},
532 {0x0aaa, 0x0ab0, L},
533 {0x0ab2, 0x0ab3, L},
534 {0x0ab5, 0x0ab9, L},
535 {0x0abc, 0x0abc, NSM},
536 {0x0abd, 0x0ac0, L},
537 {0x0ac1, 0x0ac5, NSM},
538 {0x0ac7, 0x0ac8, NSM},
539 {0x0ac9, 0x0ac9, L},
540 {0x0acb, 0x0acc, L},
541 {0x0acd, 0x0acd, NSM},
542 {0x0ad0, 0x0ad0, L},
543 {0x0ae0, 0x0ae1, L},
544 {0x0ae2, 0x0ae3, NSM},
545 {0x0ae6, 0x0aef, L},
546 {0x0af1, 0x0af1, ET},
547 {0x0b01, 0x0b01, NSM},
548 {0x0b02, 0x0b03, L},
549 {0x0b05, 0x0b0c, L},
550 {0x0b0f, 0x0b10, L},
551 {0x0b13, 0x0b28, L},
552 {0x0b2a, 0x0b30, L},
553 {0x0b32, 0x0b33, L},
554 {0x0b35, 0x0b39, L},
555 {0x0b3c, 0x0b3c, NSM},
556 {0x0b3d, 0x0b3e, L},
557 {0x0b3f, 0x0b3f, NSM},
558 {0x0b40, 0x0b40, L},
559 {0x0b41, 0x0b43, NSM},
560 {0x0b47, 0x0b48, L},
561 {0x0b4b, 0x0b4c, L},
562 {0x0b4d, 0x0b4d, NSM},
563 {0x0b56, 0x0b56, NSM},
564 {0x0b57, 0x0b57, L},
565 {0x0b5c, 0x0b5d, L},
566 {0x0b5f, 0x0b61, L},
567 {0x0b66, 0x0b71, L},
568 {0x0b82, 0x0b82, NSM},
569 {0x0b83, 0x0b83, L},
570 {0x0b85, 0x0b8a, L},
571 {0x0b8e, 0x0b90, L},
572 {0x0b92, 0x0b95, L},
573 {0x0b99, 0x0b9a, L},
574 {0x0b9c, 0x0b9c, L},
575 {0x0b9e, 0x0b9f, L},
576 {0x0ba3, 0x0ba4, L},
577 {0x0ba8, 0x0baa, L},
578 {0x0bae, 0x0bb5, L},
579 {0x0bb7, 0x0bb9, L},
580 {0x0bbe, 0x0bbf, L},
581 {0x0bc0, 0x0bc0, NSM},
582 {0x0bc1, 0x0bc2, L},
583 {0x0bc6, 0x0bc8, L},
584 {0x0bca, 0x0bcc, L},
585 {0x0bcd, 0x0bcd, NSM},
586 {0x0bd7, 0x0bd7, L},
587 {0x0be7, 0x0bf2, L},
588 {0x0bf9, 0x0bf9, ET},
589 {0x0c01, 0x0c03, L},
590 {0x0c05, 0x0c0c, L},
591 {0x0c0e, 0x0c10, L},
592 {0x0c12, 0x0c28, L},
593 {0x0c2a, 0x0c33, L},
594 {0x0c35, 0x0c39, L},
595 {0x0c3e, 0x0c40, NSM},
596 {0x0c41, 0x0c44, L},
597 {0x0c46, 0x0c48, NSM},
598 {0x0c4a, 0x0c4d, NSM},
599 {0x0c55, 0x0c56, NSM},
600 {0x0c60, 0x0c61, L},
601 {0x0c66, 0x0c6f, L},
602 {0x0c82, 0x0c83, L},
603 {0x0c85, 0x0c8c, L},
604 {0x0c8e, 0x0c90, L},
605 {0x0c92, 0x0ca8, L},
606 {0x0caa, 0x0cb3, L},
607 {0x0cb5, 0x0cb9, L},
608 {0x0cbc, 0x0cbc, NSM},
609 {0x0cbd, 0x0cc4, L},
610 {0x0cc6, 0x0cc8, L},
611 {0x0cca, 0x0ccb, L},
612 {0x0ccc, 0x0ccd, NSM},
613 {0x0cd5, 0x0cd6, L},
614 {0x0cde, 0x0cde, L},
615 {0x0ce0, 0x0ce1, L},
616 {0x0ce6, 0x0cef, L},
617 {0x0d02, 0x0d03, L},
618 {0x0d05, 0x0d0c, L},
619 {0x0d0e, 0x0d10, L},
620 {0x0d12, 0x0d28, L},
621 {0x0d2a, 0x0d39, L},
622 {0x0d3e, 0x0d40, L},
623 {0x0d41, 0x0d43, NSM},
624 {0x0d46, 0x0d48, L},
625 {0x0d4a, 0x0d4c, L},
626 {0x0d4d, 0x0d4d, NSM},
627 {0x0d57, 0x0d57, L},
628 {0x0d60, 0x0d61, L},
629 {0x0d66, 0x0d6f, L},
630 {0x0d82, 0x0d83, L},
631 {0x0d85, 0x0d96, L},
632 {0x0d9a, 0x0db1, L},
633 {0x0db3, 0x0dbb, L},
634 {0x0dbd, 0x0dbd, L},
635 {0x0dc0, 0x0dc6, L},
636 {0x0dca, 0x0dca, NSM},
637 {0x0dcf, 0x0dd1, L},
638 {0x0dd2, 0x0dd4, NSM},
639 {0x0dd6, 0x0dd6, NSM},
640 {0x0dd8, 0x0ddf, L},
641 {0x0df2, 0x0df4, L},
642 {0x0e01, 0x0e30, L},
643 {0x0e31, 0x0e31, NSM},
644 {0x0e32, 0x0e33, L},
645 {0x0e34, 0x0e3a, NSM},
646 {0x0e3f, 0x0e3f, ET},
647 {0x0e40, 0x0e46, L},
648 {0x0e47, 0x0e4e, NSM},
649 {0x0e4f, 0x0e5b, L},
650 {0x0e81, 0x0e82, L},
651 {0x0e84, 0x0e84, L},
652 {0x0e87, 0x0e88, L},
653 {0x0e8a, 0x0e8a, L},
654 {0x0e8d, 0x0e8d, L},
655 {0x0e94, 0x0e97, L},
656 {0x0e99, 0x0e9f, L},
657 {0x0ea1, 0x0ea3, L},
658 {0x0ea5, 0x0ea5, L},
659 {0x0ea7, 0x0ea7, L},
660 {0x0eaa, 0x0eab, L},
661 {0x0ead, 0x0eb0, L},
662 {0x0eb1, 0x0eb1, NSM},
663 {0x0eb2, 0x0eb3, L},
664 {0x0eb4, 0x0eb9, NSM},
665 {0x0ebb, 0x0ebc, NSM},
666 {0x0ebd, 0x0ebd, L},
667 {0x0ec0, 0x0ec4, L},
668 {0x0ec6, 0x0ec6, L},
669 {0x0ec8, 0x0ecd, NSM},
670 {0x0ed0, 0x0ed9, L},
671 {0x0edc, 0x0edd, L},
672 {0x0f00, 0x0f17, L},
673 {0x0f18, 0x0f19, NSM},
674 {0x0f1a, 0x0f34, L},
675 {0x0f35, 0x0f35, NSM},
676 {0x0f36, 0x0f36, L},
677 {0x0f37, 0x0f37, NSM},
678 {0x0f38, 0x0f38, L},
679 {0x0f39, 0x0f39, NSM},
680 {0x0f3e, 0x0f47, L},
681 {0x0f49, 0x0f6a, L},
682 {0x0f71, 0x0f7e, NSM},
683 {0x0f7f, 0x0f7f, L},
684 {0x0f80, 0x0f84, NSM},
685 {0x0f85, 0x0f85, L},
686 {0x0f86, 0x0f87, NSM},
687 {0x0f88, 0x0f8b, L},
688 {0x0f90, 0x0f97, NSM},
689 {0x0f99, 0x0fbc, NSM},
690 {0x0fbe, 0x0fc5, L},
691 {0x0fc6, 0x0fc6, NSM},
692 {0x0fc7, 0x0fcc, L},
693 {0x0fcf, 0x0fcf, L},
694 {0x1000, 0x1021, L},
695 {0x1023, 0x1027, L},
696 {0x1029, 0x102a, L},
697 {0x102c, 0x102c, L},
698 {0x102d, 0x1030, NSM},
699 {0x1031, 0x1031, L},
700 {0x1032, 0x1032, NSM},
701 {0x1036, 0x1037, NSM},
702 {0x1038, 0x1038, L},
703 {0x1039, 0x1039, NSM},
704 {0x1040, 0x1057, L},
705 {0x1058, 0x1059, NSM},
706 {0x10a0, 0x10c5, L},
707 {0x10d0, 0x10f8, L},
708 {0x10fb, 0x10fb, L},
709 {0x1100, 0x1159, L},
710 {0x115f, 0x11a2, L},
711 {0x11a8, 0x11f9, L},
712 {0x1200, 0x1206, L},
713 {0x1208, 0x1246, L},
714 {0x1248, 0x1248, L},
715 {0x124a, 0x124d, L},
716 {0x1250, 0x1256, L},
717 {0x1258, 0x1258, L},
718 {0x125a, 0x125d, L},
719 {0x1260, 0x1286, L},
720 {0x1288, 0x1288, L},
721 {0x128a, 0x128d, L},
722 {0x1290, 0x12ae, L},
723 {0x12b0, 0x12b0, L},
724 {0x12b2, 0x12b5, L},
725 {0x12b8, 0x12be, L},
726 {0x12c0, 0x12c0, L},
727 {0x12c2, 0x12c5, L},
728 {0x12c8, 0x12ce, L},
729 {0x12d0, 0x12d6, L},
730 {0x12d8, 0x12ee, L},
731 {0x12f0, 0x130e, L},
732 {0x1310, 0x1310, L},
733 {0x1312, 0x1315, L},
734 {0x1318, 0x131e, L},
735 {0x1320, 0x1346, L},
736 {0x1348, 0x135a, L},
737 {0x1361, 0x137c, L},
738 {0x13a0, 0x13f4, L},
739 {0x1401, 0x1676, L},
740 {0x1680, 0x1680, WS},
741 {0x1681, 0x169a, L},
742 {0x16a0, 0x16f0, L},
743 {0x1700, 0x170c, L},
744 {0x170e, 0x1711, L},
745 {0x1712, 0x1714, NSM},
746 {0x1720, 0x1731, L},
747 {0x1732, 0x1734, NSM},
748 {0x1735, 0x1736, L},
749 {0x1740, 0x1751, L},
750 {0x1752, 0x1753, NSM},
751 {0x1760, 0x176c, L},
752 {0x176e, 0x1770, L},
753 {0x1772, 0x1773, NSM},
754 {0x1780, 0x17b6, L},
755 {0x17b7, 0x17bd, NSM},
756 {0x17be, 0x17c5, L},
757 {0x17c6, 0x17c6, NSM},
758 {0x17c7, 0x17c8, L},
759 {0x17c9, 0x17d3, NSM},
760 {0x17d4, 0x17da, L},
761 {0x17db, 0x17db, ET},
762 {0x17dc, 0x17dc, L},
763 {0x17dd, 0x17dd, NSM},
764 {0x17e0, 0x17e9, L},
765 {0x180b, 0x180d, NSM},
766 {0x180e, 0x180e, WS},
767 {0x1810, 0x1819, L},
768 {0x1820, 0x1877, L},
769 {0x1880, 0x18a8, L},
770 {0x18a9, 0x18a9, NSM},
771 {0x1900, 0x191c, L},
772 {0x1920, 0x1922, NSM},
773 {0x1923, 0x1926, L},
774 {0x1927, 0x192b, NSM},
775 {0x1930, 0x1931, L},
776 {0x1932, 0x1932, NSM},
777 {0x1933, 0x1938, L},
778 {0x1939, 0x193b, NSM},
779 {0x1946, 0x196d, L},
780 {0x1970, 0x1974, L},
781 {0x1d00, 0x1d6b, L},
782 {0x1e00, 0x1e9b, L},
783 {0x1ea0, 0x1ef9, L},
784 {0x1f00, 0x1f15, L},
785 {0x1f18, 0x1f1d, L},
786 {0x1f20, 0x1f45, L},
787 {0x1f48, 0x1f4d, L},
788 {0x1f50, 0x1f57, L},
789 {0x1f59, 0x1f59, L},
790 {0x1f5b, 0x1f5b, L},
791 {0x1f5d, 0x1f5d, L},
792 {0x1f5f, 0x1f7d, L},
793 {0x1f80, 0x1fb4, L},
794 {0x1fb6, 0x1fbc, L},
795 {0x1fbe, 0x1fbe, L},
796 {0x1fc2, 0x1fc4, L},
797 {0x1fc6, 0x1fcc, L},
798 {0x1fd0, 0x1fd3, L},
799 {0x1fd6, 0x1fdb, L},
800 {0x1fe0, 0x1fec, L},
801 {0x1ff2, 0x1ff4, L},
802 {0x1ff6, 0x1ffc, L},
803 {0x2000, 0x200a, WS},
804 {0x200b, 0x200d, BN},
805 {0x200e, 0x200e, L},
806 {0x200f, 0x200f, R},
807 {0x2028, 0x2028, WS},
808 {0x2029, 0x2029, B},
809 {0x202a, 0x202a, LRE},
810 {0x202b, 0x202b, RLE},
811 {0x202c, 0x202c, PDF},
812 {0x202d, 0x202d, LRO},
813 {0x202e, 0x202e, RLO},
814 {0x202f, 0x202f, WS},
815 {0x2030, 0x2034, ET},
816 {0x2044, 0x2044, CS},
817 {0x205f, 0x205f, WS},
818 {0x2060, 0x2063, BN},
819 {0x206a, 0x206f, BN},
820 {0x2070, 0x2070, EN},
821 {0x2071, 0x2071, L},
822 {0x2074, 0x2079, EN},
823 {0x207a, 0x207b, ET},
824 {0x207f, 0x207f, L},
825 {0x2080, 0x2089, EN},
826 {0x208a, 0x208b, ET},
827 {0x20a0, 0x20b1, ET},
828 {0x20d0, 0x20ea, NSM},
829 {0x2102, 0x2102, L},
830 {0x2107, 0x2107, L},
831 {0x210a, 0x2113, L},
832 {0x2115, 0x2115, L},
833 {0x2119, 0x211d, L},
834 {0x2124, 0x2124, L},
835 {0x2126, 0x2126, L},
836 {0x2128, 0x2128, L},
837 {0x212a, 0x212d, L},
838 {0x212e, 0x212e, ET},
839 {0x212f, 0x2131, L},
840 {0x2133, 0x2139, L},
841 {0x213d, 0x213f, L},
842 {0x2145, 0x2149, L},
843 {0x2160, 0x2183, L},
844 {0x2212, 0x2213, ET},
845 {0x2336, 0x237a, L},
846 {0x2395, 0x2395, L},
847 {0x2488, 0x249b, EN},
848 {0x249c, 0x24e9, L},
849 {0x2800, 0x28ff, L},
850 {0x3000, 0x3000, WS},
851 {0x3005, 0x3007, L},
852 {0x3021, 0x3029, L},
853 {0x302a, 0x302f, NSM},
854 {0x3031, 0x3035, L},
855 {0x3038, 0x303c, L},
856 {0x3041, 0x3096, L},
857 {0x3099, 0x309a, NSM},
858 {0x309d, 0x309f, L},
859 {0x30a1, 0x30fa, L},
860 {0x30fc, 0x30ff, L},
861 {0x3105, 0x312c, L},
862 {0x3131, 0x318e, L},
863 {0x3190, 0x31b7, L},
864 {0x31f0, 0x321c, L},
865 {0x3220, 0x3243, L},
866 {0x3260, 0x327b, L},
867 {0x327f, 0x32b0, L},
868 {0x32c0, 0x32cb, L},
869 {0x32d0, 0x32fe, L},
870 {0x3300, 0x3376, L},
871 {0x337b, 0x33dd, L},
872 {0x33e0, 0x33fe, L},
873 {0x3400, 0x4db5, L},
874 {0x4e00, 0x9fa5, L},
875 {0xa000, 0xa48c, L},
876 {0xac00, 0xd7a3, L},
877 {0xd800, 0xfa2d, L},
878 {0xfa30, 0xfa6a, L},
879 {0xfb00, 0xfb06, L},
880 {0xfb13, 0xfb17, L},
881 {0xfb1d, 0xfb1d, R},
882 {0xfb1e, 0xfb1e, NSM},
883 {0xfb1f, 0xfb28, R},
884 {0xfb29, 0xfb29, ET},
885 {0xfb2a, 0xfb36, R},
886 {0xfb38, 0xfb3c, R},
887 {0xfb3e, 0xfb3e, R},
888 {0xfb40, 0xfb41, R},
889 {0xfb43, 0xfb44, R},
890 {0xfb46, 0xfb4f, R},
891 {0xfb50, 0xfbb1, AL},
892 {0xfbd3, 0xfd3d, AL},
893 {0xfd50, 0xfd8f, AL},
894 {0xfd92, 0xfdc7, AL},
895 {0xfdf0, 0xfdfc, AL},
896 {0xfe00, 0xfe0f, NSM},
897 {0xfe20, 0xfe23, NSM},
898 {0xfe50, 0xfe50, CS},
899 {0xfe52, 0xfe52, CS},
900 {0xfe55, 0xfe55, CS},
901 {0xfe5f, 0xfe5f, ET},
902 {0xfe62, 0xfe63, ET},
903 {0xfe69, 0xfe6a, ET},
904 {0xfe70, 0xfe74, AL},
905 {0xfe76, 0xfefc, AL},
906 {0xfeff, 0xfeff, BN},
907 {0xff03, 0xff05, ET},
908 {0xff0b, 0xff0b, ET},
909 {0xff0c, 0xff0c, CS},
910 {0xff0d, 0xff0d, ET},
911 {0xff0e, 0xff0e, CS},
912 {0xff0f, 0xff0f, ES},
913 {0xff10, 0xff19, EN},
914 {0xff1a, 0xff1a, CS},
915 {0xff21, 0xff3a, L},
916 {0xff41, 0xff5a, L},
917 {0xff66, 0xffbe, L},
918 {0xffc2, 0xffc7, L},
919 {0xffca, 0xffcf, L},
920 {0xffd2, 0xffd7, L},
921 {0xffda, 0xffdc, L},
922 {0xffe0, 0xffe1, ET},
923 {0xffe5, 0xffe6, ET},
924 {0x10000, 0x1000b, L},
925 {0x1000d, 0x10026, L},
926 {0x10028, 0x1003a, L},
927 {0x1003c, 0x1003d, L},
928 {0x1003f, 0x1004d, L},
929 {0x10050, 0x1005d, L},
930 {0x10080, 0x100fa, L},
931 {0x10100, 0x10100, L},
932 {0x10102, 0x10102, L},
933 {0x10107, 0x10133, L},
934 {0x10137, 0x1013f, L},
935 {0x10300, 0x1031e, L},
936 {0x10320, 0x10323, L},
937 {0x10330, 0x1034a, L},
938 {0x10380, 0x1039d, L},
939 {0x1039f, 0x1039f, L},
940 {0x10400, 0x1049d, L},
941 {0x104a0, 0x104a9, L},
942 {0x10800, 0x10805, R},
943 {0x10808, 0x10808, R},
944 {0x1080a, 0x10835, R},
945 {0x10837, 0x10838, R},
946 {0x1083c, 0x1083c, R},
947 {0x1083f, 0x1083f, R},
948 {0x1d000, 0x1d0f5, L},
949 {0x1d100, 0x1d126, L},
950 {0x1d12a, 0x1d166, L},
951 {0x1d167, 0x1d169, NSM},
952 {0x1d16a, 0x1d172, L},
953 {0x1d173, 0x1d17a, BN},
954 {0x1d17b, 0x1d182, NSM},
955 {0x1d183, 0x1d184, L},
956 {0x1d185, 0x1d18b, NSM},
957 {0x1d18c, 0x1d1a9, L},
958 {0x1d1aa, 0x1d1ad, NSM},
959 {0x1d1ae, 0x1d1dd, L},
960 {0x1d400, 0x1d454, L},
961 {0x1d456, 0x1d49c, L},
962 {0x1d49e, 0x1d49f, L},
963 {0x1d4a2, 0x1d4a2, L},
964 {0x1d4a5, 0x1d4a6, L},
965 {0x1d4a9, 0x1d4ac, L},
966 {0x1d4ae, 0x1d4b9, L},
967 {0x1d4bb, 0x1d4bb, L},
968 {0x1d4bd, 0x1d4c3, L},
969 {0x1d4c5, 0x1d505, L},
970 {0x1d507, 0x1d50a, L},
971 {0x1d50d, 0x1d514, L},
972 {0x1d516, 0x1d51c, L},
973 {0x1d51e, 0x1d539, L},
974 {0x1d53b, 0x1d53e, L},
975 {0x1d540, 0x1d544, L},
976 {0x1d546, 0x1d546, L},
977 {0x1d54a, 0x1d550, L},
978 {0x1d552, 0x1d6a3, L},
979 {0x1d6a8, 0x1d7c9, L},
980 {0x1d7ce, 0x1d7ff, EN},
981 {0x20000, 0x2a6d6, L},
982 {0x2f800, 0x2fa1d, L},
983 {0xe0001, 0xe0001, BN},
984 {0xe0020, 0xe007f, BN},
985 {0xe0100, 0xe01ef, NSM},
986 {0xf0000, 0xffffd, L},
d7891209 987 {0x100000, 0x10fffd, L}
7bc1ffdf 988 };
989
990 int i, j, k;
991
992 i = -1;
993 j = lenof(lookup);
994
197c43dd 995 while (j - i > 1) {
7bc1ffdf 996 k = (i + j) / 2;
997 if (ch < lookup[k].first)
998 j = k;
999 else if (ch > lookup[k].last)
1000 i = k;
1001 else
1002 return lookup[k].type;
1003 }
1004
1005 /*
1006 * If we reach here, the character was not in any of the
1007 * intervals listed in the lookup table. This means we return
1008 * ON (`Other Neutrals'). This is the appropriate code for any
1009 * character genuinely not listed in the Unicode table, and
1010 * also the table above has deliberately left out any
1011 * characters _explicitly_ listed as ON (to save space!).
1012 */
1013 return ON;
f0fccd51 1014}
1015
1016/*
fe75e503 1017 * Function exported to front ends to allow them to identify
1018 * bidi-active characters (in case, for example, the platform's
1019 * text display function can't conveniently be prevented from doing
1020 * its own bidi and so special treatment is required for characters
1021 * that would cause the bidi algorithm to activate).
1022 *
1023 * This function is passed a single Unicode code point, and returns
1024 * nonzero if the presence of this code point can possibly cause
1025 * the bidi algorithm to do any reordering. Thus, any string
1026 * composed entirely of characters for which is_rtl() returns zero
1027 * should be safe to pass to a bidi-active platform display
1028 * function without fear.
1029 *
1030 * (is_rtl() must therefore also return true for any character
1031 * which would be affected by Arabic shaping, but this isn't
1032 * important because all such characters are right-to-left so it
1033 * would have flagged them anyway.)
1034 */
1035int is_rtl(int c)
1036{
1037 /*
1038 * After careful reading of the Unicode bidi algorithm (URL as
1039 * given at the top of this file) I believe that the only
1040 * character classes which can possibly cause trouble are R,
1041 * AL, RLE and RLO. I think that any string containing no
1042 * character in any of those classes will be displayed
1043 * uniformly left-to-right by the Unicode bidi algorithm.
1044 */
1045 const int mask = (1<<R) | (1<<AL) | (1<<RLE) | (1<<RLO);
1046
1047 return mask & (1 << (getType(c)));
1048}
1049
1050/*
f0fccd51 1051 * The most significant 2 bits of each level are used to store
1052 * Override status of each character
1053 * This function sets the override bits of level according
1054 * to the value in override, and reurns the new byte.
1055 */
1056unsigned char setOverrideBits(unsigned char level, unsigned char override)
1057{
cd985a32 1058 if (override == ON)
f0fccd51 1059 return level;
cd985a32 1060 else if (override == R)
f0fccd51 1061 return level | OISR;
cd985a32 1062 else if (override == L)
f0fccd51 1063 return level | OISL;
1064 return level;
1065}
1066
/*
 * Looks at the run of equal values in `level' that ends at index
 * from-1 and returns the value stored immediately before that run.
 * Returns -1 if `from' is not positive or if the run reaches back to
 * the start of the buffer. Used to process U+202C POP DIRECTIONAL
 * FORMATTING.
 */
int getPreviousLevel(unsigned char* level, int from)
{
    int idx;
    unsigned char run_value;

    if (from <= 0)
        return -1;

    run_value = level[from - 1];

    /* walk backwards until an entry differs from the run's value */
    for (idx = from - 1; idx >= 0; idx--) {
        if (level[idx] != run_value)
            return level[idx];
    }

    return -1;
}
1087
f0fccd51 1088/* The Main shaping function, and the only one to be used
1089 * by the outside world.
1090 *
1091 * line: buffer to apply shaping to. this must be passed by doBidi() first
1092 * to: output buffer for the shaped data
1093 * count: number of characters in line
1094 */
1095int do_shape(bidi_char *line, bidi_char *to, int count)
1096{
1097 int i, tempShape, ligFlag;
1098
cd985a32 1099 for (ligFlag=i=0; i<count; i++) {
f0fccd51 1100 to[i] = line[i];
1101 tempShape = STYPE(line[i].wc);
cd985a32 1102 switch (tempShape) {
f0fccd51 1103 case SC:
1104 break;
1105
1106 case SU:
1107 break;
1108
1109 case SR:
598b33ba 1110 tempShape = (i+1 < count ? STYPE(line[i+1].wc) : SU);
cd985a32 1111 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
f0fccd51 1112 to[i].wc = SFINAL((SISOLATED(line[i].wc)));
1113 else
1114 to[i].wc = SISOLATED(line[i].wc);
1115 break;
1116
1117
1118 case SD:
1119 /* Make Ligatures */
598b33ba 1120 tempShape = (i+1 < count ? STYPE(line[i+1].wc) : SU);
cd985a32 1121 if (line[i].wc == 0x644) {
1122 if (i > 0) switch (line[i-1].wc) {
f0fccd51 1123 case 0x622:
1124 ligFlag = 1;
cd985a32 1125 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
f0fccd51 1126 to[i].wc = 0xFEF6;
1127 else
1128 to[i].wc = 0xFEF5;
1129 break;
1130 case 0x623:
1131 ligFlag = 1;
cd985a32 1132 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
f0fccd51 1133 to[i].wc = 0xFEF8;
1134 else
1135 to[i].wc = 0xFEF7;
1136 break;
1137 case 0x625:
1138 ligFlag = 1;
cd985a32 1139 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
f0fccd51 1140 to[i].wc = 0xFEFA;
1141 else
1142 to[i].wc = 0xFEF9;
1143 break;
1144 case 0x627:
1145 ligFlag = 1;
cd985a32 1146 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
f0fccd51 1147 to[i].wc = 0xFEFC;
1148 else
1149 to[i].wc = 0xFEFB;
1150 break;
1151 }
cd985a32 1152 if (ligFlag) {
f0fccd51 1153 to[i-1].wc = 0x20;
1154 ligFlag = 0;
1155 break;
1156 }
1157 }
1158
cd985a32 1159 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) {
598b33ba 1160 tempShape = (i > 0 ? STYPE(line[i-1].wc) : SU);
cd985a32 1161 if ((tempShape == SR) || (tempShape == SD) || (tempShape == SC))
1162 to[i].wc = SMEDIAL((SISOLATED(line[i].wc)));
f0fccd51 1163 else
1164 to[i].wc = SFINAL((SISOLATED(line[i].wc)));
1165 break;
1166 }
1167
598b33ba 1168 tempShape = (i > 0 ? STYPE(line[i-1].wc) : SU);
cd985a32 1169 if ((tempShape == SR) || (tempShape == SD) || (tempShape == SC))
f0fccd51 1170 to[i].wc = SINITIAL((SISOLATED(line[i].wc)));
1171 else
1172 to[i].wc = SISOLATED(line[i].wc);
1173 break;
1174
1175
1176 }
1177 }
1178 return 1;
1179}
1180
1181/*
1182 * The Main Bidi Function, and the only function that should
1183 * be used by the outside world.
1184 *
1185 * line: a buffer of size count containing text to apply
1186 * the Bidirectional algorithm to.
1187 */
1188
1189int do_bidi(bidi_char *line, int count)
1190{
1191 unsigned char* types;
1192 unsigned char* levels;
1193 unsigned char paragraphLevel;
1194 unsigned char currentEmbedding;
1195 unsigned char currentOverride;
1196 unsigned char tempType;
1197 int i, j, imax, yes, bover;
1198
1199 /* Check the presence of R or AL types as optimization */
1200 yes = 0;
cd985a32 1201 for (i=0; i<count; i++) {
31626f30 1202 int type = getType(line[i].wc);
1203 if (type == R || type == AL) {
f0fccd51 1204 yes = 1;
1205 break;
1206 }
1207 }
cd985a32 1208 if (yes == 0)
f0fccd51 1209 return L;
1210
1211 /* Initialize types, levels */
31626f30 1212 types = snewn(count, unsigned char);
1213 levels = snewn(count, unsigned char);
f0fccd51 1214
1215 /* Rule (P1) NOT IMPLEMENTED
1216 * P1. Split the text into separate paragraphs. A paragraph separator is
1217 * kept with the previous paragraph. Within each paragraph, apply all the
1218 * other rules of this algorithm.
1219 */
1220
1221 /* Rule (P2), (P3)
1222 * P2. In each paragraph, find the first character of type L, AL, or R.
1223 * P3. If a character is found in P2 and it is of type AL or R, then set
1224 * the paragraph embedding level to one; otherwise, set it to zero.
1225 */
1226 paragraphLevel = 0;
cd985a32 1227 for (i=0; i<count ; i++) {
31626f30 1228 int type = getType(line[i].wc);
1229 if (type == R || type == AL) {
f0fccd51 1230 paragraphLevel = 1;
1231 break;
31626f30 1232 } else if (type == L)
f0fccd51 1233 break;
1234 }
1235
1236 /* Rule (X1)
1237 * X1. Begin by setting the current embedding level to the paragraph
1238 * embedding level. Set the directional override status to neutral.
1239 */
1240 currentEmbedding = paragraphLevel;
1241 currentOverride = ON;
1242
1243 /* Rule (X2), (X3), (X4), (X5), (X6), (X7), (X8)
1244 * X2. With each RLE, compute the least greater odd embedding level.
1245 * X3. With each LRE, compute the least greater even embedding level.
1246 * X4. With each RLO, compute the least greater odd embedding level.
1247 * X5. With each LRO, compute the least greater even embedding level.
1248 * X6. For all types besides RLE, LRE, RLO, LRO, and PDF:
1249 * a. Set the level of the current character to the current
1250 * embedding level.
1251 * b. Whenever the directional override status is not neutral,
1252 * reset the current character type to the directional
1253 * override status.
1254 * X7. With each PDF, determine the matching embedding or override code.
1255 * If there was a valid matching code, restore (pop) the last
1256 * remembered (pushed) embedding level and directional override.
1257 * X8. All explicit directional embeddings and overrides are completely
1258 * terminated at the end of each paragraph. Paragraph separators are not
1259 * included in the embedding. (Useless here) NOT IMPLEMENTED
1260 */
1261 bover = 0;
cd985a32 1262 for (i=0; i<count; i++) {
f0fccd51 1263 tempType = getType(line[i].wc);
cd985a32 1264 switch (tempType) {
f0fccd51 1265 case RLE:
1266 currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding);
1267 levels[i] = setOverrideBits(levels[i], currentOverride);
1268 currentOverride = ON;
1269 break;
1270
1271 case LRE:
1272 currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding);
1273 levels[i] = setOverrideBits(levels[i], currentOverride);
1274 currentOverride = ON;
1275 break;
1276
1277 case RLO:
1278 currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding);
1279 tempType = currentOverride = R;
1280 bover = 1;
1281 break;
1282
1283 case LRO:
1284 currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding);
1285 tempType = currentOverride = L;
1286 bover = 1;
1287 break;
1288
1289 case PDF:
598b33ba 1290 {
1291 int prevlevel = getPreviousLevel(levels, i);
1292
1293 if (prevlevel == -1) {
1294 currentEmbedding = paragraphLevel;
1295 currentOverride = ON;
1296 } else {
1297 currentOverride = currentEmbedding & OMASK;
1298 currentEmbedding = currentEmbedding & ~OMASK;
1299 }
1300 }
f0fccd51 1301 levels[i] = currentEmbedding;
1302 break;
1303
1304 /* Whitespace is treated as neutral for now */
1305 case WS:
1306 case S:
1307 levels[i] = currentEmbedding;
1308 tempType = ON;
cd985a32 1309 if (currentOverride != ON)
f0fccd51 1310 tempType = currentOverride;
1311 break;
1312
1313 default:
1314 levels[i] = currentEmbedding;
cd985a32 1315 if (currentOverride != ON)
f0fccd51 1316 tempType = currentOverride;
1317 break;
1318
1319 }
1320 types[i] = tempType;
1321 }
1322 /* this clears out all overrides, so we can use levels safely... */
1323 /* checks bover first */
cd985a32 1324 if (bover)
1325 for (i=0; i<count; i++)
f0fccd51 1326 levels[i] = levels[i] & LMASK;
1327
1328 /* Rule (X9)
1329 * X9. Remove all RLE, LRE, RLO, LRO, PDF, and BN codes.
1330 * Here, they're converted to BN.
1331 */
cd985a32 1332 for (i=0; i<count; i++) {
1333 switch (types[i]) {
f0fccd51 1334 case RLE:
1335 case LRE:
1336 case RLO:
1337 case LRO:
1338 case PDF:
1339 types[i] = BN;
1340 break;
1341 }
1342 }
1343
1344 /* Rule (W1)
1345 * W1. Examine each non-spacing mark (NSM) in the level run, and change
1346 * the type of the NSM to the type of the previous character. If the NSM
1347 * is at the start of the level run, it will get the type of sor.
1348 */
cd985a32 1349 if (types[0] == NSM)
f0fccd51 1350 types[0] = paragraphLevel;
1351
cd985a32 1352 for (i=1; i<count; i++) {
1353 if (types[i] == NSM)
f0fccd51 1354 types[i] = types[i-1];
1355 /* Is this a safe assumption?
1356 * I assumed the previous, IS a character.
1357 */
1358 }
1359
1360 /* Rule (W2)
1361 * W2. Search backwards from each instance of a European number until the
1362 * first strong type (R, L, AL, or sor) is found. If an AL is found,
1363 * change the type of the European number to Arabic number.
1364 */
cd985a32 1365 for (i=0; i<count; i++) {
1366 if (types[i] == EN) {
f0fccd51 1367 j=i;
cd985a32 1368 while (j >= 0) {
1369 if (types[j] == AL) {
f0fccd51 1370 types[i] = AN;
1371 break;
cd985a32 1372 } else if (types[j] == R || types[j] == L) {
1373 break;
1374 }
f0fccd51 1375 j--;
1376 }
1377 }
1378 }
1379
1380 /* Rule (W3)
1381 * W3. Change all ALs to R.
1382 *
1383 * Optimization: on Rule Xn, we might set a flag on AL type
1384 * to prevent this loop in L R lines only...
1385 */
cd985a32 1386 for (i=0; i<count; i++) {
1387 if (types[i] == AL)
f0fccd51 1388 types[i] = R;
1389 }
1390
1391 /* Rule (W4)
1392 * W4. A single European separator between two European numbers changes
1393 * to a European number. A single common separator between two numbers
1394 * of the same type changes to that type.
1395 */
cd985a32 1396 for (i=1; i<(count-1); i++) {
1397 if (types[i] == ES) {
1398 if (types[i-1] == EN && types[i+1] == EN)
f0fccd51 1399 types[i] = EN;
cd985a32 1400 } else if (types[i] == CS) {
1401 if (types[i-1] == EN && types[i+1] == EN)
1402 types[i] = EN;
1403 else if (types[i-1] == AN && types[i+1] == AN)
1404 types[i] = AN;
1405 }
f0fccd51 1406 }
1407
1408 /* Rule (W5)
1409 * W5. A sequence of European terminators adjacent to European numbers
1410 * changes to all European numbers.
1411 *
1412 * Optimization: lots here... else ifs need rearrangement
1413 */
cd985a32 1414 for (i=0; i<count; i++) {
1415 if (types[i] == ET) {
1416 if (i > 0 && types[i-1] == EN) {
f0fccd51 1417 types[i] = EN;
1418 continue;
cd985a32 1419 } else if (i < count-1 && types[i+1] == EN) {
1420 types[i] = EN;
1421 continue;
1422 } else if (i < count-1 && types[i+1] == ET) {
1423 j=i;
1424 while (j <count && types[j] == ET) {
1425 j++;
1426 }
1427 if (types[j] == EN)
1428 types[i] = EN;
1429 }
f0fccd51 1430 }
1431 }
1432
1433 /* Rule (W6)
1434 * W6. Otherwise, separators and terminators change to Other Neutral:
1435 */
cd985a32 1436 for (i=0; i<count; i++) {
1437 switch (types[i]) {
f0fccd51 1438 case ES:
1439 case ET:
1440 case CS:
1441 types[i] = ON;
1442 break;
1443 }
1444 }
1445
1446 /* Rule (W7)
1447 * W7. Search backwards from each instance of a European number until
1448 * the first strong type (R, L, or sor) is found. If an L is found,
1449 * then change the type of the European number to L.
1450 */
cd985a32 1451 for (i=0; i<count; i++) {
1452 if (types[i] == EN) {
f0fccd51 1453 j=i;
cd985a32 1454 while (j >= 0) {
1455 if (types[j] == L) {
f0fccd51 1456 types[i] = L;
1457 break;
cd985a32 1458 } else if (types[j] == R || types[j] == AL) {
f0fccd51 1459 break;
1460 }
1461 j--;
1462 }
1463 }
1464 }
1465
1466 /* Rule (N1)
1467 * N1. A sequence of neutrals takes the direction of the surrounding
1468 * strong text if the text on both sides has the same direction. European
1469 * and Arabic numbers are treated as though they were R.
1470 */
cd985a32 1471 if (count >= 2 && types[0] == ON) {
1472 if ((types[1] == R) || (types[1] == EN) || (types[1] == AN))
f0fccd51 1473 types[0] = R;
cd985a32 1474 else if (types[1] == L)
f0fccd51 1475 types[0] = L;
1476 }
cd985a32 1477 for (i=1; i<(count-1); i++) {
1478 if (types[i] == ON) {
1479 if (types[i-1] == L) {
f0fccd51 1480 j=i;
cd985a32 1481 while (j<(count-1) && types[j] == ON) {
f0fccd51 1482 j++;
1483 }
cd985a32 1484 if (types[j] == L) {
1485 while (i<j) {
f0fccd51 1486 types[i] = L;
1487 i++;
1488 }
1489 }
1490
cd985a32 1491 } else if ((types[i-1] == R) ||
1492 (types[i-1] == EN) ||
1493 (types[i-1] == AN)) {
1494 j=i;
1495 while (j<(count-1) && types[j] == ON) {
1496 j++;
1497 }
1498 if ((types[j] == R) ||
1499 (types[j] == EN) ||
1500 (types[j] == AN)) {
1501 while (i<j) {
1502 types[i] = R;
1503 i++;
1504 }
1505 }
1506 }
f0fccd51 1507 }
1508 }
cd985a32 1509 if (count >= 2 && types[count-1] == ON) {
1510 if (types[count-2] == R || types[count-2] == EN || types[count-2] == AN)
f0fccd51 1511 types[count-1] = R;
cd985a32 1512 else if (types[count-2] == L)
f0fccd51 1513 types[count-1] = L;
1514 }
1515
1516 /* Rule (N2)
1517 * N2. Any remaining neutrals take the embedding direction.
1518 */
cd985a32 1519 for (i=0; i<count; i++) {
1520 if (types[i] == ON) {
1521 if ((levels[i] % 2) == 0)
f0fccd51 1522 types[i] = L;
1523 else
1524 types[i] = R;
1525 }
1526 }
1527
1528 /* Rule (I1)
1529 * I1. For all characters with an even (left-to-right) embedding
1530 * direction, those of type R go up one level and those of type AN or
1531 * EN go up two levels.
1532 */
cd985a32 1533 for (i=0; i<count; i++) {
1534 if ((levels[i] % 2) == 0) {
1535 if (types[i] == R)
f0fccd51 1536 levels[i] += 1;
cd985a32 1537 else if (types[i] == AN || types[i] == EN)
f0fccd51 1538 levels[i] += 2;
1539 }
1540 }
1541
1542 /* Rule (I2)
1543 * I2. For all characters with an odd (right-to-left) embedding direction,
1544 * those of type L, EN or AN go up one level.
1545 */
cd985a32 1546 for (i=0; i<count; i++) {
1547 if ((levels[i] % 2) == 1) {
1548 if (types[i] == L || types[i] == EN || types[i] == AN)
f0fccd51 1549 levels[i] += 1;
1550 }
1551 }
1552
1553 /* Rule (L1)
1554 * L1. On each line, reset the embedding level of the following characters
1555 * to the paragraph embedding level:
1556 * (1)segment separators, (2)paragraph separators,
1557 * (3)any sequence of whitespace characters preceding
1558 * a segment separator or paragraph separator,
1559 * (4)and any sequence of white space characters
1560 * at the end of the line.
1561 * The types of characters used here are the original types, not those
1562 * modified by the previous phase.
1563 */
1564 j=count-1;
cd985a32 1565 while (j>0 && (getType(line[j].wc) == WS)) {
f0fccd51 1566 j--;
1567 }
cd985a32 1568 if (j < (count-1)) {
1569 for (j++; j<count; j++)
f0fccd51 1570 levels[j] = paragraphLevel;
1571 }
cd985a32 1572 for (i=0; i<count; i++) {
f0fccd51 1573 tempType = getType(line[i].wc);
cd985a32 1574 if (tempType == WS) {
f0fccd51 1575 j=i;
cd985a32 1576 while (j<count && (getType(line[j].wc) == WS)) {
f0fccd51 1577 j++;
1578 }
cd985a32 1579 if (j==count || getType(line[j].wc) == B ||
1580 getType(line[j].wc) == S) {
1581 for (j--; j>=i ; j--) {
f0fccd51 1582 levels[j] = paragraphLevel;
1583 }
1584 }
cd985a32 1585 } else if (tempType == B || tempType == S) {
1586 levels[i] = paragraphLevel;
1587 }
f0fccd51 1588 }
1589
1590 /* Rule (L4) NOT IMPLEMENTED
1591 * L4. A character that possesses the mirrored property as specified by
1592 * Section 4.7, Mirrored, must be depicted by a mirrored glyph if the
1593 * resolved directionality of that character is R.
1594 */
1595 /* Note: this is implemented before L2 for efficiency */
cd985a32 1596 for (i=0; i<count; i++)
1597 if ((levels[i] % 2) == 1)
f0fccd51 1598 doMirror(&line[i].wc);
1599
1600 /* Rule (L2)
1601 * L2. From the highest level found in the text to the lowest odd level on
1602 * each line, including intermediate levels not actually present in the
1603 * text, reverse any contiguous sequence of characters that are at that
1604 * level or higher
1605 */
1606 /* we flip the character string and leave the level array */
1607 imax = 0;
1608 i=0;
1609 tempType = levels[0];
cd985a32 1610 while (i < count) {
1611 if (levels[i] > tempType) {
f0fccd51 1612 tempType = levels[i];
1613 imax=i;
1614 }
1615 i++;
1616 }
1617 /* maximum level in tempType, its index in imax. */
cd985a32 1618 while (tempType > 0) { /* loop from highest level to the least odd, */
1619 /* which i assume is 1 */
f0fccd51 1620 flipThisRun(line, levels, tempType, count);
1621 tempType--;
1622 }
1623
1624 /* Rule (L3) NOT IMPLEMENTED
1625 * L3. Combining marks applied to a right-to-left base character will at
1626 * this point precede their base character. If the rendering engine
1627 * expects them to follow the base characters in the final display
1628 * process, then the ordering of the marks and the base character must
1629 * be reversed.
1630 */
31626f30 1631 sfree(types);
1632 sfree(levels);
f0fccd51 1633 return R;
1634}
1635
1636
/*
 * Replace a character by its mirrored counterpart, if it has one.
 *
 * ch: pointer to the character to examine. If *ch appears in the
 *     table below, it is overwritten with the paired code point;
 *     otherwise it is left unchanged.
 *
 * The table is split by the high byte of the code point so that only
 * one small switch is consulted per call.
 */
void doMirror(wchar_t* ch)
{
    if ((*ch & 0xFF00) == 0) {
        switch (*ch) {
          case 0x0028: *ch = 0x0029; break;
          case 0x0029: *ch = 0x0028; break;
          case 0x003C: *ch = 0x003E; break;
          case 0x003E: *ch = 0x003C; break;
          case 0x005B: *ch = 0x005D; break;
          case 0x005D: *ch = 0x005B; break;
          case 0x007B: *ch = 0x007D; break;
          case 0x007D: *ch = 0x007B; break;
          case 0x00AB: *ch = 0x00BB; break;
          case 0x00BB: *ch = 0x00AB; break;
        }
    } else if ((*ch & 0xFF00) == 0x2000) {
        switch (*ch) {
          case 0x2039: *ch = 0x203A; break;
          case 0x203A: *ch = 0x2039; break;
          case 0x2045: *ch = 0x2046; break;
          case 0x2046: *ch = 0x2045; break;
          case 0x207D: *ch = 0x207E; break;
          case 0x207E: *ch = 0x207D; break;
          case 0x208D: *ch = 0x208E; break;
          case 0x208E: *ch = 0x208D; break;
        }
    } else if ((*ch & 0xFF00) == 0x2200) {
        switch (*ch) {
          case 0x2208: *ch = 0x220B; break;
          case 0x2209: *ch = 0x220C; break;
          case 0x220A: *ch = 0x220D; break;
          case 0x220B: *ch = 0x2208; break;
          case 0x220C: *ch = 0x2209; break;
          case 0x220D: *ch = 0x220A; break;
          case 0x2215: *ch = 0x29F5; break;
          case 0x223C: *ch = 0x223D; break;
          case 0x223D: *ch = 0x223C; break;
          case 0x2243: *ch = 0x22CD; break;
          case 0x2252: *ch = 0x2253; break;
          case 0x2253: *ch = 0x2252; break;
          case 0x2254: *ch = 0x2255; break;
          case 0x2255: *ch = 0x2254; break;
          case 0x2264: *ch = 0x2265; break;
          case 0x2265: *ch = 0x2264; break;
          case 0x2266: *ch = 0x2267; break;
          case 0x2267: *ch = 0x2266; break;
          case 0x2268: *ch = 0x2269; break;
          case 0x2269: *ch = 0x2268; break;
          case 0x226A: *ch = 0x226B; break;
          case 0x226B: *ch = 0x226A; break;
          case 0x226E: *ch = 0x226F; break;
          case 0x226F: *ch = 0x226E; break;
          case 0x2270: *ch = 0x2271; break;
          case 0x2271: *ch = 0x2270; break;
          case 0x2272: *ch = 0x2273; break;
          case 0x2273: *ch = 0x2272; break;
          case 0x2274: *ch = 0x2275; break;
          case 0x2275: *ch = 0x2274; break;
          case 0x2276: *ch = 0x2277; break;
          case 0x2277: *ch = 0x2276; break;
          case 0x2278: *ch = 0x2279; break;
          case 0x2279: *ch = 0x2278; break;
          case 0x227A: *ch = 0x227B; break;
          case 0x227B: *ch = 0x227A; break;
          case 0x227C: *ch = 0x227D; break;
          case 0x227D: *ch = 0x227C; break;
          case 0x227E: *ch = 0x227F; break;
          case 0x227F: *ch = 0x227E; break;
          case 0x2280: *ch = 0x2281; break;
          case 0x2281: *ch = 0x2280; break;
          case 0x2282: *ch = 0x2283; break;
          case 0x2283: *ch = 0x2282; break;
          case 0x2284: *ch = 0x2285; break;
          case 0x2285: *ch = 0x2284; break;
          case 0x2286: *ch = 0x2287; break;
          case 0x2287: *ch = 0x2286; break;
          case 0x2288: *ch = 0x2289; break;
          case 0x2289: *ch = 0x2288; break;
          case 0x228A: *ch = 0x228B; break;
          case 0x228B: *ch = 0x228A; break;
          case 0x228F: *ch = 0x2290; break;
          case 0x2290: *ch = 0x228F; break;
          case 0x2291: *ch = 0x2292; break;
          case 0x2292: *ch = 0x2291; break;
          case 0x2298: *ch = 0x29B8; break;
          case 0x22A2: *ch = 0x22A3; break;
          case 0x22A3: *ch = 0x22A2; break;
          case 0x22A6: *ch = 0x2ADE; break;
          case 0x22A8: *ch = 0x2AE4; break;
          case 0x22A9: *ch = 0x2AE3; break;
          case 0x22AB: *ch = 0x2AE5; break;
          case 0x22B0: *ch = 0x22B1; break;
          case 0x22B1: *ch = 0x22B0; break;
          case 0x22B2: *ch = 0x22B3; break;
          case 0x22B3: *ch = 0x22B2; break;
          case 0x22B4: *ch = 0x22B5; break;
          case 0x22B5: *ch = 0x22B4; break;
          case 0x22B6: *ch = 0x22B7; break;
          case 0x22B7: *ch = 0x22B6; break;
          case 0x22C9: *ch = 0x22CA; break;
          case 0x22CA: *ch = 0x22C9; break;
          case 0x22CB: *ch = 0x22CC; break;
          case 0x22CC: *ch = 0x22CB; break;
          case 0x22CD: *ch = 0x2243; break;
          case 0x22D0: *ch = 0x22D1; break;
          case 0x22D1: *ch = 0x22D0; break;
          case 0x22D6: *ch = 0x22D7; break;
          case 0x22D7: *ch = 0x22D6; break;
          case 0x22D8: *ch = 0x22D9; break;
          case 0x22D9: *ch = 0x22D8; break;
          case 0x22DA: *ch = 0x22DB; break;
          case 0x22DB: *ch = 0x22DA; break;
          case 0x22DC: *ch = 0x22DD; break;
          case 0x22DD: *ch = 0x22DC; break;
          case 0x22DE: *ch = 0x22DF; break;
          case 0x22DF: *ch = 0x22DE; break;
          case 0x22E0: *ch = 0x22E1; break;
          case 0x22E1: *ch = 0x22E0; break;
          case 0x22E2: *ch = 0x22E3; break;
          case 0x22E3: *ch = 0x22E2; break;
          case 0x22E4: *ch = 0x22E5; break;
          case 0x22E5: *ch = 0x22E4; break;
          case 0x22E6: *ch = 0x22E7; break;
          case 0x22E7: *ch = 0x22E6; break;
          case 0x22E8: *ch = 0x22E9; break;
          case 0x22E9: *ch = 0x22E8; break;
          case 0x22EA: *ch = 0x22EB; break;
          case 0x22EB: *ch = 0x22EA; break;
          case 0x22EC: *ch = 0x22ED; break;
          case 0x22ED: *ch = 0x22EC; break;
          case 0x22F0: *ch = 0x22F1; break;
          case 0x22F1: *ch = 0x22F0; break;
          case 0x22F2: *ch = 0x22FA; break;
          case 0x22F3: *ch = 0x22FB; break;
          case 0x22F4: *ch = 0x22FC; break;
          case 0x22F6: *ch = 0x22FD; break;
          case 0x22F7: *ch = 0x22FE; break;
          case 0x22FA: *ch = 0x22F2; break;
          case 0x22FB: *ch = 0x22F3; break;
          case 0x22FC: *ch = 0x22F4; break;
          case 0x22FD: *ch = 0x22F6; break;
          case 0x22FE: *ch = 0x22F7; break;
        }
    } else if ((*ch & 0xFF00) == 0x2300) {
        switch (*ch) {
          case 0x2308: *ch = 0x2309; break;
          case 0x2309: *ch = 0x2308; break;
          case 0x230A: *ch = 0x230B; break;
          case 0x230B: *ch = 0x230A; break;
          case 0x2329: *ch = 0x232A; break;
          case 0x232A: *ch = 0x2329; break;
        }
    } else if ((*ch & 0xFF00) == 0x2700) {
        switch (*ch) {
          case 0x2768: *ch = 0x2769; break;
          case 0x2769: *ch = 0x2768; break;
          case 0x276A: *ch = 0x276B; break;
          case 0x276B: *ch = 0x276A; break;
          case 0x276C: *ch = 0x276D; break;
          case 0x276D: *ch = 0x276C; break;
          case 0x276E: *ch = 0x276F; break;
          case 0x276F: *ch = 0x276E; break;
          case 0x2770: *ch = 0x2771; break;
          case 0x2771: *ch = 0x2770; break;
          case 0x2772: *ch = 0x2773; break;
          case 0x2773: *ch = 0x2772; break;
          case 0x2774: *ch = 0x2775; break;
          case 0x2775: *ch = 0x2774; break;
          case 0x27D5: *ch = 0x27D6; break;
          case 0x27D6: *ch = 0x27D5; break;
          case 0x27DD: *ch = 0x27DE; break;
          case 0x27DE: *ch = 0x27DD; break;
          case 0x27E2: *ch = 0x27E3; break;
          case 0x27E3: *ch = 0x27E2; break;
          case 0x27E4: *ch = 0x27E5; break;
          case 0x27E5: *ch = 0x27E4; break;
          case 0x27E6: *ch = 0x27E7; break;
          case 0x27E7: *ch = 0x27E6; break;
          case 0x27E8: *ch = 0x27E9; break;
          case 0x27E9: *ch = 0x27E8; break;
          case 0x27EA: *ch = 0x27EB; break;
          case 0x27EB: *ch = 0x27EA; break;
        }
    } else if ((*ch & 0xFF00) == 0x2900) {
        switch (*ch) {
          case 0x2983: *ch = 0x2984; break;
          case 0x2984: *ch = 0x2983; break;
          case 0x2985: *ch = 0x2986; break;
          case 0x2986: *ch = 0x2985; break;
          case 0x2987: *ch = 0x2988; break;
          case 0x2988: *ch = 0x2987; break;
          case 0x2989: *ch = 0x298A; break;
          case 0x298A: *ch = 0x2989; break;
          case 0x298B: *ch = 0x298C; break;
          case 0x298C: *ch = 0x298B; break;
          case 0x298D: *ch = 0x2990; break;
          case 0x298E: *ch = 0x298F; break;
          case 0x298F: *ch = 0x298E; break;
          case 0x2990: *ch = 0x298D; break;
          case 0x2991: *ch = 0x2992; break;
          case 0x2992: *ch = 0x2991; break;
          case 0x2993: *ch = 0x2994; break;
          case 0x2994: *ch = 0x2993; break;
          case 0x2995: *ch = 0x2996; break;
          case 0x2996: *ch = 0x2995; break;
          case 0x2997: *ch = 0x2998; break;
          case 0x2998: *ch = 0x2997; break;
          case 0x29B8: *ch = 0x2298; break;
          case 0x29C0: *ch = 0x29C1; break;
          case 0x29C1: *ch = 0x29C0; break;
          case 0x29C4: *ch = 0x29C5; break;
          case 0x29C5: *ch = 0x29C4; break;
          case 0x29CF: *ch = 0x29D0; break;
          case 0x29D0: *ch = 0x29CF; break;
          case 0x29D1: *ch = 0x29D2; break;
          case 0x29D2: *ch = 0x29D1; break;
          case 0x29D4: *ch = 0x29D5; break;
          case 0x29D5: *ch = 0x29D4; break;
          case 0x29D8: *ch = 0x29D9; break;
          case 0x29D9: *ch = 0x29D8; break;
          case 0x29DA: *ch = 0x29DB; break;
          case 0x29DB: *ch = 0x29DA; break;
          case 0x29F5: *ch = 0x2215; break;
          case 0x29F8: *ch = 0x29F9; break;
          case 0x29F9: *ch = 0x29F8; break;
          case 0x29FC: *ch = 0x29FD; break;
          case 0x29FD: *ch = 0x29FC; break;
        }
    } else if ((*ch & 0xFF00) == 0x2A00) {
        switch (*ch) {
          case 0x2A2B: *ch = 0x2A2C; break;
          case 0x2A2C: *ch = 0x2A2B; break;
          /* U+2A2D and U+2A2E mirror each other (left/right half circle) */
          case 0x2A2D: *ch = 0x2A2E; break;
          case 0x2A2E: *ch = 0x2A2D; break;
          case 0x2A34: *ch = 0x2A35; break;
          case 0x2A35: *ch = 0x2A34; break;
          case 0x2A3C: *ch = 0x2A3D; break;
          case 0x2A3D: *ch = 0x2A3C; break;
          case 0x2A64: *ch = 0x2A65; break;
          case 0x2A65: *ch = 0x2A64; break;
          case 0x2A79: *ch = 0x2A7A; break;
          case 0x2A7A: *ch = 0x2A79; break;
          case 0x2A7D: *ch = 0x2A7E; break;
          case 0x2A7E: *ch = 0x2A7D; break;
          case 0x2A7F: *ch = 0x2A80; break;
          case 0x2A80: *ch = 0x2A7F; break;
          case 0x2A81: *ch = 0x2A82; break;
          case 0x2A82: *ch = 0x2A81; break;
          case 0x2A83: *ch = 0x2A84; break;
          case 0x2A84: *ch = 0x2A83; break;
          case 0x2A8B: *ch = 0x2A8C; break;
          case 0x2A8C: *ch = 0x2A8B; break;
          case 0x2A91: *ch = 0x2A92; break;
          case 0x2A92: *ch = 0x2A91; break;
          case 0x2A93: *ch = 0x2A94; break;
          case 0x2A94: *ch = 0x2A93; break;
          case 0x2A95: *ch = 0x2A96; break;
          case 0x2A96: *ch = 0x2A95; break;
          case 0x2A97: *ch = 0x2A98; break;
          case 0x2A98: *ch = 0x2A97; break;
          case 0x2A99: *ch = 0x2A9A; break;
          case 0x2A9A: *ch = 0x2A99; break;
          case 0x2A9B: *ch = 0x2A9C; break;
          case 0x2A9C: *ch = 0x2A9B; break;
          case 0x2AA1: *ch = 0x2AA2; break;
          case 0x2AA2: *ch = 0x2AA1; break;
          case 0x2AA6: *ch = 0x2AA7; break;
          case 0x2AA7: *ch = 0x2AA6; break;
          case 0x2AA8: *ch = 0x2AA9; break;
          case 0x2AA9: *ch = 0x2AA8; break;
          case 0x2AAA: *ch = 0x2AAB; break;
          case 0x2AAB: *ch = 0x2AAA; break;
          case 0x2AAC: *ch = 0x2AAD; break;
          case 0x2AAD: *ch = 0x2AAC; break;
          case 0x2AAF: *ch = 0x2AB0; break;
          case 0x2AB0: *ch = 0x2AAF; break;
          case 0x2AB3: *ch = 0x2AB4; break;
          case 0x2AB4: *ch = 0x2AB3; break;
          case 0x2ABB: *ch = 0x2ABC; break;
          case 0x2ABC: *ch = 0x2ABB; break;
          case 0x2ABD: *ch = 0x2ABE; break;
          case 0x2ABE: *ch = 0x2ABD; break;
          case 0x2ABF: *ch = 0x2AC0; break;
          case 0x2AC0: *ch = 0x2ABF; break;
          case 0x2AC1: *ch = 0x2AC2; break;
          case 0x2AC2: *ch = 0x2AC1; break;
          case 0x2AC3: *ch = 0x2AC4; break;
          case 0x2AC4: *ch = 0x2AC3; break;
          case 0x2AC5: *ch = 0x2AC6; break;
          case 0x2AC6: *ch = 0x2AC5; break;
          case 0x2ACD: *ch = 0x2ACE; break;
          case 0x2ACE: *ch = 0x2ACD; break;
          case 0x2ACF: *ch = 0x2AD0; break;
          case 0x2AD0: *ch = 0x2ACF; break;
          case 0x2AD1: *ch = 0x2AD2; break;
          case 0x2AD2: *ch = 0x2AD1; break;
          case 0x2AD3: *ch = 0x2AD4; break;
          case 0x2AD4: *ch = 0x2AD3; break;
          case 0x2AD5: *ch = 0x2AD6; break;
          case 0x2AD6: *ch = 0x2AD5; break;
          case 0x2ADE: *ch = 0x22A6; break;
          case 0x2AE3: *ch = 0x22A9; break;
          case 0x2AE4: *ch = 0x22A8; break;
          case 0x2AE5: *ch = 0x22AB; break;
          case 0x2AEC: *ch = 0x2AED; break;
          case 0x2AED: *ch = 0x2AEC; break;
          case 0x2AF7: *ch = 0x2AF8; break;
          case 0x2AF8: *ch = 0x2AF7; break;
          case 0x2AF9: *ch = 0x2AFA; break;
          case 0x2AFA: *ch = 0x2AF9; break;
        }
    } else if ((*ch & 0xFF00) == 0x3000) {
        switch (*ch) {
          case 0x3008: *ch = 0x3009; break;
          case 0x3009: *ch = 0x3008; break;
          case 0x300A: *ch = 0x300B; break;
          case 0x300B: *ch = 0x300A; break;
          case 0x300C: *ch = 0x300D; break;
          case 0x300D: *ch = 0x300C; break;
          case 0x300E: *ch = 0x300F; break;
          case 0x300F: *ch = 0x300E; break;
          case 0x3010: *ch = 0x3011; break;
          case 0x3011: *ch = 0x3010; break;
          case 0x3014: *ch = 0x3015; break;
          case 0x3015: *ch = 0x3014; break;
          case 0x3016: *ch = 0x3017; break;
          case 0x3017: *ch = 0x3016; break;
          case 0x3018: *ch = 0x3019; break;
          case 0x3019: *ch = 0x3018; break;
          case 0x301A: *ch = 0x301B; break;
          case 0x301B: *ch = 0x301A; break;
        }
    } else if ((*ch & 0xFF00) == 0xFF00) {
        switch (*ch) {
          case 0xFF08: *ch = 0xFF09; break;
          case 0xFF09: *ch = 0xFF08; break;
          case 0xFF1C: *ch = 0xFF1E; break;
          case 0xFF1E: *ch = 0xFF1C; break;
          case 0xFF3B: *ch = 0xFF3D; break;
          case 0xFF3D: *ch = 0xFF3B; break;
          case 0xFF5B: *ch = 0xFF5D; break;
          case 0xFF5D: *ch = 0xFF5B; break;
          case 0xFF5F: *ch = 0xFF60; break;
          case 0xFF60: *ch = 0xFF5F; break;
          case 0xFF62: *ch = 0xFF63; break;
          case 0xFF63: *ch = 0xFF62; break;
        }
    }
}
197c43dd 1991
1992#ifdef TEST_GETTYPE
1993
1994#include <stdio.h>
1995#include <assert.h>
1996
1997int main(int argc, char **argv)
1998{
1999 static const struct { int type; char *name; } typetoname[] = {
2000#define TYPETONAME(X) { X , #X }
2001 TYPETONAME(L),
2002 TYPETONAME(LRE),
2003 TYPETONAME(LRO),
2004 TYPETONAME(R),
2005 TYPETONAME(AL),
2006 TYPETONAME(RLE),
2007 TYPETONAME(RLO),
2008 TYPETONAME(PDF),
2009 TYPETONAME(EN),
2010 TYPETONAME(ES),
2011 TYPETONAME(ET),
2012 TYPETONAME(AN),
2013 TYPETONAME(CS),
2014 TYPETONAME(NSM),
2015 TYPETONAME(BN),
2016 TYPETONAME(B),
2017 TYPETONAME(S),
2018 TYPETONAME(WS),
2019 TYPETONAME(ON),
2020#undef TYPETONAME
2021 };
2022 int i;
2023
2024 for (i = 1; i < argc; i++) {
2025 unsigned long chr = strtoul(argv[i], NULL, 0);
2026 int type = getType(chr);
2027 assert(typetoname[type].type == type);
2028 printf("U+%04x: %s\n", chr, typetoname[type].name);
2029 }
2030
2031 return 0;
2032}
2033
2034#endif