Chris Boucher spotted that after "-nc" was added (r6823), cfg->ssh_nc_host
[u/mdw/putty] / minibidi.c
CommitLineData
f0fccd51 1/************************************************************************
ed47c4e2 2 * $Id$
f0fccd51 3 *
4 * ------------
5 * Description:
6 * ------------
7 * This is an implementation of Unicode's Bidirectional Algorithm
8 * (known as UAX #9).
9 *
10 * http://www.unicode.org/reports/tr9/
cd985a32 11 *
f0fccd51 12 * Author: Ahmad Khalifa
13 *
14 * -----------------
15 * Revision Details: (Updated by Revision Control System)
16 * -----------------
ed47c4e2 17 * $Date$
18 * $Author$
19 * $Revision$
f0fccd51 20 *
21 * (www.arabeyes.org - under MIT license)
22 *
23 ************************************************************************/
24
25/*
26 * TODO:
27 * =====
28 * - Explicit marks need to be handled (they are not 100% now)
29 * - Ligatures
30 */
31
31626f30 32#include <stdlib.h> /* definition of wchar_t*/
f0fccd51 33
31626f30 34#include "misc.h"
35
/*
 * Bit layout of a working `level' byte: the low six bits carry the
 * embedding level, the top two bits carry the directional override
 * status (see setOverrideBits).
 */
#define LMASK 0x3F              /* Embedding Level mask */
#define OMASK 0xC0              /* Override mask */
#define OISL  0x80              /* Override is L */
#define OISR  0x40              /* Override is R */
40
/* For standalone compilation in a testing mode.
 * Still depends on the PuTTY headers for snewn and sfree, but can avoid
 * _linking_ with any other PuTTY code: when TEST_GETTYPE is defined the
 * allocator names are redirected to plain malloc/free. */
#ifdef TEST_GETTYPE
#define safemalloc malloc
#define safefree free
#endif
48
/*
 * Shaping Helpers
 *
 * STYPE(xh)     - joining type of code point xh; anything outside
 *                 SHAPE_FIRST..SHAPE_LAST is reported as SU.
 * SISOLATED(xh) - isolated presentation form of xh. No range check is
 *                 done here: only use it on a code point for which
 *                 STYPE() returned a table entry.
 * SFINAL / SINITIAL / SMEDIAL
 *               - take an *isolated form* and add 1, 2 or 3 to reach
 *                 the final, initial or medial form; this relies on the
 *                 forms occupying consecutive code points in that order.
 */
#define STYPE(xh) ((((xh) >= SHAPE_FIRST) && ((xh) <= SHAPE_LAST)) ? \
                   shapetypes[(xh)-SHAPE_FIRST].type : SU)
#define SISOLATED(xh) (shapetypes[(xh)-SHAPE_FIRST].form_b)
#define SFINAL(xh)    ((xh)+1)
#define SINITIAL(xh)  ((xh)+2)
#define SMEDIAL(xh)   ((xh)+3)

/* Smallest odd (resp. even) number strictly greater than x. */
#define leastGreaterOdd(x)  ( ((x)+1) | 1 )
#define leastGreaterEven(x) ( ((x)+2) &~ 1 )
59
/*
 * One character cell of a line handed to do_bidi()/do_shape().
 *
 * wc     - the character the algorithms work on; do_shape() rewrites it
 *          in its output buffer.
 * origwc - not modified by the code in this part of the file
 *          (presumably the character as originally supplied - confirm
 *          against the callers).
 * index  - not modified by the code in this part of the file
 *          (presumably the cell's original position, carried along when
 *          cells are reordered - confirm against the callers).
 */
typedef struct bidi_char {
    wchar_t origwc, wc;
    unsigned short index;
} bidi_char;
64
65/* function declarations */
66void flipThisRun(bidi_char *from, unsigned char* level, int max, int count);
67int findIndexOfRun(unsigned char* level , int start, int count, int tlevel);
7bc1ffdf 68unsigned char getType(int ch);
31626f30 69unsigned char setOverrideBits(unsigned char level, unsigned char override);
70int getPreviousLevel(unsigned char* level, int from);
31626f30 71int do_shape(bidi_char *line, bidi_char *to, int count);
72int do_bidi(bidi_char *line, int count);
73void doMirror(wchar_t* ch);
74
/*
 * character types
 *
 * Bidirectional character classes as named by UAX #9. The numeric
 * values are used as plain integers elsewhere in this file, so the
 * order of the constants must not be changed.
 */
enum {
    L,          /* Left-to-Right */
    LRE,        /* Left-to-Right Embedding */
    LRO,        /* Left-to-Right Override */
    R,          /* Right-to-Left */
    AL,         /* Right-to-Left Arabic */
    RLE,        /* Right-to-Left Embedding */
    RLO,        /* Right-to-Left Override */
    PDF,        /* Pop Directional Format */
    EN,         /* European Number */
    ES,         /* European Number Separator */
    ET,         /* European Number Terminator */
    AN,         /* Arabic Number */
    CS,         /* Common Number Separator */
    NSM,        /* Non-Spacing Mark */
    BN,         /* Boundary Neutral */
    B,          /* Paragraph Separator */
    S,          /* Segment Separator */
    WS,         /* Whitespace */
    ON          /* Other Neutrals */
};
97
/* Shaping Types (joining behaviour of a letter) */
enum {
    SL, /* Left-Joining, doesn't exist in U+0600 - U+06FF */
    SR, /* Right-Joining, ie has Isolated, Final */
    SD, /* Dual-Joining, ie has Isolated, Final, Initial, Medial */
    SU, /* Non-Joining */
    SC  /* Join-Causing, like U+0640 (TATWEEL) */
};
106
/*
 * One row of the shaping table: the joining type of a letter (one of
 * the S* shaping-type constants) and the code point of its isolated
 * presentation form (0 where the table has none).
 */
typedef struct {
    char type;
    wchar_t form_b;
} shape_node;
111
112/* Kept near the actual table, for verification. */
113#define SHAPE_FIRST 0x621
114#define SHAPE_LAST 0x64A
115
116const shape_node shapetypes[] = {
117 /* index, Typ, Iso, Ligature Index*/
118 /* 621 */ {SU, 0xFE80},
119 /* 622 */ {SR, 0xFE81},
120 /* 623 */ {SR, 0xFE83},
121 /* 624 */ {SR, 0xFE85},
122 /* 625 */ {SR, 0xFE87},
123 /* 626 */ {SD, 0xFE89},
124 /* 627 */ {SR, 0xFE8D},
125 /* 628 */ {SD, 0xFE8F},
126 /* 629 */ {SR, 0xFE93},
127 /* 62A */ {SD, 0xFE95},
128 /* 62B */ {SD, 0xFE99},
129 /* 62C */ {SD, 0xFE9D},
130 /* 62D */ {SD, 0xFEA1},
131 /* 62E */ {SD, 0xFEA5},
132 /* 62F */ {SR, 0xFEA9},
133 /* 630 */ {SR, 0xFEAB},
134 /* 631 */ {SR, 0xFEAD},
135 /* 632 */ {SR, 0xFEAF},
136 /* 633 */ {SD, 0xFEB1},
137 /* 634 */ {SD, 0xFEB5},
138 /* 635 */ {SD, 0xFEB9},
139 /* 636 */ {SD, 0xFEBD},
140 /* 637 */ {SD, 0xFEC1},
141 /* 638 */ {SD, 0xFEC5},
142 /* 639 */ {SD, 0xFEC9},
143 /* 63A */ {SD, 0xFECD},
144 /* 63B */ {SU, 0x0},
145 /* 63C */ {SU, 0x0},
146 /* 63D */ {SU, 0x0},
147 /* 63E */ {SU, 0x0},
148 /* 63F */ {SU, 0x0},
149 /* 640 */ {SC, 0x0},
150 /* 641 */ {SD, 0xFED1},
151 /* 642 */ {SD, 0xFED5},
152 /* 643 */ {SD, 0xFED9},
153 /* 644 */ {SD, 0xFEDD},
154 /* 645 */ {SD, 0xFEE1},
155 /* 646 */ {SD, 0xFEE5},
156 /* 647 */ {SD, 0xFEE9},
157 /* 648 */ {SR, 0xFEED},
158 /* 649 */ {SR, 0xFEEF}, /* SD */
d7891209 159 /* 64A */ {SD, 0xFEF1}
31626f30 160};
161
162/*
f0fccd51 163 * Flips the text buffer, according to max level, and
164 * all higher levels
cd985a32 165 *
f0fccd51 166 * Input:
167 * from: text buffer, on which to apply flipping
168 * level: resolved levels buffer
169 * max: the maximum level found in this line (should be unsigned char)
170 * count: line size in bidi_char
171 */
172void flipThisRun(bidi_char *from, unsigned char *level, int max, int count)
173{
31626f30 174 int i, j, k, tlevel;
f0fccd51 175 bidi_char temp;
176
177 j = i = 0;
cd985a32 178 while (i<count && j<count) {
f0fccd51 179
180 /* find the start of the run of level=max */
181 tlevel = max;
182 i = j = findIndexOfRun(level, i, count, max);
183 /* find the end of the run */
cd985a32 184 while (i<count && tlevel <= level[i]) {
f0fccd51 185 i++;
186 }
31626f30 187 for (k = i - 1; k > j; k--, j++) {
188 temp = from[k];
189 from[k] = from[j];
190 from[j] = temp;
f0fccd51 191 }
192 }
193}
194
/*
 * Finds the index of a run with level equals tlevel
 *
 * Scans level[start .. count-1] and returns the first index whose
 * entry equals tlevel. Returns `count' when there is no such entry
 * (including when start >= count).
 */
int findIndexOfRun(unsigned char* level , int start, int count, int tlevel)
{
    int pos = start;

    while (pos < count && level[pos] != tlevel)
        pos++;

    return (pos < count) ? pos : count;
}
208
209/*
7bc1ffdf 210 * Returns the bidi character type of ch.
211 *
212 * The data table in this function is constructed from the Unicode
213 * Character Database, downloadable from unicode.org at the URL
214 *
215 * http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
216 *
217 * by the following fragment of Perl:
218
219perl -ne 'split ";"; $num = hex $_[0]; $type = $_[4];' \
220 -e '$fl = ($_[1] =~ /First/ ? 1 : $_[1] =~ /Last/ ? 2 : 0);' \
221 -e 'if ($type eq $runtype and ($runend == $num-1 or ' \
222 -e ' ($fl==2 and $pfl==1))) {$runend = $num;} else { &reset; }' \
223 -e '$pfl=$fl; END { &reset }; sub reset {' \
224 -e 'printf" {0x%04x, 0x%04x, %s},\n",$runstart,$runend,$runtype' \
225 -e ' if defined $runstart and $runtype ne "ON";' \
226 -e '$runstart=$runend=$num; $runtype=$type;}' \
227 UnicodeData.txt
228
f0fccd51 229 */
7bc1ffdf 230unsigned char getType(int ch)
f0fccd51 231{
7bc1ffdf 232 static const struct {
233 int first, last, type;
234 } lookup[] = {
235 {0x0000, 0x0008, BN},
236 {0x0009, 0x0009, S},
237 {0x000a, 0x000a, B},
238 {0x000b, 0x000b, S},
239 {0x000c, 0x000c, WS},
240 {0x000d, 0x000d, B},
241 {0x000e, 0x001b, BN},
242 {0x001c, 0x001e, B},
243 {0x001f, 0x001f, S},
244 {0x0020, 0x0020, WS},
245 {0x0023, 0x0025, ET},
246 {0x002b, 0x002b, ES},
247 {0x002c, 0x002c, CS},
248 {0x002d, 0x002d, ES},
249 {0x002e, 0x002f, CS},
250 {0x0030, 0x0039, EN},
251 {0x003a, 0x003a, CS},
252 {0x0041, 0x005a, L},
253 {0x0061, 0x007a, L},
254 {0x007f, 0x0084, BN},
255 {0x0085, 0x0085, B},
256 {0x0086, 0x009f, BN},
257 {0x00a0, 0x00a0, CS},
258 {0x00a2, 0x00a5, ET},
259 {0x00aa, 0x00aa, L},
260 {0x00ad, 0x00ad, BN},
261 {0x00b0, 0x00b1, ET},
262 {0x00b2, 0x00b3, EN},
263 {0x00b5, 0x00b5, L},
264 {0x00b9, 0x00b9, EN},
265 {0x00ba, 0x00ba, L},
266 {0x00c0, 0x00d6, L},
267 {0x00d8, 0x00f6, L},
268 {0x00f8, 0x0236, L},
269 {0x0250, 0x02b8, L},
270 {0x02bb, 0x02c1, L},
271 {0x02d0, 0x02d1, L},
272 {0x02e0, 0x02e4, L},
273 {0x02ee, 0x02ee, L},
274 {0x0300, 0x0357, NSM},
275 {0x035d, 0x036f, NSM},
276 {0x037a, 0x037a, L},
277 {0x0386, 0x0386, L},
278 {0x0388, 0x038a, L},
279 {0x038c, 0x038c, L},
280 {0x038e, 0x03a1, L},
281 {0x03a3, 0x03ce, L},
282 {0x03d0, 0x03f5, L},
283 {0x03f7, 0x03fb, L},
284 {0x0400, 0x0482, L},
285 {0x0483, 0x0486, NSM},
286 {0x0488, 0x0489, NSM},
287 {0x048a, 0x04ce, L},
288 {0x04d0, 0x04f5, L},
289 {0x04f8, 0x04f9, L},
290 {0x0500, 0x050f, L},
291 {0x0531, 0x0556, L},
292 {0x0559, 0x055f, L},
293 {0x0561, 0x0587, L},
294 {0x0589, 0x0589, L},
295 {0x0591, 0x05a1, NSM},
296 {0x05a3, 0x05b9, NSM},
297 {0x05bb, 0x05bd, NSM},
298 {0x05be, 0x05be, R},
299 {0x05bf, 0x05bf, NSM},
300 {0x05c0, 0x05c0, R},
301 {0x05c1, 0x05c2, NSM},
302 {0x05c3, 0x05c3, R},
303 {0x05c4, 0x05c4, NSM},
304 {0x05d0, 0x05ea, R},
305 {0x05f0, 0x05f4, R},
306 {0x0600, 0x0603, AL},
307 {0x060c, 0x060c, CS},
308 {0x060d, 0x060d, AL},
309 {0x0610, 0x0615, NSM},
310 {0x061b, 0x061b, AL},
311 {0x061f, 0x061f, AL},
312 {0x0621, 0x063a, AL},
313 {0x0640, 0x064a, AL},
314 {0x064b, 0x0658, NSM},
315 {0x0660, 0x0669, AN},
316 {0x066a, 0x066a, ET},
317 {0x066b, 0x066c, AN},
318 {0x066d, 0x066f, AL},
319 {0x0670, 0x0670, NSM},
320 {0x0671, 0x06d5, AL},
321 {0x06d6, 0x06dc, NSM},
322 {0x06dd, 0x06dd, AL},
323 {0x06de, 0x06e4, NSM},
324 {0x06e5, 0x06e6, AL},
325 {0x06e7, 0x06e8, NSM},
326 {0x06ea, 0x06ed, NSM},
327 {0x06ee, 0x06ef, AL},
328 {0x06f0, 0x06f9, EN},
329 {0x06fa, 0x070d, AL},
330 {0x070f, 0x070f, BN},
331 {0x0710, 0x0710, AL},
332 {0x0711, 0x0711, NSM},
333 {0x0712, 0x072f, AL},
334 {0x0730, 0x074a, NSM},
335 {0x074d, 0x074f, AL},
336 {0x0780, 0x07a5, AL},
337 {0x07a6, 0x07b0, NSM},
338 {0x07b1, 0x07b1, AL},
339 {0x0901, 0x0902, NSM},
340 {0x0903, 0x0939, L},
341 {0x093c, 0x093c, NSM},
342 {0x093d, 0x0940, L},
343 {0x0941, 0x0948, NSM},
344 {0x0949, 0x094c, L},
345 {0x094d, 0x094d, NSM},
346 {0x0950, 0x0950, L},
347 {0x0951, 0x0954, NSM},
348 {0x0958, 0x0961, L},
349 {0x0962, 0x0963, NSM},
350 {0x0964, 0x0970, L},
351 {0x0981, 0x0981, NSM},
352 {0x0982, 0x0983, L},
353 {0x0985, 0x098c, L},
354 {0x098f, 0x0990, L},
355 {0x0993, 0x09a8, L},
356 {0x09aa, 0x09b0, L},
357 {0x09b2, 0x09b2, L},
358 {0x09b6, 0x09b9, L},
359 {0x09bc, 0x09bc, NSM},
360 {0x09bd, 0x09c0, L},
361 {0x09c1, 0x09c4, NSM},
362 {0x09c7, 0x09c8, L},
363 {0x09cb, 0x09cc, L},
364 {0x09cd, 0x09cd, NSM},
365 {0x09d7, 0x09d7, L},
366 {0x09dc, 0x09dd, L},
367 {0x09df, 0x09e1, L},
368 {0x09e2, 0x09e3, NSM},
369 {0x09e6, 0x09f1, L},
370 {0x09f2, 0x09f3, ET},
371 {0x09f4, 0x09fa, L},
372 {0x0a01, 0x0a02, NSM},
373 {0x0a03, 0x0a03, L},
374 {0x0a05, 0x0a0a, L},
375 {0x0a0f, 0x0a10, L},
376 {0x0a13, 0x0a28, L},
377 {0x0a2a, 0x0a30, L},
378 {0x0a32, 0x0a33, L},
379 {0x0a35, 0x0a36, L},
380 {0x0a38, 0x0a39, L},
381 {0x0a3c, 0x0a3c, NSM},
382 {0x0a3e, 0x0a40, L},
383 {0x0a41, 0x0a42, NSM},
384 {0x0a47, 0x0a48, NSM},
385 {0x0a4b, 0x0a4d, NSM},
386 {0x0a59, 0x0a5c, L},
387 {0x0a5e, 0x0a5e, L},
388 {0x0a66, 0x0a6f, L},
389 {0x0a70, 0x0a71, NSM},
390 {0x0a72, 0x0a74, L},
391 {0x0a81, 0x0a82, NSM},
392 {0x0a83, 0x0a83, L},
393 {0x0a85, 0x0a8d, L},
394 {0x0a8f, 0x0a91, L},
395 {0x0a93, 0x0aa8, L},
396 {0x0aaa, 0x0ab0, L},
397 {0x0ab2, 0x0ab3, L},
398 {0x0ab5, 0x0ab9, L},
399 {0x0abc, 0x0abc, NSM},
400 {0x0abd, 0x0ac0, L},
401 {0x0ac1, 0x0ac5, NSM},
402 {0x0ac7, 0x0ac8, NSM},
403 {0x0ac9, 0x0ac9, L},
404 {0x0acb, 0x0acc, L},
405 {0x0acd, 0x0acd, NSM},
406 {0x0ad0, 0x0ad0, L},
407 {0x0ae0, 0x0ae1, L},
408 {0x0ae2, 0x0ae3, NSM},
409 {0x0ae6, 0x0aef, L},
410 {0x0af1, 0x0af1, ET},
411 {0x0b01, 0x0b01, NSM},
412 {0x0b02, 0x0b03, L},
413 {0x0b05, 0x0b0c, L},
414 {0x0b0f, 0x0b10, L},
415 {0x0b13, 0x0b28, L},
416 {0x0b2a, 0x0b30, L},
417 {0x0b32, 0x0b33, L},
418 {0x0b35, 0x0b39, L},
419 {0x0b3c, 0x0b3c, NSM},
420 {0x0b3d, 0x0b3e, L},
421 {0x0b3f, 0x0b3f, NSM},
422 {0x0b40, 0x0b40, L},
423 {0x0b41, 0x0b43, NSM},
424 {0x0b47, 0x0b48, L},
425 {0x0b4b, 0x0b4c, L},
426 {0x0b4d, 0x0b4d, NSM},
427 {0x0b56, 0x0b56, NSM},
428 {0x0b57, 0x0b57, L},
429 {0x0b5c, 0x0b5d, L},
430 {0x0b5f, 0x0b61, L},
431 {0x0b66, 0x0b71, L},
432 {0x0b82, 0x0b82, NSM},
433 {0x0b83, 0x0b83, L},
434 {0x0b85, 0x0b8a, L},
435 {0x0b8e, 0x0b90, L},
436 {0x0b92, 0x0b95, L},
437 {0x0b99, 0x0b9a, L},
438 {0x0b9c, 0x0b9c, L},
439 {0x0b9e, 0x0b9f, L},
440 {0x0ba3, 0x0ba4, L},
441 {0x0ba8, 0x0baa, L},
442 {0x0bae, 0x0bb5, L},
443 {0x0bb7, 0x0bb9, L},
444 {0x0bbe, 0x0bbf, L},
445 {0x0bc0, 0x0bc0, NSM},
446 {0x0bc1, 0x0bc2, L},
447 {0x0bc6, 0x0bc8, L},
448 {0x0bca, 0x0bcc, L},
449 {0x0bcd, 0x0bcd, NSM},
450 {0x0bd7, 0x0bd7, L},
451 {0x0be7, 0x0bf2, L},
452 {0x0bf9, 0x0bf9, ET},
453 {0x0c01, 0x0c03, L},
454 {0x0c05, 0x0c0c, L},
455 {0x0c0e, 0x0c10, L},
456 {0x0c12, 0x0c28, L},
457 {0x0c2a, 0x0c33, L},
458 {0x0c35, 0x0c39, L},
459 {0x0c3e, 0x0c40, NSM},
460 {0x0c41, 0x0c44, L},
461 {0x0c46, 0x0c48, NSM},
462 {0x0c4a, 0x0c4d, NSM},
463 {0x0c55, 0x0c56, NSM},
464 {0x0c60, 0x0c61, L},
465 {0x0c66, 0x0c6f, L},
466 {0x0c82, 0x0c83, L},
467 {0x0c85, 0x0c8c, L},
468 {0x0c8e, 0x0c90, L},
469 {0x0c92, 0x0ca8, L},
470 {0x0caa, 0x0cb3, L},
471 {0x0cb5, 0x0cb9, L},
472 {0x0cbc, 0x0cbc, NSM},
473 {0x0cbd, 0x0cc4, L},
474 {0x0cc6, 0x0cc8, L},
475 {0x0cca, 0x0ccb, L},
476 {0x0ccc, 0x0ccd, NSM},
477 {0x0cd5, 0x0cd6, L},
478 {0x0cde, 0x0cde, L},
479 {0x0ce0, 0x0ce1, L},
480 {0x0ce6, 0x0cef, L},
481 {0x0d02, 0x0d03, L},
482 {0x0d05, 0x0d0c, L},
483 {0x0d0e, 0x0d10, L},
484 {0x0d12, 0x0d28, L},
485 {0x0d2a, 0x0d39, L},
486 {0x0d3e, 0x0d40, L},
487 {0x0d41, 0x0d43, NSM},
488 {0x0d46, 0x0d48, L},
489 {0x0d4a, 0x0d4c, L},
490 {0x0d4d, 0x0d4d, NSM},
491 {0x0d57, 0x0d57, L},
492 {0x0d60, 0x0d61, L},
493 {0x0d66, 0x0d6f, L},
494 {0x0d82, 0x0d83, L},
495 {0x0d85, 0x0d96, L},
496 {0x0d9a, 0x0db1, L},
497 {0x0db3, 0x0dbb, L},
498 {0x0dbd, 0x0dbd, L},
499 {0x0dc0, 0x0dc6, L},
500 {0x0dca, 0x0dca, NSM},
501 {0x0dcf, 0x0dd1, L},
502 {0x0dd2, 0x0dd4, NSM},
503 {0x0dd6, 0x0dd6, NSM},
504 {0x0dd8, 0x0ddf, L},
505 {0x0df2, 0x0df4, L},
506 {0x0e01, 0x0e30, L},
507 {0x0e31, 0x0e31, NSM},
508 {0x0e32, 0x0e33, L},
509 {0x0e34, 0x0e3a, NSM},
510 {0x0e3f, 0x0e3f, ET},
511 {0x0e40, 0x0e46, L},
512 {0x0e47, 0x0e4e, NSM},
513 {0x0e4f, 0x0e5b, L},
514 {0x0e81, 0x0e82, L},
515 {0x0e84, 0x0e84, L},
516 {0x0e87, 0x0e88, L},
517 {0x0e8a, 0x0e8a, L},
518 {0x0e8d, 0x0e8d, L},
519 {0x0e94, 0x0e97, L},
520 {0x0e99, 0x0e9f, L},
521 {0x0ea1, 0x0ea3, L},
522 {0x0ea5, 0x0ea5, L},
523 {0x0ea7, 0x0ea7, L},
524 {0x0eaa, 0x0eab, L},
525 {0x0ead, 0x0eb0, L},
526 {0x0eb1, 0x0eb1, NSM},
527 {0x0eb2, 0x0eb3, L},
528 {0x0eb4, 0x0eb9, NSM},
529 {0x0ebb, 0x0ebc, NSM},
530 {0x0ebd, 0x0ebd, L},
531 {0x0ec0, 0x0ec4, L},
532 {0x0ec6, 0x0ec6, L},
533 {0x0ec8, 0x0ecd, NSM},
534 {0x0ed0, 0x0ed9, L},
535 {0x0edc, 0x0edd, L},
536 {0x0f00, 0x0f17, L},
537 {0x0f18, 0x0f19, NSM},
538 {0x0f1a, 0x0f34, L},
539 {0x0f35, 0x0f35, NSM},
540 {0x0f36, 0x0f36, L},
541 {0x0f37, 0x0f37, NSM},
542 {0x0f38, 0x0f38, L},
543 {0x0f39, 0x0f39, NSM},
544 {0x0f3e, 0x0f47, L},
545 {0x0f49, 0x0f6a, L},
546 {0x0f71, 0x0f7e, NSM},
547 {0x0f7f, 0x0f7f, L},
548 {0x0f80, 0x0f84, NSM},
549 {0x0f85, 0x0f85, L},
550 {0x0f86, 0x0f87, NSM},
551 {0x0f88, 0x0f8b, L},
552 {0x0f90, 0x0f97, NSM},
553 {0x0f99, 0x0fbc, NSM},
554 {0x0fbe, 0x0fc5, L},
555 {0x0fc6, 0x0fc6, NSM},
556 {0x0fc7, 0x0fcc, L},
557 {0x0fcf, 0x0fcf, L},
558 {0x1000, 0x1021, L},
559 {0x1023, 0x1027, L},
560 {0x1029, 0x102a, L},
561 {0x102c, 0x102c, L},
562 {0x102d, 0x1030, NSM},
563 {0x1031, 0x1031, L},
564 {0x1032, 0x1032, NSM},
565 {0x1036, 0x1037, NSM},
566 {0x1038, 0x1038, L},
567 {0x1039, 0x1039, NSM},
568 {0x1040, 0x1057, L},
569 {0x1058, 0x1059, NSM},
570 {0x10a0, 0x10c5, L},
571 {0x10d0, 0x10f8, L},
572 {0x10fb, 0x10fb, L},
573 {0x1100, 0x1159, L},
574 {0x115f, 0x11a2, L},
575 {0x11a8, 0x11f9, L},
576 {0x1200, 0x1206, L},
577 {0x1208, 0x1246, L},
578 {0x1248, 0x1248, L},
579 {0x124a, 0x124d, L},
580 {0x1250, 0x1256, L},
581 {0x1258, 0x1258, L},
582 {0x125a, 0x125d, L},
583 {0x1260, 0x1286, L},
584 {0x1288, 0x1288, L},
585 {0x128a, 0x128d, L},
586 {0x1290, 0x12ae, L},
587 {0x12b0, 0x12b0, L},
588 {0x12b2, 0x12b5, L},
589 {0x12b8, 0x12be, L},
590 {0x12c0, 0x12c0, L},
591 {0x12c2, 0x12c5, L},
592 {0x12c8, 0x12ce, L},
593 {0x12d0, 0x12d6, L},
594 {0x12d8, 0x12ee, L},
595 {0x12f0, 0x130e, L},
596 {0x1310, 0x1310, L},
597 {0x1312, 0x1315, L},
598 {0x1318, 0x131e, L},
599 {0x1320, 0x1346, L},
600 {0x1348, 0x135a, L},
601 {0x1361, 0x137c, L},
602 {0x13a0, 0x13f4, L},
603 {0x1401, 0x1676, L},
604 {0x1680, 0x1680, WS},
605 {0x1681, 0x169a, L},
606 {0x16a0, 0x16f0, L},
607 {0x1700, 0x170c, L},
608 {0x170e, 0x1711, L},
609 {0x1712, 0x1714, NSM},
610 {0x1720, 0x1731, L},
611 {0x1732, 0x1734, NSM},
612 {0x1735, 0x1736, L},
613 {0x1740, 0x1751, L},
614 {0x1752, 0x1753, NSM},
615 {0x1760, 0x176c, L},
616 {0x176e, 0x1770, L},
617 {0x1772, 0x1773, NSM},
618 {0x1780, 0x17b6, L},
619 {0x17b7, 0x17bd, NSM},
620 {0x17be, 0x17c5, L},
621 {0x17c6, 0x17c6, NSM},
622 {0x17c7, 0x17c8, L},
623 {0x17c9, 0x17d3, NSM},
624 {0x17d4, 0x17da, L},
625 {0x17db, 0x17db, ET},
626 {0x17dc, 0x17dc, L},
627 {0x17dd, 0x17dd, NSM},
628 {0x17e0, 0x17e9, L},
629 {0x180b, 0x180d, NSM},
630 {0x180e, 0x180e, WS},
631 {0x1810, 0x1819, L},
632 {0x1820, 0x1877, L},
633 {0x1880, 0x18a8, L},
634 {0x18a9, 0x18a9, NSM},
635 {0x1900, 0x191c, L},
636 {0x1920, 0x1922, NSM},
637 {0x1923, 0x1926, L},
638 {0x1927, 0x192b, NSM},
639 {0x1930, 0x1931, L},
640 {0x1932, 0x1932, NSM},
641 {0x1933, 0x1938, L},
642 {0x1939, 0x193b, NSM},
643 {0x1946, 0x196d, L},
644 {0x1970, 0x1974, L},
645 {0x1d00, 0x1d6b, L},
646 {0x1e00, 0x1e9b, L},
647 {0x1ea0, 0x1ef9, L},
648 {0x1f00, 0x1f15, L},
649 {0x1f18, 0x1f1d, L},
650 {0x1f20, 0x1f45, L},
651 {0x1f48, 0x1f4d, L},
652 {0x1f50, 0x1f57, L},
653 {0x1f59, 0x1f59, L},
654 {0x1f5b, 0x1f5b, L},
655 {0x1f5d, 0x1f5d, L},
656 {0x1f5f, 0x1f7d, L},
657 {0x1f80, 0x1fb4, L},
658 {0x1fb6, 0x1fbc, L},
659 {0x1fbe, 0x1fbe, L},
660 {0x1fc2, 0x1fc4, L},
661 {0x1fc6, 0x1fcc, L},
662 {0x1fd0, 0x1fd3, L},
663 {0x1fd6, 0x1fdb, L},
664 {0x1fe0, 0x1fec, L},
665 {0x1ff2, 0x1ff4, L},
666 {0x1ff6, 0x1ffc, L},
667 {0x2000, 0x200a, WS},
668 {0x200b, 0x200d, BN},
669 {0x200e, 0x200e, L},
670 {0x200f, 0x200f, R},
671 {0x2028, 0x2028, WS},
672 {0x2029, 0x2029, B},
673 {0x202a, 0x202a, LRE},
674 {0x202b, 0x202b, RLE},
675 {0x202c, 0x202c, PDF},
676 {0x202d, 0x202d, LRO},
677 {0x202e, 0x202e, RLO},
678 {0x202f, 0x202f, WS},
679 {0x2030, 0x2034, ET},
680 {0x2044, 0x2044, CS},
681 {0x205f, 0x205f, WS},
682 {0x2060, 0x2063, BN},
683 {0x206a, 0x206f, BN},
684 {0x2070, 0x2070, EN},
685 {0x2071, 0x2071, L},
686 {0x2074, 0x2079, EN},
687 {0x207a, 0x207b, ET},
688 {0x207f, 0x207f, L},
689 {0x2080, 0x2089, EN},
690 {0x208a, 0x208b, ET},
691 {0x20a0, 0x20b1, ET},
692 {0x20d0, 0x20ea, NSM},
693 {0x2102, 0x2102, L},
694 {0x2107, 0x2107, L},
695 {0x210a, 0x2113, L},
696 {0x2115, 0x2115, L},
697 {0x2119, 0x211d, L},
698 {0x2124, 0x2124, L},
699 {0x2126, 0x2126, L},
700 {0x2128, 0x2128, L},
701 {0x212a, 0x212d, L},
702 {0x212e, 0x212e, ET},
703 {0x212f, 0x2131, L},
704 {0x2133, 0x2139, L},
705 {0x213d, 0x213f, L},
706 {0x2145, 0x2149, L},
707 {0x2160, 0x2183, L},
708 {0x2212, 0x2213, ET},
709 {0x2336, 0x237a, L},
710 {0x2395, 0x2395, L},
711 {0x2488, 0x249b, EN},
712 {0x249c, 0x24e9, L},
713 {0x2800, 0x28ff, L},
714 {0x3000, 0x3000, WS},
715 {0x3005, 0x3007, L},
716 {0x3021, 0x3029, L},
717 {0x302a, 0x302f, NSM},
718 {0x3031, 0x3035, L},
719 {0x3038, 0x303c, L},
720 {0x3041, 0x3096, L},
721 {0x3099, 0x309a, NSM},
722 {0x309d, 0x309f, L},
723 {0x30a1, 0x30fa, L},
724 {0x30fc, 0x30ff, L},
725 {0x3105, 0x312c, L},
726 {0x3131, 0x318e, L},
727 {0x3190, 0x31b7, L},
728 {0x31f0, 0x321c, L},
729 {0x3220, 0x3243, L},
730 {0x3260, 0x327b, L},
731 {0x327f, 0x32b0, L},
732 {0x32c0, 0x32cb, L},
733 {0x32d0, 0x32fe, L},
734 {0x3300, 0x3376, L},
735 {0x337b, 0x33dd, L},
736 {0x33e0, 0x33fe, L},
737 {0x3400, 0x4db5, L},
738 {0x4e00, 0x9fa5, L},
739 {0xa000, 0xa48c, L},
740 {0xac00, 0xd7a3, L},
741 {0xd800, 0xfa2d, L},
742 {0xfa30, 0xfa6a, L},
743 {0xfb00, 0xfb06, L},
744 {0xfb13, 0xfb17, L},
745 {0xfb1d, 0xfb1d, R},
746 {0xfb1e, 0xfb1e, NSM},
747 {0xfb1f, 0xfb28, R},
748 {0xfb29, 0xfb29, ET},
749 {0xfb2a, 0xfb36, R},
750 {0xfb38, 0xfb3c, R},
751 {0xfb3e, 0xfb3e, R},
752 {0xfb40, 0xfb41, R},
753 {0xfb43, 0xfb44, R},
754 {0xfb46, 0xfb4f, R},
755 {0xfb50, 0xfbb1, AL},
756 {0xfbd3, 0xfd3d, AL},
757 {0xfd50, 0xfd8f, AL},
758 {0xfd92, 0xfdc7, AL},
759 {0xfdf0, 0xfdfc, AL},
760 {0xfe00, 0xfe0f, NSM},
761 {0xfe20, 0xfe23, NSM},
762 {0xfe50, 0xfe50, CS},
763 {0xfe52, 0xfe52, CS},
764 {0xfe55, 0xfe55, CS},
765 {0xfe5f, 0xfe5f, ET},
766 {0xfe62, 0xfe63, ET},
767 {0xfe69, 0xfe6a, ET},
768 {0xfe70, 0xfe74, AL},
769 {0xfe76, 0xfefc, AL},
770 {0xfeff, 0xfeff, BN},
771 {0xff03, 0xff05, ET},
772 {0xff0b, 0xff0b, ET},
773 {0xff0c, 0xff0c, CS},
774 {0xff0d, 0xff0d, ET},
775 {0xff0e, 0xff0e, CS},
776 {0xff0f, 0xff0f, ES},
777 {0xff10, 0xff19, EN},
778 {0xff1a, 0xff1a, CS},
779 {0xff21, 0xff3a, L},
780 {0xff41, 0xff5a, L},
781 {0xff66, 0xffbe, L},
782 {0xffc2, 0xffc7, L},
783 {0xffca, 0xffcf, L},
784 {0xffd2, 0xffd7, L},
785 {0xffda, 0xffdc, L},
786 {0xffe0, 0xffe1, ET},
787 {0xffe5, 0xffe6, ET},
788 {0x10000, 0x1000b, L},
789 {0x1000d, 0x10026, L},
790 {0x10028, 0x1003a, L},
791 {0x1003c, 0x1003d, L},
792 {0x1003f, 0x1004d, L},
793 {0x10050, 0x1005d, L},
794 {0x10080, 0x100fa, L},
795 {0x10100, 0x10100, L},
796 {0x10102, 0x10102, L},
797 {0x10107, 0x10133, L},
798 {0x10137, 0x1013f, L},
799 {0x10300, 0x1031e, L},
800 {0x10320, 0x10323, L},
801 {0x10330, 0x1034a, L},
802 {0x10380, 0x1039d, L},
803 {0x1039f, 0x1039f, L},
804 {0x10400, 0x1049d, L},
805 {0x104a0, 0x104a9, L},
806 {0x10800, 0x10805, R},
807 {0x10808, 0x10808, R},
808 {0x1080a, 0x10835, R},
809 {0x10837, 0x10838, R},
810 {0x1083c, 0x1083c, R},
811 {0x1083f, 0x1083f, R},
812 {0x1d000, 0x1d0f5, L},
813 {0x1d100, 0x1d126, L},
814 {0x1d12a, 0x1d166, L},
815 {0x1d167, 0x1d169, NSM},
816 {0x1d16a, 0x1d172, L},
817 {0x1d173, 0x1d17a, BN},
818 {0x1d17b, 0x1d182, NSM},
819 {0x1d183, 0x1d184, L},
820 {0x1d185, 0x1d18b, NSM},
821 {0x1d18c, 0x1d1a9, L},
822 {0x1d1aa, 0x1d1ad, NSM},
823 {0x1d1ae, 0x1d1dd, L},
824 {0x1d400, 0x1d454, L},
825 {0x1d456, 0x1d49c, L},
826 {0x1d49e, 0x1d49f, L},
827 {0x1d4a2, 0x1d4a2, L},
828 {0x1d4a5, 0x1d4a6, L},
829 {0x1d4a9, 0x1d4ac, L},
830 {0x1d4ae, 0x1d4b9, L},
831 {0x1d4bb, 0x1d4bb, L},
832 {0x1d4bd, 0x1d4c3, L},
833 {0x1d4c5, 0x1d505, L},
834 {0x1d507, 0x1d50a, L},
835 {0x1d50d, 0x1d514, L},
836 {0x1d516, 0x1d51c, L},
837 {0x1d51e, 0x1d539, L},
838 {0x1d53b, 0x1d53e, L},
839 {0x1d540, 0x1d544, L},
840 {0x1d546, 0x1d546, L},
841 {0x1d54a, 0x1d550, L},
842 {0x1d552, 0x1d6a3, L},
843 {0x1d6a8, 0x1d7c9, L},
844 {0x1d7ce, 0x1d7ff, EN},
845 {0x20000, 0x2a6d6, L},
846 {0x2f800, 0x2fa1d, L},
847 {0xe0001, 0xe0001, BN},
848 {0xe0020, 0xe007f, BN},
849 {0xe0100, 0xe01ef, NSM},
850 {0xf0000, 0xffffd, L},
d7891209 851 {0x100000, 0x10fffd, L}
7bc1ffdf 852 };
853
854 int i, j, k;
855
856 i = -1;
857 j = lenof(lookup);
858
197c43dd 859 while (j - i > 1) {
7bc1ffdf 860 k = (i + j) / 2;
861 if (ch < lookup[k].first)
862 j = k;
863 else if (ch > lookup[k].last)
864 i = k;
865 else
866 return lookup[k].type;
867 }
868
869 /*
870 * If we reach here, the character was not in any of the
871 * intervals listed in the lookup table. This means we return
872 * ON (`Other Neutrals'). This is the appropriate code for any
873 * character genuinely not listed in the Unicode table, and
874 * also the table above has deliberately left out any
875 * characters _explicitly_ listed as ON (to save space!).
876 */
877 return ON;
f0fccd51 878}
879
880/*
881 * The most significant 2 bits of each level are used to store
882 * Override status of each character
883 * This function sets the override bits of level according
884 * to the value in override, and reurns the new byte.
885 */
886unsigned char setOverrideBits(unsigned char level, unsigned char override)
887{
cd985a32 888 if (override == ON)
f0fccd51 889 return level;
cd985a32 890 else if (override == R)
f0fccd51 891 return level | OISR;
cd985a32 892 else if (override == L)
f0fccd51 893 return level | OISL;
894 return level;
895}
896
/*
 * Find the most recent run of the same value in `level', and
 * return the value _before_ it. Used to process U+202C POP
 * DIRECTIONAL FORMATTING.
 *
 * The run examined is the one ending at level[from-1]. Returns -1 if
 * from <= 0, or if that run reaches all the way back to level[0] so
 * that nothing precedes it.
 */
int getPreviousLevel(unsigned char* level, int from)
{
    unsigned char current;
    int pos;

    if (from <= 0)
        return -1;

    current = level[from - 1];

    /* walk backwards to the first entry that differs from the run */
    for (pos = from - 2; pos >= 0; pos--) {
        if (level[pos] != current)
            return level[pos];
    }

    return -1;
}
917
f0fccd51 918/* The Main shaping function, and the only one to be used
919 * by the outside world.
920 *
921 * line: buffer to apply shaping to. this must be passed by doBidi() first
922 * to: output buffer for the shaped data
923 * count: number of characters in line
924 */
925int do_shape(bidi_char *line, bidi_char *to, int count)
926{
927 int i, tempShape, ligFlag;
928
cd985a32 929 for (ligFlag=i=0; i<count; i++) {
f0fccd51 930 to[i] = line[i];
931 tempShape = STYPE(line[i].wc);
cd985a32 932 switch (tempShape) {
f0fccd51 933 case SC:
934 break;
935
936 case SU:
937 break;
938
939 case SR:
598b33ba 940 tempShape = (i+1 < count ? STYPE(line[i+1].wc) : SU);
cd985a32 941 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
f0fccd51 942 to[i].wc = SFINAL((SISOLATED(line[i].wc)));
943 else
944 to[i].wc = SISOLATED(line[i].wc);
945 break;
946
947
948 case SD:
949 /* Make Ligatures */
598b33ba 950 tempShape = (i+1 < count ? STYPE(line[i+1].wc) : SU);
cd985a32 951 if (line[i].wc == 0x644) {
952 if (i > 0) switch (line[i-1].wc) {
f0fccd51 953 case 0x622:
954 ligFlag = 1;
cd985a32 955 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
f0fccd51 956 to[i].wc = 0xFEF6;
957 else
958 to[i].wc = 0xFEF5;
959 break;
960 case 0x623:
961 ligFlag = 1;
cd985a32 962 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
f0fccd51 963 to[i].wc = 0xFEF8;
964 else
965 to[i].wc = 0xFEF7;
966 break;
967 case 0x625:
968 ligFlag = 1;
cd985a32 969 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
f0fccd51 970 to[i].wc = 0xFEFA;
971 else
972 to[i].wc = 0xFEF9;
973 break;
974 case 0x627:
975 ligFlag = 1;
cd985a32 976 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
f0fccd51 977 to[i].wc = 0xFEFC;
978 else
979 to[i].wc = 0xFEFB;
980 break;
981 }
cd985a32 982 if (ligFlag) {
f0fccd51 983 to[i-1].wc = 0x20;
984 ligFlag = 0;
985 break;
986 }
987 }
988
cd985a32 989 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) {
598b33ba 990 tempShape = (i > 0 ? STYPE(line[i-1].wc) : SU);
cd985a32 991 if ((tempShape == SR) || (tempShape == SD) || (tempShape == SC))
992 to[i].wc = SMEDIAL((SISOLATED(line[i].wc)));
f0fccd51 993 else
994 to[i].wc = SFINAL((SISOLATED(line[i].wc)));
995 break;
996 }
997
598b33ba 998 tempShape = (i > 0 ? STYPE(line[i-1].wc) : SU);
cd985a32 999 if ((tempShape == SR) || (tempShape == SD) || (tempShape == SC))
f0fccd51 1000 to[i].wc = SINITIAL((SISOLATED(line[i].wc)));
1001 else
1002 to[i].wc = SISOLATED(line[i].wc);
1003 break;
1004
1005
1006 }
1007 }
1008 return 1;
1009}
1010
1011/*
1012 * The Main Bidi Function, and the only function that should
1013 * be used by the outside world.
1014 *
1015 * line: a buffer of size count containing text to apply
1016 * the Bidirectional algorithm to.
1017 */
1018
1019int do_bidi(bidi_char *line, int count)
1020{
1021 unsigned char* types;
1022 unsigned char* levels;
1023 unsigned char paragraphLevel;
1024 unsigned char currentEmbedding;
1025 unsigned char currentOverride;
1026 unsigned char tempType;
1027 int i, j, imax, yes, bover;
1028
1029 /* Check the presence of R or AL types as optimization */
1030 yes = 0;
cd985a32 1031 for (i=0; i<count; i++) {
31626f30 1032 int type = getType(line[i].wc);
1033 if (type == R || type == AL) {
f0fccd51 1034 yes = 1;
1035 break;
1036 }
1037 }
cd985a32 1038 if (yes == 0)
f0fccd51 1039 return L;
1040
1041 /* Initialize types, levels */
31626f30 1042 types = snewn(count, unsigned char);
1043 levels = snewn(count, unsigned char);
f0fccd51 1044
1045 /* Rule (P1) NOT IMPLEMENTED
1046 * P1. Split the text into separate paragraphs. A paragraph separator is
1047 * kept with the previous paragraph. Within each paragraph, apply all the
1048 * other rules of this algorithm.
1049 */
1050
1051 /* Rule (P2), (P3)
1052 * P2. In each paragraph, find the first character of type L, AL, or R.
1053 * P3. If a character is found in P2 and it is of type AL or R, then set
1054 * the paragraph embedding level to one; otherwise, set it to zero.
1055 */
1056 paragraphLevel = 0;
cd985a32 1057 for (i=0; i<count ; i++) {
31626f30 1058 int type = getType(line[i].wc);
1059 if (type == R || type == AL) {
f0fccd51 1060 paragraphLevel = 1;
1061 break;
31626f30 1062 } else if (type == L)
f0fccd51 1063 break;
1064 }
1065
1066 /* Rule (X1)
1067 * X1. Begin by setting the current embedding level to the paragraph
1068 * embedding level. Set the directional override status to neutral.
1069 */
1070 currentEmbedding = paragraphLevel;
1071 currentOverride = ON;
1072
1073 /* Rule (X2), (X3), (X4), (X5), (X6), (X7), (X8)
1074 * X2. With each RLE, compute the least greater odd embedding level.
1075 * X3. With each LRE, compute the least greater even embedding level.
1076 * X4. With each RLO, compute the least greater odd embedding level.
1077 * X5. With each LRO, compute the least greater even embedding level.
1078 * X6. For all types besides RLE, LRE, RLO, LRO, and PDF:
1079 * a. Set the level of the current character to the current
1080 * embedding level.
1081 * b. Whenever the directional override status is not neutral,
1082 * reset the current character type to the directional
1083 * override status.
1084 * X7. With each PDF, determine the matching embedding or override code.
1085 * If there was a valid matching code, restore (pop) the last
1086 * remembered (pushed) embedding level and directional override.
1087 * X8. All explicit directional embeddings and overrides are completely
1088 * terminated at the end of each paragraph. Paragraph separators are not
1089 * included in the embedding. (Useless here) NOT IMPLEMENTED
1090 */
1091 bover = 0;
cd985a32 1092 for (i=0; i<count; i++) {
f0fccd51 1093 tempType = getType(line[i].wc);
cd985a32 1094 switch (tempType) {
f0fccd51 1095 case RLE:
1096 currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding);
1097 levels[i] = setOverrideBits(levels[i], currentOverride);
1098 currentOverride = ON;
1099 break;
1100
1101 case LRE:
1102 currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding);
1103 levels[i] = setOverrideBits(levels[i], currentOverride);
1104 currentOverride = ON;
1105 break;
1106
1107 case RLO:
1108 currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding);
1109 tempType = currentOverride = R;
1110 bover = 1;
1111 break;
1112
1113 case LRO:
1114 currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding);
1115 tempType = currentOverride = L;
1116 bover = 1;
1117 break;
1118
1119 case PDF:
598b33ba 1120 {
1121 int prevlevel = getPreviousLevel(levels, i);
1122
1123 if (prevlevel == -1) {
1124 currentEmbedding = paragraphLevel;
1125 currentOverride = ON;
1126 } else {
1127 currentOverride = currentEmbedding & OMASK;
1128 currentEmbedding = currentEmbedding & ~OMASK;
1129 }
1130 }
f0fccd51 1131 levels[i] = currentEmbedding;
1132 break;
1133
1134 /* Whitespace is treated as neutral for now */
1135 case WS:
1136 case S:
1137 levels[i] = currentEmbedding;
1138 tempType = ON;
cd985a32 1139 if (currentOverride != ON)
f0fccd51 1140 tempType = currentOverride;
1141 break;
1142
1143 default:
1144 levels[i] = currentEmbedding;
cd985a32 1145 if (currentOverride != ON)
f0fccd51 1146 tempType = currentOverride;
1147 break;
1148
1149 }
1150 types[i] = tempType;
1151 }
1152 /* this clears out all overrides, so we can use levels safely... */
1153 /* checks bover first */
cd985a32 1154 if (bover)
1155 for (i=0; i<count; i++)
f0fccd51 1156 levels[i] = levels[i] & LMASK;
1157
1158 /* Rule (X9)
1159 * X9. Remove all RLE, LRE, RLO, LRO, PDF, and BN codes.
1160 * Here, they're converted to BN.
1161 */
cd985a32 1162 for (i=0; i<count; i++) {
1163 switch (types[i]) {
f0fccd51 1164 case RLE:
1165 case LRE:
1166 case RLO:
1167 case LRO:
1168 case PDF:
1169 types[i] = BN;
1170 break;
1171 }
1172 }
1173
1174 /* Rule (W1)
1175 * W1. Examine each non-spacing mark (NSM) in the level run, and change
1176 * the type of the NSM to the type of the previous character. If the NSM
1177 * is at the start of the level run, it will get the type of sor.
1178 */
cd985a32 1179 if (types[0] == NSM)
f0fccd51 1180 types[0] = paragraphLevel;
1181
cd985a32 1182 for (i=1; i<count; i++) {
1183 if (types[i] == NSM)
f0fccd51 1184 types[i] = types[i-1];
1185 /* Is this a safe assumption?
1186 * I assumed the previous, IS a character.
1187 */
1188 }
1189
1190 /* Rule (W2)
1191 * W2. Search backwards from each instance of a European number until the
1192 * first strong type (R, L, AL, or sor) is found. If an AL is found,
1193 * change the type of the European number to Arabic number.
1194 */
cd985a32 1195 for (i=0; i<count; i++) {
1196 if (types[i] == EN) {
f0fccd51 1197 j=i;
cd985a32 1198 while (j >= 0) {
1199 if (types[j] == AL) {
f0fccd51 1200 types[i] = AN;
1201 break;
cd985a32 1202 } else if (types[j] == R || types[j] == L) {
1203 break;
1204 }
f0fccd51 1205 j--;
1206 }
1207 }
1208 }
1209
1210 /* Rule (W3)
1211 * W3. Change all ALs to R.
1212 *
1213 * Optimization: on Rule Xn, we might set a flag on AL type
1214 * to prevent this loop in L R lines only...
1215 */
cd985a32 1216 for (i=0; i<count; i++) {
1217 if (types[i] == AL)
f0fccd51 1218 types[i] = R;
1219 }
1220
1221 /* Rule (W4)
1222 * W4. A single European separator between two European numbers changes
1223 * to a European number. A single common separator between two numbers
1224 * of the same type changes to that type.
1225 */
cd985a32 1226 for (i=1; i<(count-1); i++) {
1227 if (types[i] == ES) {
1228 if (types[i-1] == EN && types[i+1] == EN)
f0fccd51 1229 types[i] = EN;
cd985a32 1230 } else if (types[i] == CS) {
1231 if (types[i-1] == EN && types[i+1] == EN)
1232 types[i] = EN;
1233 else if (types[i-1] == AN && types[i+1] == AN)
1234 types[i] = AN;
1235 }
f0fccd51 1236 }
1237
1238 /* Rule (W5)
1239 * W5. A sequence of European terminators adjacent to European numbers
1240 * changes to all European numbers.
1241 *
1242 * Optimization: lots here... else ifs need rearrangement
1243 */
cd985a32 1244 for (i=0; i<count; i++) {
1245 if (types[i] == ET) {
1246 if (i > 0 && types[i-1] == EN) {
f0fccd51 1247 types[i] = EN;
1248 continue;
cd985a32 1249 } else if (i < count-1 && types[i+1] == EN) {
1250 types[i] = EN;
1251 continue;
1252 } else if (i < count-1 && types[i+1] == ET) {
1253 j=i;
1254 while (j <count && types[j] == ET) {
1255 j++;
1256 }
1257 if (types[j] == EN)
1258 types[i] = EN;
1259 }
f0fccd51 1260 }
1261 }
1262
1263 /* Rule (W6)
1264 * W6. Otherwise, separators and terminators change to Other Neutral:
1265 */
cd985a32 1266 for (i=0; i<count; i++) {
1267 switch (types[i]) {
f0fccd51 1268 case ES:
1269 case ET:
1270 case CS:
1271 types[i] = ON;
1272 break;
1273 }
1274 }
1275
1276 /* Rule (W7)
1277 * W7. Search backwards from each instance of a European number until
1278 * the first strong type (R, L, or sor) is found. If an L is found,
1279 * then change the type of the European number to L.
1280 */
cd985a32 1281 for (i=0; i<count; i++) {
1282 if (types[i] == EN) {
f0fccd51 1283 j=i;
cd985a32 1284 while (j >= 0) {
1285 if (types[j] == L) {
f0fccd51 1286 types[i] = L;
1287 break;
cd985a32 1288 } else if (types[j] == R || types[j] == AL) {
f0fccd51 1289 break;
1290 }
1291 j--;
1292 }
1293 }
1294 }
1295
1296 /* Rule (N1)
1297 * N1. A sequence of neutrals takes the direction of the surrounding
1298 * strong text if the text on both sides has the same direction. European
1299 * and Arabic numbers are treated as though they were R.
1300 */
cd985a32 1301 if (count >= 2 && types[0] == ON) {
1302 if ((types[1] == R) || (types[1] == EN) || (types[1] == AN))
f0fccd51 1303 types[0] = R;
cd985a32 1304 else if (types[1] == L)
f0fccd51 1305 types[0] = L;
1306 }
cd985a32 1307 for (i=1; i<(count-1); i++) {
1308 if (types[i] == ON) {
1309 if (types[i-1] == L) {
f0fccd51 1310 j=i;
cd985a32 1311 while (j<(count-1) && types[j] == ON) {
f0fccd51 1312 j++;
1313 }
cd985a32 1314 if (types[j] == L) {
1315 while (i<j) {
f0fccd51 1316 types[i] = L;
1317 i++;
1318 }
1319 }
1320
cd985a32 1321 } else if ((types[i-1] == R) ||
1322 (types[i-1] == EN) ||
1323 (types[i-1] == AN)) {
1324 j=i;
1325 while (j<(count-1) && types[j] == ON) {
1326 j++;
1327 }
1328 if ((types[j] == R) ||
1329 (types[j] == EN) ||
1330 (types[j] == AN)) {
1331 while (i<j) {
1332 types[i] = R;
1333 i++;
1334 }
1335 }
1336 }
f0fccd51 1337 }
1338 }
cd985a32 1339 if (count >= 2 && types[count-1] == ON) {
1340 if (types[count-2] == R || types[count-2] == EN || types[count-2] == AN)
f0fccd51 1341 types[count-1] = R;
cd985a32 1342 else if (types[count-2] == L)
f0fccd51 1343 types[count-1] = L;
1344 }
1345
1346 /* Rule (N2)
1347 * N2. Any remaining neutrals take the embedding direction.
1348 */
cd985a32 1349 for (i=0; i<count; i++) {
1350 if (types[i] == ON) {
1351 if ((levels[i] % 2) == 0)
f0fccd51 1352 types[i] = L;
1353 else
1354 types[i] = R;
1355 }
1356 }
1357
1358 /* Rule (I1)
1359 * I1. For all characters with an even (left-to-right) embedding
1360 * direction, those of type R go up one level and those of type AN or
1361 * EN go up two levels.
1362 */
cd985a32 1363 for (i=0; i<count; i++) {
1364 if ((levels[i] % 2) == 0) {
1365 if (types[i] == R)
f0fccd51 1366 levels[i] += 1;
cd985a32 1367 else if (types[i] == AN || types[i] == EN)
f0fccd51 1368 levels[i] += 2;
1369 }
1370 }
1371
1372 /* Rule (I2)
1373 * I2. For all characters with an odd (right-to-left) embedding direction,
1374 * those of type L, EN or AN go up one level.
1375 */
cd985a32 1376 for (i=0; i<count; i++) {
1377 if ((levels[i] % 2) == 1) {
1378 if (types[i] == L || types[i] == EN || types[i] == AN)
f0fccd51 1379 levels[i] += 1;
1380 }
1381 }
1382
1383 /* Rule (L1)
1384 * L1. On each line, reset the embedding level of the following characters
1385 * to the paragraph embedding level:
1386 * (1)segment separators, (2)paragraph separators,
1387 * (3)any sequence of whitespace characters preceding
1388 * a segment separator or paragraph separator,
1389 * (4)and any sequence of white space characters
1390 * at the end of the line.
1391 * The types of characters used here are the original types, not those
1392 * modified by the previous phase.
1393 */
1394 j=count-1;
cd985a32 1395 while (j>0 && (getType(line[j].wc) == WS)) {
f0fccd51 1396 j--;
1397 }
cd985a32 1398 if (j < (count-1)) {
1399 for (j++; j<count; j++)
f0fccd51 1400 levels[j] = paragraphLevel;
1401 }
cd985a32 1402 for (i=0; i<count; i++) {
f0fccd51 1403 tempType = getType(line[i].wc);
cd985a32 1404 if (tempType == WS) {
f0fccd51 1405 j=i;
cd985a32 1406 while (j<count && (getType(line[j].wc) == WS)) {
f0fccd51 1407 j++;
1408 }
cd985a32 1409 if (j==count || getType(line[j].wc) == B ||
1410 getType(line[j].wc) == S) {
1411 for (j--; j>=i ; j--) {
f0fccd51 1412 levels[j] = paragraphLevel;
1413 }
1414 }
cd985a32 1415 } else if (tempType == B || tempType == S) {
1416 levels[i] = paragraphLevel;
1417 }
f0fccd51 1418 }
1419
1420 /* Rule (L4) NOT IMPLEMENTED
1421 * L4. A character that possesses the mirrored property as specified by
1422 * Section 4.7, Mirrored, must be depicted by a mirrored glyph if the
1423 * resolved directionality of that character is R.
1424 */
1425 /* Note: this is implemented before L2 for efficiency */
cd985a32 1426 for (i=0; i<count; i++)
1427 if ((levels[i] % 2) == 1)
f0fccd51 1428 doMirror(&line[i].wc);
1429
1430 /* Rule (L2)
1431 * L2. From the highest level found in the text to the lowest odd level on
1432 * each line, including intermediate levels not actually present in the
1433 * text, reverse any contiguous sequence of characters that are at that
1434 * level or higher
1435 */
1436 /* we flip the character string and leave the level array */
1437 imax = 0;
1438 i=0;
1439 tempType = levels[0];
cd985a32 1440 while (i < count) {
1441 if (levels[i] > tempType) {
f0fccd51 1442 tempType = levels[i];
1443 imax=i;
1444 }
1445 i++;
1446 }
1447 /* maximum level in tempType, its index in imax. */
cd985a32 1448 while (tempType > 0) { /* loop from highest level to the least odd, */
1449 /* which i assume is 1 */
f0fccd51 1450 flipThisRun(line, levels, tempType, count);
1451 tempType--;
1452 }
1453
1454 /* Rule (L3) NOT IMPLEMENTED
1455 * L3. Combining marks applied to a right-to-left base character will at
1456 * this point precede their base character. If the rendering engine
1457 * expects them to follow the base characters in the final display
1458 * process, then the ordering of the marks and the base character must
1459 * be reversed.
1460 */
31626f30 1461 sfree(types);
1462 sfree(levels);
f0fccd51 1463 return R;
1464}
1465
1466
/*
 * doMirror: replace a character by its mirrored counterpart (rule L4).
 *
 * ch points to the character to examine. If *ch is one of the code
 * points listed below, it is overwritten in place with the code point
 * of its mirror-image glyph (e.g. '(' becomes ')'); any other value is
 * left untouched. Nothing is returned.
 *
 * The pairs follow the Unicode BidiMirroring data. Almost every entry
 * is symmetric (a -> b and b -> a), so applying the function twice
 * restores the original character.
 */
void doMirror(wchar_t* ch)
{
    switch (*ch) {
      /* U+00xx: ASCII and Latin-1 brackets and quotes */
      case 0x0028: *ch = 0x0029; break;
      case 0x0029: *ch = 0x0028; break;
      case 0x003C: *ch = 0x003E; break;
      case 0x003E: *ch = 0x003C; break;
      case 0x005B: *ch = 0x005D; break;
      case 0x005D: *ch = 0x005B; break;
      case 0x007B: *ch = 0x007D; break;
      case 0x007D: *ch = 0x007B; break;
      case 0x00AB: *ch = 0x00BB; break;
      case 0x00BB: *ch = 0x00AB; break;

      /* U+20xx: general punctuation, super/subscript parentheses */
      case 0x2039: *ch = 0x203A; break;
      case 0x203A: *ch = 0x2039; break;
      case 0x2045: *ch = 0x2046; break;
      case 0x2046: *ch = 0x2045; break;
      case 0x207D: *ch = 0x207E; break;
      case 0x207E: *ch = 0x207D; break;
      case 0x208D: *ch = 0x208E; break;
      case 0x208E: *ch = 0x208D; break;

      /* U+22xx: mathematical operators (some partners live in
       * U+29xx / U+2Axx) */
      case 0x2208: *ch = 0x220B; break;
      case 0x2209: *ch = 0x220C; break;
      case 0x220A: *ch = 0x220D; break;
      case 0x220B: *ch = 0x2208; break;
      case 0x220C: *ch = 0x2209; break;
      case 0x220D: *ch = 0x220A; break;
      case 0x2215: *ch = 0x29F5; break;
      case 0x223C: *ch = 0x223D; break;
      case 0x223D: *ch = 0x223C; break;
      case 0x2243: *ch = 0x22CD; break;
      case 0x2252: *ch = 0x2253; break;
      case 0x2253: *ch = 0x2252; break;
      case 0x2254: *ch = 0x2255; break;
      case 0x2255: *ch = 0x2254; break;
      case 0x2264: *ch = 0x2265; break;
      case 0x2265: *ch = 0x2264; break;
      case 0x2266: *ch = 0x2267; break;
      case 0x2267: *ch = 0x2266; break;
      case 0x2268: *ch = 0x2269; break;
      case 0x2269: *ch = 0x2268; break;
      case 0x226A: *ch = 0x226B; break;
      case 0x226B: *ch = 0x226A; break;
      case 0x226E: *ch = 0x226F; break;
      case 0x226F: *ch = 0x226E; break;
      case 0x2270: *ch = 0x2271; break;
      case 0x2271: *ch = 0x2270; break;
      case 0x2272: *ch = 0x2273; break;
      case 0x2273: *ch = 0x2272; break;
      case 0x2274: *ch = 0x2275; break;
      case 0x2275: *ch = 0x2274; break;
      case 0x2276: *ch = 0x2277; break;
      case 0x2277: *ch = 0x2276; break;
      case 0x2278: *ch = 0x2279; break;
      case 0x2279: *ch = 0x2278; break;
      case 0x227A: *ch = 0x227B; break;
      case 0x227B: *ch = 0x227A; break;
      case 0x227C: *ch = 0x227D; break;
      case 0x227D: *ch = 0x227C; break;
      case 0x227E: *ch = 0x227F; break;
      case 0x227F: *ch = 0x227E; break;
      case 0x2280: *ch = 0x2281; break;
      case 0x2281: *ch = 0x2280; break;
      case 0x2282: *ch = 0x2283; break;
      case 0x2283: *ch = 0x2282; break;
      case 0x2284: *ch = 0x2285; break;
      case 0x2285: *ch = 0x2284; break;
      case 0x2286: *ch = 0x2287; break;
      case 0x2287: *ch = 0x2286; break;
      case 0x2288: *ch = 0x2289; break;
      case 0x2289: *ch = 0x2288; break;
      case 0x228A: *ch = 0x228B; break;
      case 0x228B: *ch = 0x228A; break;
      case 0x228F: *ch = 0x2290; break;
      case 0x2290: *ch = 0x228F; break;
      case 0x2291: *ch = 0x2292; break;
      case 0x2292: *ch = 0x2291; break;
      case 0x2298: *ch = 0x29B8; break;
      case 0x22A2: *ch = 0x22A3; break;
      case 0x22A3: *ch = 0x22A2; break;
      case 0x22A6: *ch = 0x2ADE; break;
      case 0x22A8: *ch = 0x2AE4; break;
      case 0x22A9: *ch = 0x2AE3; break;
      case 0x22AB: *ch = 0x2AE5; break;
      case 0x22B0: *ch = 0x22B1; break;
      case 0x22B1: *ch = 0x22B0; break;
      case 0x22B2: *ch = 0x22B3; break;
      case 0x22B3: *ch = 0x22B2; break;
      case 0x22B4: *ch = 0x22B5; break;
      case 0x22B5: *ch = 0x22B4; break;
      case 0x22B6: *ch = 0x22B7; break;
      case 0x22B7: *ch = 0x22B6; break;
      case 0x22C9: *ch = 0x22CA; break;
      case 0x22CA: *ch = 0x22C9; break;
      case 0x22CB: *ch = 0x22CC; break;
      case 0x22CC: *ch = 0x22CB; break;
      case 0x22CD: *ch = 0x2243; break;
      case 0x22D0: *ch = 0x22D1; break;
      case 0x22D1: *ch = 0x22D0; break;
      case 0x22D6: *ch = 0x22D7; break;
      case 0x22D7: *ch = 0x22D6; break;
      case 0x22D8: *ch = 0x22D9; break;
      case 0x22D9: *ch = 0x22D8; break;
      case 0x22DA: *ch = 0x22DB; break;
      case 0x22DB: *ch = 0x22DA; break;
      case 0x22DC: *ch = 0x22DD; break;
      case 0x22DD: *ch = 0x22DC; break;
      case 0x22DE: *ch = 0x22DF; break;
      case 0x22DF: *ch = 0x22DE; break;
      case 0x22E0: *ch = 0x22E1; break;
      case 0x22E1: *ch = 0x22E0; break;
      case 0x22E2: *ch = 0x22E3; break;
      case 0x22E3: *ch = 0x22E2; break;
      case 0x22E4: *ch = 0x22E5; break;
      case 0x22E5: *ch = 0x22E4; break;
      case 0x22E6: *ch = 0x22E7; break;
      case 0x22E7: *ch = 0x22E6; break;
      case 0x22E8: *ch = 0x22E9; break;
      case 0x22E9: *ch = 0x22E8; break;
      case 0x22EA: *ch = 0x22EB; break;
      case 0x22EB: *ch = 0x22EA; break;
      case 0x22EC: *ch = 0x22ED; break;
      case 0x22ED: *ch = 0x22EC; break;
      case 0x22F0: *ch = 0x22F1; break;
      case 0x22F1: *ch = 0x22F0; break;
      case 0x22F2: *ch = 0x22FA; break;
      case 0x22F3: *ch = 0x22FB; break;
      case 0x22F4: *ch = 0x22FC; break;
      case 0x22F6: *ch = 0x22FD; break;
      case 0x22F7: *ch = 0x22FE; break;
      case 0x22FA: *ch = 0x22F2; break;
      case 0x22FB: *ch = 0x22F3; break;
      case 0x22FC: *ch = 0x22F4; break;
      case 0x22FD: *ch = 0x22F6; break;
      case 0x22FE: *ch = 0x22F7; break;

      /* U+23xx: ceilings, floors, angle brackets */
      case 0x2308: *ch = 0x2309; break;
      case 0x2309: *ch = 0x2308; break;
      case 0x230A: *ch = 0x230B; break;
      case 0x230B: *ch = 0x230A; break;
      case 0x2329: *ch = 0x232A; break;
      case 0x232A: *ch = 0x2329; break;

      /* U+27xx: ornamental brackets, misc. mathematical symbols-A */
      case 0x2768: *ch = 0x2769; break;
      case 0x2769: *ch = 0x2768; break;
      case 0x276A: *ch = 0x276B; break;
      case 0x276B: *ch = 0x276A; break;
      case 0x276C: *ch = 0x276D; break;
      case 0x276D: *ch = 0x276C; break;
      case 0x276E: *ch = 0x276F; break;
      case 0x276F: *ch = 0x276E; break;
      case 0x2770: *ch = 0x2771; break;
      case 0x2771: *ch = 0x2770; break;
      case 0x2772: *ch = 0x2773; break;
      case 0x2773: *ch = 0x2772; break;
      case 0x2774: *ch = 0x2775; break;
      case 0x2775: *ch = 0x2774; break;
      case 0x27D5: *ch = 0x27D6; break;
      case 0x27D6: *ch = 0x27D5; break;
      case 0x27DD: *ch = 0x27DE; break;
      case 0x27DE: *ch = 0x27DD; break;
      case 0x27E2: *ch = 0x27E3; break;
      case 0x27E3: *ch = 0x27E2; break;
      case 0x27E4: *ch = 0x27E5; break;
      case 0x27E5: *ch = 0x27E4; break;
      case 0x27E6: *ch = 0x27E7; break;
      case 0x27E7: *ch = 0x27E6; break;
      case 0x27E8: *ch = 0x27E9; break;
      case 0x27E9: *ch = 0x27E8; break;
      case 0x27EA: *ch = 0x27EB; break;
      case 0x27EB: *ch = 0x27EA; break;

      /* U+29xx: misc. mathematical symbols-B */
      case 0x2983: *ch = 0x2984; break;
      case 0x2984: *ch = 0x2983; break;
      case 0x2985: *ch = 0x2986; break;
      case 0x2986: *ch = 0x2985; break;
      case 0x2987: *ch = 0x2988; break;
      case 0x2988: *ch = 0x2987; break;
      case 0x2989: *ch = 0x298A; break;
      case 0x298A: *ch = 0x2989; break;
      case 0x298B: *ch = 0x298C; break;
      case 0x298C: *ch = 0x298B; break;
      /* 298D..2990 pair crosswise (298D<->2990, 298E<->298F). */
      case 0x298D: *ch = 0x2990; break;
      case 0x298E: *ch = 0x298F; break;
      case 0x298F: *ch = 0x298E; break;
      case 0x2990: *ch = 0x298D; break;
      case 0x2991: *ch = 0x2992; break;
      case 0x2992: *ch = 0x2991; break;
      case 0x2993: *ch = 0x2994; break;
      case 0x2994: *ch = 0x2993; break;
      case 0x2995: *ch = 0x2996; break;
      case 0x2996: *ch = 0x2995; break;
      case 0x2997: *ch = 0x2998; break;
      case 0x2998: *ch = 0x2997; break;
      case 0x29B8: *ch = 0x2298; break;
      case 0x29C0: *ch = 0x29C1; break;
      case 0x29C1: *ch = 0x29C0; break;
      case 0x29C4: *ch = 0x29C5; break;
      case 0x29C5: *ch = 0x29C4; break;
      case 0x29CF: *ch = 0x29D0; break;
      case 0x29D0: *ch = 0x29CF; break;
      case 0x29D1: *ch = 0x29D2; break;
      case 0x29D2: *ch = 0x29D1; break;
      case 0x29D4: *ch = 0x29D5; break;
      case 0x29D5: *ch = 0x29D4; break;
      case 0x29D8: *ch = 0x29D9; break;
      case 0x29D9: *ch = 0x29D8; break;
      case 0x29DA: *ch = 0x29DB; break;
      case 0x29DB: *ch = 0x29DA; break;
      case 0x29F5: *ch = 0x2215; break;
      case 0x29F8: *ch = 0x29F9; break;
      case 0x29F9: *ch = 0x29F8; break;
      case 0x29FC: *ch = 0x29FD; break;
      case 0x29FD: *ch = 0x29FC; break;

      /* U+2Axx: supplemental mathematical operators */
      case 0x2A2B: *ch = 0x2A2C; break;
      case 0x2A2C: *ch = 0x2A2B; break;
      /* PLUS SIGN IN LEFT/RIGHT HALF CIRCLE mirror each other. */
      case 0x2A2D: *ch = 0x2A2E; break;
      case 0x2A2E: *ch = 0x2A2D; break;
      case 0x2A34: *ch = 0x2A35; break;
      case 0x2A35: *ch = 0x2A34; break;
      case 0x2A3C: *ch = 0x2A3D; break;
      case 0x2A3D: *ch = 0x2A3C; break;
      case 0x2A64: *ch = 0x2A65; break;
      case 0x2A65: *ch = 0x2A64; break;
      case 0x2A79: *ch = 0x2A7A; break;
      case 0x2A7A: *ch = 0x2A79; break;
      case 0x2A7D: *ch = 0x2A7E; break;
      case 0x2A7E: *ch = 0x2A7D; break;
      case 0x2A7F: *ch = 0x2A80; break;
      case 0x2A80: *ch = 0x2A7F; break;
      case 0x2A81: *ch = 0x2A82; break;
      case 0x2A82: *ch = 0x2A81; break;
      case 0x2A83: *ch = 0x2A84; break;
      case 0x2A84: *ch = 0x2A83; break;
      case 0x2A8B: *ch = 0x2A8C; break;
      case 0x2A8C: *ch = 0x2A8B; break;
      case 0x2A91: *ch = 0x2A92; break;
      case 0x2A92: *ch = 0x2A91; break;
      case 0x2A93: *ch = 0x2A94; break;
      case 0x2A94: *ch = 0x2A93; break;
      case 0x2A95: *ch = 0x2A96; break;
      case 0x2A96: *ch = 0x2A95; break;
      case 0x2A97: *ch = 0x2A98; break;
      case 0x2A98: *ch = 0x2A97; break;
      case 0x2A99: *ch = 0x2A9A; break;
      case 0x2A9A: *ch = 0x2A99; break;
      case 0x2A9B: *ch = 0x2A9C; break;
      case 0x2A9C: *ch = 0x2A9B; break;
      case 0x2AA1: *ch = 0x2AA2; break;
      case 0x2AA2: *ch = 0x2AA1; break;
      case 0x2AA6: *ch = 0x2AA7; break;
      case 0x2AA7: *ch = 0x2AA6; break;
      case 0x2AA8: *ch = 0x2AA9; break;
      case 0x2AA9: *ch = 0x2AA8; break;
      case 0x2AAA: *ch = 0x2AAB; break;
      case 0x2AAB: *ch = 0x2AAA; break;
      case 0x2AAC: *ch = 0x2AAD; break;
      case 0x2AAD: *ch = 0x2AAC; break;
      case 0x2AAF: *ch = 0x2AB0; break;
      case 0x2AB0: *ch = 0x2AAF; break;
      case 0x2AB3: *ch = 0x2AB4; break;
      case 0x2AB4: *ch = 0x2AB3; break;
      case 0x2ABB: *ch = 0x2ABC; break;
      case 0x2ABC: *ch = 0x2ABB; break;
      case 0x2ABD: *ch = 0x2ABE; break;
      case 0x2ABE: *ch = 0x2ABD; break;
      case 0x2ABF: *ch = 0x2AC0; break;
      case 0x2AC0: *ch = 0x2ABF; break;
      case 0x2AC1: *ch = 0x2AC2; break;
      case 0x2AC2: *ch = 0x2AC1; break;
      case 0x2AC3: *ch = 0x2AC4; break;
      case 0x2AC4: *ch = 0x2AC3; break;
      case 0x2AC5: *ch = 0x2AC6; break;
      case 0x2AC6: *ch = 0x2AC5; break;
      case 0x2ACD: *ch = 0x2ACE; break;
      case 0x2ACE: *ch = 0x2ACD; break;
      case 0x2ACF: *ch = 0x2AD0; break;
      case 0x2AD0: *ch = 0x2ACF; break;
      case 0x2AD1: *ch = 0x2AD2; break;
      case 0x2AD2: *ch = 0x2AD1; break;
      case 0x2AD3: *ch = 0x2AD4; break;
      case 0x2AD4: *ch = 0x2AD3; break;
      case 0x2AD5: *ch = 0x2AD6; break;
      case 0x2AD6: *ch = 0x2AD5; break;
      case 0x2ADE: *ch = 0x22A6; break;
      case 0x2AE3: *ch = 0x22A9; break;
      case 0x2AE4: *ch = 0x22A8; break;
      case 0x2AE5: *ch = 0x22AB; break;
      case 0x2AEC: *ch = 0x2AED; break;
      case 0x2AED: *ch = 0x2AEC; break;
      case 0x2AF7: *ch = 0x2AF8; break;
      case 0x2AF8: *ch = 0x2AF7; break;
      case 0x2AF9: *ch = 0x2AFA; break;
      case 0x2AFA: *ch = 0x2AF9; break;

      /* U+30xx: CJK brackets */
      case 0x3008: *ch = 0x3009; break;
      case 0x3009: *ch = 0x3008; break;
      case 0x300A: *ch = 0x300B; break;
      case 0x300B: *ch = 0x300A; break;
      case 0x300C: *ch = 0x300D; break;
      case 0x300D: *ch = 0x300C; break;
      case 0x300E: *ch = 0x300F; break;
      case 0x300F: *ch = 0x300E; break;
      case 0x3010: *ch = 0x3011; break;
      case 0x3011: *ch = 0x3010; break;
      case 0x3014: *ch = 0x3015; break;
      case 0x3015: *ch = 0x3014; break;
      case 0x3016: *ch = 0x3017; break;
      case 0x3017: *ch = 0x3016; break;
      case 0x3018: *ch = 0x3019; break;
      case 0x3019: *ch = 0x3018; break;
      case 0x301A: *ch = 0x301B; break;
      case 0x301B: *ch = 0x301A; break;

      /* U+FFxx: fullwidth and halfwidth forms */
      case 0xFF08: *ch = 0xFF09; break;
      case 0xFF09: *ch = 0xFF08; break;
      case 0xFF1C: *ch = 0xFF1E; break;
      case 0xFF1E: *ch = 0xFF1C; break;
      case 0xFF3B: *ch = 0xFF3D; break;
      case 0xFF3D: *ch = 0xFF3B; break;
      case 0xFF5B: *ch = 0xFF5D; break;
      case 0xFF5D: *ch = 0xFF5B; break;
      case 0xFF5F: *ch = 0xFF60; break;
      case 0xFF60: *ch = 0xFF5F; break;
      case 0xFF62: *ch = 0xFF63; break;
      case 0xFF63: *ch = 0xFF62; break;

      default:
        /* No mirrored counterpart known: leave *ch unchanged. */
        break;
    }
}
197c43dd 1821
1822#ifdef TEST_GETTYPE
1823
1824#include <stdio.h>
1825#include <assert.h>
1826
1827int main(int argc, char **argv)
1828{
1829 static const struct { int type; char *name; } typetoname[] = {
1830#define TYPETONAME(X) { X , #X }
1831 TYPETONAME(L),
1832 TYPETONAME(LRE),
1833 TYPETONAME(LRO),
1834 TYPETONAME(R),
1835 TYPETONAME(AL),
1836 TYPETONAME(RLE),
1837 TYPETONAME(RLO),
1838 TYPETONAME(PDF),
1839 TYPETONAME(EN),
1840 TYPETONAME(ES),
1841 TYPETONAME(ET),
1842 TYPETONAME(AN),
1843 TYPETONAME(CS),
1844 TYPETONAME(NSM),
1845 TYPETONAME(BN),
1846 TYPETONAME(B),
1847 TYPETONAME(S),
1848 TYPETONAME(WS),
1849 TYPETONAME(ON),
1850#undef TYPETONAME
1851 };
1852 int i;
1853
1854 for (i = 1; i < argc; i++) {
1855 unsigned long chr = strtoul(argv[i], NULL, 0);
1856 int type = getType(chr);
1857 assert(typetoname[type].type == type);
1858 printf("U+%04x: %s\n", chr, typetoname[type].name);
1859 }
1860
1861 return 0;
1862}
1863
1864#endif