Further clarity and speed cleanups of minibidi:
[u/mdw/putty] / minibidi.c
1 /************************************************************************
2 * $Id$
3 *
4 * ------------
5 * Description:
6 * ------------
7 * This is an implemention of Unicode's Bidirectional Algorithm
8 * (known as UAX #9).
9 *
10 * http://www.unicode.org/reports/tr9/
11 *
12 * Author: Ahmad Khalifa
13 *
14 * -----------------
15 * Revision Details: (Updated by Revision Control System)
16 * -----------------
17 * $Date$
18 * $Author$
19 * $Revision$
20 *
21 * (www.arabeyes.org - under MIT license)
22 *
23 ************************************************************************/
24
25 /*
26 * TODO:
27 * =====
28 * - Explicit marks need to be handled (they are not 100% now)
29 * - Ligatures
30 */
31
32 #include <stdlib.h> /* definition of wchar_t*/
33
34 #include "misc.h"
35
36 #define LMASK 0x3F /* Embedding Level mask */
37 #define OMASK 0xC0 /* Override mask */
38 #define OISL 0x80 /* Override is L */
39 #define OISR 0x40 /* Override is R */
40
41 /* Shaping Helpers */
42 #define STYPE(xh) ((((xh) >= SHAPE_FIRST) && ((xh) <= SHAPE_LAST)) ? \
43 shapetypes[(xh)-SHAPE_FIRST].type : SU) /*))*/
44 #define SISOLATED(xh) (shapetypes[(xh)-SHAPE_FIRST].form_b)
45 #define SFINAL(xh) ((xh)+1)
46 #define SINITIAL(xh) ((xh)+2)
47 #define SMEDIAL(ch) ((ch)+3)
48
49 #define leastGreaterOdd(x) ( ((x)+1) | 1 )
50 #define leastGreaterEven(x) ( ((x)+2) &~ 1 )
51
52 typedef struct bidi_char {
53 wchar_t origwc, wc;
54 unsigned short index;
55 } bidi_char;
56
57 /* function declarations */
58 void flipThisRun(bidi_char *from, unsigned char* level, int max, int count);
59 int findIndexOfRun(unsigned char* level , int start, int count, int tlevel);
60 unsigned char getType(wchar_t ch);
61 unsigned char setOverrideBits(unsigned char level, unsigned char override);
62 int getPreviousLevel(unsigned char* level, int from);
63 unsigned char getRLE(wchar_t ch);
64 int do_shape(bidi_char *line, bidi_char *to, int count);
65 int do_bidi(bidi_char *line, int count);
66 void doMirror(wchar_t* ch);
67
68 /* character types */
69 enum {
70 L,
71 LRE,
72 LRO,
73 R,
74 AL,
75 RLE,
76 RLO,
77 PDF,
78 EN,
79 ES,
80 ET,
81 AN,
82 CS,
83 NSM,
84 BN,
85 B,
86 S,
87 WS,
88 ON,
89 };
90
91 /* Shaping Types */
92 enum {
93 SL, /* Left-Joining, doesnt exist in U+0600 - U+06FF */
94 SR, /* Right-Joining, ie has Isolated, Final */
95 SD, /* Dual-Joining, ie has Isolated, Final, Initial, Medial */
96 SU, /* Non-Joining */
97 SC /* Join-Causing, like U+0640 (TATWEEL) */
98 };
99
100 typedef struct {
101 char type;
102 wchar_t form_b;
103 } shape_node;
104
105 /* Kept near the actual table, for verification. */
106 #define SHAPE_FIRST 0x621
107 #define SHAPE_LAST 0x64A
108
109 const shape_node shapetypes[] = {
110 /* index, Typ, Iso, Ligature Index*/
111 /* 621 */ {SU, 0xFE80},
112 /* 622 */ {SR, 0xFE81},
113 /* 623 */ {SR, 0xFE83},
114 /* 624 */ {SR, 0xFE85},
115 /* 625 */ {SR, 0xFE87},
116 /* 626 */ {SD, 0xFE89},
117 /* 627 */ {SR, 0xFE8D},
118 /* 628 */ {SD, 0xFE8F},
119 /* 629 */ {SR, 0xFE93},
120 /* 62A */ {SD, 0xFE95},
121 /* 62B */ {SD, 0xFE99},
122 /* 62C */ {SD, 0xFE9D},
123 /* 62D */ {SD, 0xFEA1},
124 /* 62E */ {SD, 0xFEA5},
125 /* 62F */ {SR, 0xFEA9},
126 /* 630 */ {SR, 0xFEAB},
127 /* 631 */ {SR, 0xFEAD},
128 /* 632 */ {SR, 0xFEAF},
129 /* 633 */ {SD, 0xFEB1},
130 /* 634 */ {SD, 0xFEB5},
131 /* 635 */ {SD, 0xFEB9},
132 /* 636 */ {SD, 0xFEBD},
133 /* 637 */ {SD, 0xFEC1},
134 /* 638 */ {SD, 0xFEC5},
135 /* 639 */ {SD, 0xFEC9},
136 /* 63A */ {SD, 0xFECD},
137 /* 63B */ {SU, 0x0},
138 /* 63C */ {SU, 0x0},
139 /* 63D */ {SU, 0x0},
140 /* 63E */ {SU, 0x0},
141 /* 63F */ {SU, 0x0},
142 /* 640 */ {SC, 0x0},
143 /* 641 */ {SD, 0xFED1},
144 /* 642 */ {SD, 0xFED5},
145 /* 643 */ {SD, 0xFED9},
146 /* 644 */ {SD, 0xFEDD},
147 /* 645 */ {SD, 0xFEE1},
148 /* 646 */ {SD, 0xFEE5},
149 /* 647 */ {SD, 0xFEE9},
150 /* 648 */ {SR, 0xFEED},
151 /* 649 */ {SR, 0xFEEF}, /* SD */
152 /* 64A */ {SD, 0xFEF1},
153 };
154
155 /*
156 * This describes the data byte and its frequency
157 */
158 typedef struct {
159 unsigned char d;
160 unsigned char f;
161 } RLENode;
162
163
164 /* This is an array of RLENodes, which is the
165 * Compressed unicode types table
166 */
167 const RLENode RLE_table[] = {
168 { BN, 9}, { S, 1}, { B, 1}, { S, 1}, { WS, 1},
169 { B, 1}, { BN, 14}, { B, 3}, { S, 1}, { WS, 1},
170 { ON, 2}, { ET, 3}, { ON, 5}, { ET, 1}, { CS, 1},
171 { ET, 1}, { CS, 1}, { ES, 1}, { EN, 10}, { CS, 1},
172 { ON, 6}, { L, 26}, { ON, 6}, { L, 26}, { ON, 4},
173 { BN, 6}, { B, 1}, { BN, 26}, { CS, 1}, { ON, 1},
174 { ET, 4}, { ON, 4}, { L, 1}, { ON, 5}, { ET, 2},
175 { EN, 2}, { ON, 1}, { L, 1}, { ON, 3}, { EN, 1},
176 { L, 1}, { ON, 5}, { L, 23}, { ON, 1}, { L, 31},
177 { ON, 1}, { L, 255}, { L, 42}, { ON, 1}, { L, 18},
178 { ON, 28}, { L, 94}, { ON, 2}, { L, 9}, { ON, 2},
179 { L, 7}, { ON, 14}, { L, 2}, { ON, 14}, { L, 5},
180 { ON, 9}, { L, 1}, { ON, 17}, {NSM, 80}, { ON, 16},
181 {NSM, 16}, { ON, 10}, { L, 1}, { ON, 11}, { L, 1},
182 { ON, 1}, { L, 3}, { ON, 1}, { L, 1}, { ON, 1},
183 { L, 20}, { ON, 1}, { L, 44}, { ON, 1}, { L, 38},
184 { ON, 10}, { L, 131}, {NSM, 4}, { ON, 1}, {NSM, 2},
185 { L, 69}, { ON, 1}, { L, 38}, { ON, 2}, { L, 2},
186 { ON, 6}, { L, 16}, { ON, 33}, { L, 38}, { ON, 2},
187 { L, 7}, { ON, 1}, { L, 39}, { ON, 1}, { L, 1},
188 { ON, 7}, {NSM, 17}, { ON, 1}, {NSM, 23}, { ON, 1},
189 {NSM, 3}, { R, 1}, {NSM, 1}, { R, 1}, {NSM, 2},
190 { R, 1}, {NSM, 1}, { ON, 11}, { R, 27}, { ON, 5},
191 { R, 5}, { ON, 23}, { CS, 1}, { ON, 14}, { AL, 1},
192 { ON, 3}, { AL, 1}, { ON, 1}, { AL, 26}, { ON, 5},
193 { AL, 11}, {NSM, 11}, { ON, 10}, { AN, 10}, { ET, 1},
194 { AN, 2}, { AL, 3}, {NSM, 1}, { AL, 101}, {NSM, 7},
195 { AL, 1}, {NSM, 7}, { AL, 2}, {NSM, 2}, { ON, 1},
196 {NSM, 4}, { ON, 2}, { EN, 10}, { AL, 5}, { ON, 1},
197 { AL, 14}, { ON, 1}, { BN, 1}, { AL, 1}, {NSM, 1},
198 { AL, 27}, { ON, 3}, {NSM, 27}, { ON, 53}, { AL, 38},
199 {NSM, 11}, { AL, 1}, { ON, 255}, { ON, 80}, {NSM, 2},
200 { L, 1}, { ON, 1}, { L, 53}, { ON, 2}, {NSM, 1},
201 { L, 4}, {NSM, 8}, { L, 4}, {NSM, 1}, { ON, 2},
202 { L, 1}, {NSM, 4}, { ON, 3}, { L, 10}, {NSM, 2},
203 { L, 13}, { ON, 16}, {NSM, 1}, { L, 2}, { ON, 1},
204 { L, 8}, { ON, 2}, { L, 2}, { ON, 2}, { L, 22},
205 { ON, 1}, { L, 7}, { ON, 1}, { L, 1}, { ON, 3},
206 { L, 4}, { ON, 2}, {NSM, 1}, { ON, 1}, { L, 3},
207 {NSM, 4}, { ON, 2}, { L, 2}, { ON, 2}, { L, 2},
208 {NSM, 1}, { ON, 9}, { L, 1}, { ON, 4}, { L, 2},
209 { ON, 1}, { L, 3}, {NSM, 2}, { ON, 2}, { L, 12},
210 { ET, 2}, { L, 7}, { ON, 7}, {NSM, 1}, { ON, 2},
211 { L, 6}, { ON, 4}, { L, 2}, { ON, 2}, { L, 22},
212 { ON, 1}, { L, 7}, { ON, 1}, { L, 2}, { ON, 1},
213 { L, 2}, { ON, 1}, { L, 2}, { ON, 2}, {NSM, 1},
214 { ON, 1}, { L, 3}, {NSM, 2}, { ON, 4}, {NSM, 2},
215 { ON, 2}, {NSM, 3}, { ON, 11}, { L, 4}, { ON, 1},
216 { L, 1}, { ON, 7}, { L, 10}, {NSM, 2}, { L, 3},
217 { ON, 12}, {NSM, 2}, { L, 1}, { ON, 1}, { L, 7},
218 { ON, 1}, { L, 1}, { ON, 1}, { L, 3}, { ON, 1},
219 { L, 22}, { ON, 1}, { L, 7}, { ON, 1}, { L, 2},
220 { ON, 1}, { L, 5}, { ON, 2}, {NSM, 1}, { L, 4},
221 {NSM, 5}, { ON, 1}, {NSM, 2}, { L, 1}, { ON, 1},
222 { L, 2}, {NSM, 1}, { ON, 2}, { L, 1}, { ON, 15},
223 { L, 1}, { ON, 5}, { L, 10}, { ON, 17}, {NSM, 1},
224 { L, 2}, { ON, 1}, { L, 8}, { ON, 2}, { L, 2},
225 { ON, 2}, { L, 22}, { ON, 1}, { L, 7}, { ON, 1},
226 { L, 2}, { ON, 2}, { L, 4}, { ON, 2}, {NSM, 1},
227 { L, 2}, {NSM, 1}, { L, 1}, {NSM, 3}, { ON, 3},
228 { L, 2}, { ON, 2}, { L, 2}, {NSM, 1}, { ON, 8},
229 {NSM, 1}, { L, 1}, { ON, 4}, { L, 2}, { ON, 1},
230 { L, 3}, { ON, 4}, { L, 11}, { ON, 17}, {NSM, 1},
231 { L, 1}, { ON, 1}, { L, 6}, { ON, 3}, { L, 3},
232 { ON, 1}, { L, 4}, { ON, 3}, { L, 2}, { ON, 1},
233 { L, 1}, { ON, 1}, { L, 2}, { ON, 3}, { L, 2},
234 { ON, 3}, { L, 3}, { ON, 3}, { L, 8}, { ON, 1},
235 { L, 3}, { ON, 4}, { L, 2}, {NSM, 1}, { L, 2},
236 { ON, 3}, { L, 3}, { ON, 1}, { L, 3}, {NSM, 1},
237 { ON, 9}, { L, 1}, { ON, 15}, { L, 12}, { ON, 14},
238 { L, 3}, { ON, 1}, { L, 8}, { ON, 1}, { L, 3},
239 { ON, 1}, { L, 23}, { ON, 1}, { L, 10}, { ON, 1},
240 { L, 5}, { ON, 4}, {NSM, 3}, { L, 4}, { ON, 1},
241 {NSM, 3}, { ON, 1}, {NSM, 4}, { ON, 7}, {NSM, 2},
242 { ON, 9}, { L, 2}, { ON, 4}, { L, 10}, { ON, 18},
243 { L, 2}, { ON, 1}, { L, 8}, { ON, 1}, { L, 3},
244 { ON, 1}, { L, 23}, { ON, 1}, { L, 10}, { ON, 1},
245 { L, 5}, { ON, 4}, { L, 1}, {NSM, 1}, { L, 5},
246 { ON, 1}, {NSM, 1}, { L, 2}, { ON, 1}, { L, 2},
247 {NSM, 2}, { ON, 7}, { L, 2}, { ON, 7}, { L, 1},
248 { ON, 1}, { L, 2}, { ON, 4}, { L, 10}, { ON, 18},
249 { L, 2}, { ON, 1}, { L, 8}, { ON, 1}, { L, 3},
250 { ON, 1}, { L, 23}, { ON, 1}, { L, 16}, { ON, 4},
251 { L, 3}, {NSM, 3}, { ON, 2}, { L, 3}, { ON, 1},
252 { L, 3}, {NSM, 1}, { ON, 9}, { L, 1}, { ON, 8},
253 { L, 2}, { ON, 4}, { L, 10}, { ON, 18}, { L, 2},
254 { ON, 1}, { L, 18}, { ON, 3}, { L, 24}, { ON, 1},
255 { L, 9}, { ON, 1}, { L, 1}, { ON, 2}, { L, 7},
256 { ON, 3}, {NSM, 1}, { ON, 4}, { L, 3}, {NSM, 3},
257 { ON, 1}, {NSM, 1}, { ON, 1}, { L, 8}, { ON, 18},
258 { L, 3}, { ON, 12}, { L, 48}, {NSM, 1}, { L, 2},
259 {NSM, 7}, { ON, 4}, { ET, 1}, { L, 7}, {NSM, 8},
260 { L, 13}, { ON, 37}, { L, 2}, { ON, 1}, { L, 1},
261 { ON, 2}, { L, 2}, { ON, 1}, { L, 1}, { ON, 2},
262 { L, 1}, { ON, 6}, { L, 4}, { ON, 1}, { L, 7},
263 { ON, 1}, { L, 3}, { ON, 1}, { L, 1}, { ON, 1},
264 { L, 1}, { ON, 2}, { L, 2}, { ON, 1}, { L, 4},
265 {NSM, 1}, { L, 2}, {NSM, 6}, { ON, 1}, {NSM, 2},
266 { L, 1}, { ON, 2}, { L, 5}, { ON, 1}, { L, 1},
267 { ON, 1}, {NSM, 6}, { ON, 2}, { L, 10}, { ON, 2},
268 { L, 2}, { ON, 34}, { L, 24}, {NSM, 2}, { L, 27},
269 {NSM, 1}, { L, 1}, {NSM, 1}, { L, 1}, {NSM, 1},
270 { ON, 4}, { L, 10}, { ON, 1}, { L, 34}, { ON, 6},
271 {NSM, 14}, { L, 1}, {NSM, 5}, { L, 1}, {NSM, 2},
272 { L, 4}, { ON, 4}, {NSM, 8}, { ON, 1}, {NSM, 36},
273 { ON, 1}, { L, 8}, {NSM, 1}, { L, 6}, { ON, 2},
274 { L, 1}, { ON, 48}, { L, 34}, { ON, 1}, { L, 5},
275 { ON, 1}, { L, 2}, { ON, 1}, { L, 1}, {NSM, 4},
276 { L, 1}, {NSM, 1}, { ON, 3}, {NSM, 2}, { L, 1},
277 {NSM, 1}, { ON, 6}, { L, 24}, {NSM, 2}, { ON, 70},
278 { L, 38}, { ON, 10}, { L, 41}, { ON, 2}, { L, 1},
279 { ON, 4}, { L, 90}, { ON, 5}, { L, 68}, { ON, 5},
280 { L, 82}, { ON, 6}, { L, 7}, { ON, 1}, { L, 63},
281 { ON, 1}, { L, 1}, { ON, 1}, { L, 4}, { ON, 2},
282 { L, 7}, { ON, 1}, { L, 1}, { ON, 1}, { L, 4},
283 { ON, 2}, { L, 39}, { ON, 1}, { L, 1}, { ON, 1},
284 { L, 4}, { ON, 2}, { L, 31}, { ON, 1}, { L, 1},
285 { ON, 1}, { L, 4}, { ON, 2}, { L, 7}, { ON, 1},
286 { L, 1}, { ON, 1}, { L, 4}, { ON, 2}, { L, 7},
287 { ON, 1}, { L, 7}, { ON, 1}, { L, 23}, { ON, 1},
288 { L, 31}, { ON, 1}, { L, 1}, { ON, 1}, { L, 4},
289 { ON, 2}, { L, 7}, { ON, 1}, { L, 39}, { ON, 1},
290 { L, 19}, { ON, 6}, { L, 28}, { ON, 35}, { L, 85},
291 { ON, 12}, { L, 255}, { L, 255}, { L, 120}, { ON, 9},
292 { WS, 1}, { L, 26}, { ON, 5}, { L, 81}, { ON, 15},
293 { L, 13}, { ON, 1}, { L, 4}, {NSM, 3}, { ON, 11},
294 { L, 18}, {NSM, 3}, { L, 2}, { ON, 9}, { L, 18},
295 {NSM, 2}, { ON, 12}, { L, 13}, { ON, 1}, { L, 3},
296 { ON, 1}, {NSM, 2}, { ON, 12}, { L, 55}, {NSM, 7},
297 { L, 8}, {NSM, 1}, { L, 2}, {NSM, 11}, { L, 7},
298 { ET, 1}, { L, 1}, { ON, 3}, { L, 10}, { ON, 33},
299 {NSM, 3}, { BN, 1}, { ON, 1}, { L, 10}, { ON, 6},
300 { L, 88}, { ON, 8}, { L, 41}, {NSM, 1}, { ON, 255},
301 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 91},
302 { L, 156}, { ON, 4}, { L, 90}, { ON, 6}, { L, 22},
303 { ON, 2}, { L, 6}, { ON, 2}, { L, 38}, { ON, 2},
304 { L, 6}, { ON, 2}, { L, 8}, { ON, 1}, { L, 1},
305 { ON, 1}, { L, 1}, { ON, 1}, { L, 1}, { ON, 1},
306 { L, 31}, { ON, 2}, { L, 53}, { ON, 1}, { L, 7},
307 { ON, 1}, { L, 1}, { ON, 3}, { L, 3}, { ON, 1},
308 { L, 7}, { ON, 3}, { L, 4}, { ON, 2}, { L, 6},
309 { ON, 4}, { L, 13}, { ON, 5}, { L, 3}, { ON, 1},
310 { L, 7}, { ON, 3}, { WS, 11}, { BN, 3}, { L, 1},
311 { R, 1}, { ON, 24}, { WS, 1}, { B, 1}, {LRE, 1},
312 {RLE, 1}, {PDF, 1}, {LRO, 1}, {RLO, 1}, { WS, 1},
313 { ET, 5}, { ON, 42}, { WS, 1}, { BN, 4}, { ON, 6},
314 { BN, 6}, { EN, 1}, { L, 1}, { ON, 2}, { EN, 6},
315 { ET, 2}, { ON, 3}, { L, 1}, { EN, 10}, { ET, 2},
316 { ON, 20}, { ET, 18}, { ON, 30}, {NSM, 27}, { ON, 23},
317 { L, 1}, { ON, 4}, { L, 1}, { ON, 2}, { L, 10},
318 { ON, 1}, { L, 1}, { ON, 3}, { L, 5}, { ON, 6},
319 { L, 1}, { ON, 1}, { L, 1}, { ON, 1}, { L, 1},
320 { ON, 1}, { L, 4}, { ET, 1}, { L, 3}, { ON, 1},
321 { L, 7}, { ON, 3}, { L, 3}, { ON, 5}, { L, 5},
322 { ON, 22}, { L, 36}, { ON, 142}, { ET, 2}, { ON, 255},
323 { ON, 35}, { L, 69}, { ON, 26}, { L, 1}, { ON, 202},
324 { EN, 60}, { L, 78}, { EN, 1}, { ON, 255}, { ON, 255},
325 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
326 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 32},
327 { WS, 1}, { ON, 4}, { L, 3}, { ON, 25}, { L, 9},
328 {NSM, 6}, { ON, 1}, { L, 5}, { ON, 2}, { L, 5},
329 { ON, 4}, { L, 86}, { ON, 2}, {NSM, 2}, { ON, 2},
330 { L, 3}, { ON, 1}, { L, 90}, { ON, 1}, { L, 4},
331 { ON, 5}, { L, 40}, { ON, 4}, { L, 94}, { ON, 1},
332 { L, 40}, { ON, 56}, { L, 45}, { ON, 3}, { L, 36},
333 { ON, 28}, { L, 28}, { ON, 3}, { L, 50}, { ON, 15},
334 { L, 12}, { ON, 4}, { L, 47}, { ON, 1}, { L, 119},
335 { ON, 4}, { L, 99}, { ON, 2}, { L, 31}, { ON, 1},
336 { L, 1}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
337 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
338 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
339 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
340 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
341 { ON, 255}, { ON, 205}, { L, 1}, { ON, 74}, { L, 1},
342 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
343 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
344 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
345 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
346 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
347 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
348 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
349 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
350 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
351 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
352 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
353 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
354 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
355 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
356 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
357 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
358 { ON, 255}, { ON, 245}, { L, 1}, { ON, 90}, { L, 255},
359 { L, 255}, { L, 255}, { L, 255}, { L, 145}, { ON, 255},
360 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
361 { ON, 255}, { ON, 122}, { L, 1}, { ON, 255}, { ON, 255},
362 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
363 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
364 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
365 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
366 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
367 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
368 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
369 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
370 { ON, 255}, { ON, 205}, { L, 1}, { ON, 92}, { L, 1},
371 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 129}, { L, 2},
372 { ON, 126}, { L, 2}, { ON, 255}, { ON, 255}, { ON, 255},
373 { ON, 255}, { ON, 2}, { L, 2}, { ON, 255}, { ON, 255},
374 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
375 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
376 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
377 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255}, { ON, 255},
378 { ON, 255}, { ON, 255}, { ON, 255}, { ON, 23}, { L, 255},
379 { L, 48}, { ON, 2}, { L, 59}, { ON, 149}, { L, 7},
380 { ON, 12}, { L, 5}, { ON, 5}, { R, 1}, {NSM, 1},
381 { R, 10}, { ET, 1}, { R, 13}, { ON, 1}, { R, 5},
382 { ON, 1}, { R, 1}, { ON, 1}, { R, 2}, { ON, 1},
383 { R, 2}, { ON, 1}, { R, 10}, { AL, 98}, { ON, 33},
384 { AL, 255}, { AL, 108}, { ON, 18}, { AL, 64}, { ON, 2},
385 { AL, 54}, { ON, 40}, { AL, 13}, { ON, 3}, {NSM, 16},
386 { ON, 16}, {NSM, 4}, { ON, 44}, { CS, 1}, { ON, 1},
387 { CS, 1}, { ON, 2}, { CS, 1}, { ON, 9}, { ET, 1},
388 { ON, 2}, { ET, 2}, { ON, 5}, { ET, 2}, { ON, 5},
389 { AL, 5}, { ON, 1}, { AL, 135}, { ON, 2}, { BN, 1},
390 { ON, 3}, { ET, 3}, { ON, 5}, { ET, 1}, { CS, 1},
391 { ET, 1}, { CS, 1}, { ES, 1}, { EN, 10}, { CS, 1},
392 { ON, 6}, { L, 26}, { ON, 6}, { L, 26}, { ON, 11},
393 { L, 89}, { ON, 3}, { L, 6}, { ON, 2}, { L, 6},
394 { ON, 2}, { L, 6}, { ON, 2}, { L, 3}, { ON, 3},
395 { ET, 2}, { ON, 3}, { ET, 2}, { ON, 9}, { L, 14},
396 };
397
398 /*
399 * Flips the text buffer, according to max level, and
400 * all higher levels
401 *
402 * Input:
403 * from: text buffer, on which to apply flipping
404 * level: resolved levels buffer
405 * max: the maximum level found in this line (should be unsigned char)
406 * count: line size in bidi_char
407 */
408 void flipThisRun(bidi_char *from, unsigned char *level, int max, int count)
409 {
410 int i, j, k, tlevel;
411 bidi_char temp;
412
413 j = i = 0;
414 while (i<count && j<count) {
415
416 /* find the start of the run of level=max */
417 tlevel = max;
418 i = j = findIndexOfRun(level, i, count, max);
419 /* find the end of the run */
420 while (i<count && tlevel <= level[i]) {
421 i++;
422 }
423 for (k = i - 1; k > j; k--, j++) {
424 temp = from[k];
425 from[k] = from[j];
426 from[j] = temp;
427 }
428 }
429 }
430
431 /*
432 * Finds the index of a run with level equals tlevel
433 */
434 int findIndexOfRun(unsigned char* level , int start, int count, int tlevel)
435 {
436 int i;
437 for (i=start; i<count; i++) {
438 if (tlevel == level[i]) {
439 return i;
440 }
441 }
442 return count;
443 }
444
445 /*
446 * Returns character type of ch, by calling RLE table lookup
447 * function
448 */
449 unsigned char getType(wchar_t ch)
450 {
451 return getRLE(ch);
452 }
453
454 /*
455 * The most significant 2 bits of each level are used to store
456 * Override status of each character
457 * This function sets the override bits of level according
458 * to the value in override, and reurns the new byte.
459 */
460 unsigned char setOverrideBits(unsigned char level, unsigned char override)
461 {
462 if (override == ON)
463 return level;
464 else if (override == R)
465 return level | OISR;
466 else if (override == L)
467 return level | OISL;
468 return level;
469 }
470
471 /*
472 * Find the most recent run of the same value in `level', and
473 * return the value _before_ it. Used to process U+202C POP
474 * DIRECTIONAL FORMATTING.
475 */
476 int getPreviousLevel(unsigned char* level, int from)
477 {
478 if (from > 0) {
479 unsigned char current = level[--from];
480
481 while (from >= 0 && level[from] == current)
482 from--;
483
484 if (from >= 0)
485 return level[from];
486
487 return -1;
488 } else
489 return -1;
490 }
491
492 /*
493 * Loops over the RLE_table array looking for the
494 * type of ch
495 */
496 unsigned char getRLE(wchar_t ch)
497 {
498 int offset, i;
499
500 offset = 0;
501 for (i=0; i<lenof(RLE_table); i++) {
502 offset += RLE_table[i].f;
503 if (ch < offset)
504 return RLE_table[i].d;
505 }
506 /* anything beyond the end of the table is unknown */
507 return ON;
508 }
509
510 /* The Main shaping function, and the only one to be used
511 * by the outside world.
512 *
513 * line: buffer to apply shaping to. this must be passed by doBidi() first
514 * to: output buffer for the shaped data
515 * count: number of characters in line
516 */
517 int do_shape(bidi_char *line, bidi_char *to, int count)
518 {
519 int i, tempShape, ligFlag;
520
521 for (ligFlag=i=0; i<count; i++) {
522 to[i] = line[i];
523 tempShape = STYPE(line[i].wc);
524 switch (tempShape) {
525 case SC:
526 break;
527
528 case SU:
529 break;
530
531 case SR:
532 tempShape = (i+1 < count ? STYPE(line[i+1].wc) : SU);
533 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
534 to[i].wc = SFINAL((SISOLATED(line[i].wc)));
535 else
536 to[i].wc = SISOLATED(line[i].wc);
537 break;
538
539
540 case SD:
541 /* Make Ligatures */
542 tempShape = (i+1 < count ? STYPE(line[i+1].wc) : SU);
543 if (line[i].wc == 0x644) {
544 if (i > 0) switch (line[i-1].wc) {
545 case 0x622:
546 ligFlag = 1;
547 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
548 to[i].wc = 0xFEF6;
549 else
550 to[i].wc = 0xFEF5;
551 break;
552 case 0x623:
553 ligFlag = 1;
554 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
555 to[i].wc = 0xFEF8;
556 else
557 to[i].wc = 0xFEF7;
558 break;
559 case 0x625:
560 ligFlag = 1;
561 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
562 to[i].wc = 0xFEFA;
563 else
564 to[i].wc = 0xFEF9;
565 break;
566 case 0x627:
567 ligFlag = 1;
568 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
569 to[i].wc = 0xFEFC;
570 else
571 to[i].wc = 0xFEFB;
572 break;
573 }
574 if (ligFlag) {
575 to[i-1].wc = 0x20;
576 ligFlag = 0;
577 break;
578 }
579 }
580
581 if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) {
582 tempShape = (i > 0 ? STYPE(line[i-1].wc) : SU);
583 if ((tempShape == SR) || (tempShape == SD) || (tempShape == SC))
584 to[i].wc = SMEDIAL((SISOLATED(line[i].wc)));
585 else
586 to[i].wc = SFINAL((SISOLATED(line[i].wc)));
587 break;
588 }
589
590 tempShape = (i > 0 ? STYPE(line[i-1].wc) : SU);
591 if ((tempShape == SR) || (tempShape == SD) || (tempShape == SC))
592 to[i].wc = SINITIAL((SISOLATED(line[i].wc)));
593 else
594 to[i].wc = SISOLATED(line[i].wc);
595 break;
596
597
598 }
599 }
600 return 1;
601 }
602
603 /*
604 * The Main Bidi Function, and the only function that should
605 * be used by the outside world.
606 *
607 * line: a buffer of size count containing text to apply
608 * the Bidirectional algorithm to.
609 */
610
611 int do_bidi(bidi_char *line, int count)
612 {
613 unsigned char* types;
614 unsigned char* levels;
615 unsigned char paragraphLevel;
616 unsigned char currentEmbedding;
617 unsigned char currentOverride;
618 unsigned char tempType;
619 int i, j, imax, yes, bover;
620
621 /* Check the presence of R or AL types as optimization */
622 yes = 0;
623 for (i=0; i<count; i++) {
624 int type = getType(line[i].wc);
625 if (type == R || type == AL) {
626 yes = 1;
627 break;
628 }
629 }
630 if (yes == 0)
631 return L;
632
633 /* Initialize types, levels */
634 types = snewn(count, unsigned char);
635 levels = snewn(count, unsigned char);
636
637 /* Rule (P1) NOT IMPLEMENTED
638 * P1. Split the text into separate paragraphs. A paragraph separator is
639 * kept with the previous paragraph. Within each paragraph, apply all the
640 * other rules of this algorithm.
641 */
642
643 /* Rule (P2), (P3)
644 * P2. In each paragraph, find the first character of type L, AL, or R.
645 * P3. If a character is found in P2 and it is of type AL or R, then set
646 * the paragraph embedding level to one; otherwise, set it to zero.
647 */
648 paragraphLevel = 0;
649 for (i=0; i<count ; i++) {
650 int type = getType(line[i].wc);
651 if (type == R || type == AL) {
652 paragraphLevel = 1;
653 break;
654 } else if (type == L)
655 break;
656 }
657
658 /* Rule (X1)
659 * X1. Begin by setting the current embedding level to the paragraph
660 * embedding level. Set the directional override status to neutral.
661 */
662 currentEmbedding = paragraphLevel;
663 currentOverride = ON;
664
665 /* Rule (X2), (X3), (X4), (X5), (X6), (X7), (X8)
666 * X2. With each RLE, compute the least greater odd embedding level.
667 * X3. With each LRE, compute the least greater even embedding level.
668 * X4. With each RLO, compute the least greater odd embedding level.
669 * X5. With each LRO, compute the least greater even embedding level.
670 * X6. For all types besides RLE, LRE, RLO, LRO, and PDF:
671 * a. Set the level of the current character to the current
672 * embedding level.
673 * b. Whenever the directional override status is not neutral,
674 * reset the current character type to the directional
675 * override status.
676 * X7. With each PDF, determine the matching embedding or override code.
677 * If there was a valid matching code, restore (pop) the last
678 * remembered (pushed) embedding level and directional override.
679 * X8. All explicit directional embeddings and overrides are completely
680 * terminated at the end of each paragraph. Paragraph separators are not
681 * included in the embedding. (Useless here) NOT IMPLEMENTED
682 */
683 bover = 0;
684 for (i=0; i<count; i++) {
685 tempType = getType(line[i].wc);
686 switch (tempType) {
687 case RLE:
688 currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding);
689 levels[i] = setOverrideBits(levels[i], currentOverride);
690 currentOverride = ON;
691 break;
692
693 case LRE:
694 currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding);
695 levels[i] = setOverrideBits(levels[i], currentOverride);
696 currentOverride = ON;
697 break;
698
699 case RLO:
700 currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding);
701 tempType = currentOverride = R;
702 bover = 1;
703 break;
704
705 case LRO:
706 currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding);
707 tempType = currentOverride = L;
708 bover = 1;
709 break;
710
711 case PDF:
712 {
713 int prevlevel = getPreviousLevel(levels, i);
714
715 if (prevlevel == -1) {
716 currentEmbedding = paragraphLevel;
717 currentOverride = ON;
718 } else {
719 currentOverride = currentEmbedding & OMASK;
720 currentEmbedding = currentEmbedding & ~OMASK;
721 }
722 }
723 levels[i] = currentEmbedding;
724 break;
725
726 /* Whitespace is treated as neutral for now */
727 case WS:
728 case S:
729 levels[i] = currentEmbedding;
730 tempType = ON;
731 if (currentOverride != ON)
732 tempType = currentOverride;
733 break;
734
735 default:
736 levels[i] = currentEmbedding;
737 if (currentOverride != ON)
738 tempType = currentOverride;
739 break;
740
741 }
742 types[i] = tempType;
743 }
744 /* this clears out all overrides, so we can use levels safely... */
745 /* checks bover first */
746 if (bover)
747 for (i=0; i<count; i++)
748 levels[i] = levels[i] & LMASK;
749
750 /* Rule (X9)
751 * X9. Remove all RLE, LRE, RLO, LRO, PDF, and BN codes.
752 * Here, they're converted to BN.
753 */
754 for (i=0; i<count; i++) {
755 switch (types[i]) {
756 case RLE:
757 case LRE:
758 case RLO:
759 case LRO:
760 case PDF:
761 types[i] = BN;
762 break;
763 }
764 }
765
766 /* Rule (W1)
767 * W1. Examine each non-spacing mark (NSM) in the level run, and change
768 * the type of the NSM to the type of the previous character. If the NSM
769 * is at the start of the level run, it will get the type of sor.
770 */
771 if (types[0] == NSM)
772 types[0] = paragraphLevel;
773
774 for (i=1; i<count; i++) {
775 if (types[i] == NSM)
776 types[i] = types[i-1];
777 /* Is this a safe assumption?
778 * I assumed the previous, IS a character.
779 */
780 }
781
782 /* Rule (W2)
783 * W2. Search backwards from each instance of a European number until the
784 * first strong type (R, L, AL, or sor) is found. If an AL is found,
785 * change the type of the European number to Arabic number.
786 */
787 for (i=0; i<count; i++) {
788 if (types[i] == EN) {
789 j=i;
790 while (j >= 0) {
791 if (types[j] == AL) {
792 types[i] = AN;
793 break;
794 } else if (types[j] == R || types[j] == L) {
795 break;
796 }
797 j--;
798 }
799 }
800 }
801
802 /* Rule (W3)
803 * W3. Change all ALs to R.
804 *
805 * Optimization: on Rule Xn, we might set a flag on AL type
806 * to prevent this loop in L R lines only...
807 */
808 for (i=0; i<count; i++) {
809 if (types[i] == AL)
810 types[i] = R;
811 }
812
813 /* Rule (W4)
814 * W4. A single European separator between two European numbers changes
815 * to a European number. A single common separator between two numbers
816 * of the same type changes to that type.
817 */
818 for (i=1; i<(count-1); i++) {
819 if (types[i] == ES) {
820 if (types[i-1] == EN && types[i+1] == EN)
821 types[i] = EN;
822 } else if (types[i] == CS) {
823 if (types[i-1] == EN && types[i+1] == EN)
824 types[i] = EN;
825 else if (types[i-1] == AN && types[i+1] == AN)
826 types[i] = AN;
827 }
828 }
829
830 /* Rule (W5)
831 * W5. A sequence of European terminators adjacent to European numbers
832 * changes to all European numbers.
833 *
834 * Optimization: lots here... else ifs need rearrangement
835 */
836 for (i=0; i<count; i++) {
837 if (types[i] == ET) {
838 if (i > 0 && types[i-1] == EN) {
839 types[i] = EN;
840 continue;
841 } else if (i < count-1 && types[i+1] == EN) {
842 types[i] = EN;
843 continue;
844 } else if (i < count-1 && types[i+1] == ET) {
845 j=i;
846 while (j <count && types[j] == ET) {
847 j++;
848 }
849 if (types[j] == EN)
850 types[i] = EN;
851 }
852 }
853 }
854
855 /* Rule (W6)
856 * W6. Otherwise, separators and terminators change to Other Neutral:
857 */
858 for (i=0; i<count; i++) {
859 switch (types[i]) {
860 case ES:
861 case ET:
862 case CS:
863 types[i] = ON;
864 break;
865 }
866 }
867
868 /* Rule (W7)
869 * W7. Search backwards from each instance of a European number until
870 * the first strong type (R, L, or sor) is found. If an L is found,
871 * then change the type of the European number to L.
872 */
873 for (i=0; i<count; i++) {
874 if (types[i] == EN) {
875 j=i;
876 while (j >= 0) {
877 if (types[j] == L) {
878 types[i] = L;
879 break;
880 } else if (types[j] == R || types[j] == AL) {
881 break;
882 }
883 j--;
884 }
885 }
886 }
887
888 /* Rule (N1)
889 * N1. A sequence of neutrals takes the direction of the surrounding
890 * strong text if the text on both sides has the same direction. European
891 * and Arabic numbers are treated as though they were R.
892 */
893 if (count >= 2 && types[0] == ON) {
894 if ((types[1] == R) || (types[1] == EN) || (types[1] == AN))
895 types[0] = R;
896 else if (types[1] == L)
897 types[0] = L;
898 }
899 for (i=1; i<(count-1); i++) {
900 if (types[i] == ON) {
901 if (types[i-1] == L) {
902 j=i;
903 while (j<(count-1) && types[j] == ON) {
904 j++;
905 }
906 if (types[j] == L) {
907 while (i<j) {
908 types[i] = L;
909 i++;
910 }
911 }
912
913 } else if ((types[i-1] == R) ||
914 (types[i-1] == EN) ||
915 (types[i-1] == AN)) {
916 j=i;
917 while (j<(count-1) && types[j] == ON) {
918 j++;
919 }
920 if ((types[j] == R) ||
921 (types[j] == EN) ||
922 (types[j] == AN)) {
923 while (i<j) {
924 types[i] = R;
925 i++;
926 }
927 }
928 }
929 }
930 }
931 if (count >= 2 && types[count-1] == ON) {
932 if (types[count-2] == R || types[count-2] == EN || types[count-2] == AN)
933 types[count-1] = R;
934 else if (types[count-2] == L)
935 types[count-1] = L;
936 }
937
938 /* Rule (N2)
939 * N2. Any remaining neutrals take the embedding direction.
940 */
941 for (i=0; i<count; i++) {
942 if (types[i] == ON) {
943 if ((levels[i] % 2) == 0)
944 types[i] = L;
945 else
946 types[i] = R;
947 }
948 }
949
950 /* Rule (I1)
951 * I1. For all characters with an even (left-to-right) embedding
952 * direction, those of type R go up one level and those of type AN or
953 * EN go up two levels.
954 */
955 for (i=0; i<count; i++) {
956 if ((levels[i] % 2) == 0) {
957 if (types[i] == R)
958 levels[i] += 1;
959 else if (types[i] == AN || types[i] == EN)
960 levels[i] += 2;
961 }
962 }
963
964 /* Rule (I2)
965 * I2. For all characters with an odd (right-to-left) embedding direction,
966 * those of type L, EN or AN go up one level.
967 */
968 for (i=0; i<count; i++) {
969 if ((levels[i] % 2) == 1) {
970 if (types[i] == L || types[i] == EN || types[i] == AN)
971 levels[i] += 1;
972 }
973 }
974
975 /* Rule (L1)
976 * L1. On each line, reset the embedding level of the following characters
977 * to the paragraph embedding level:
978 * (1)segment separators, (2)paragraph separators,
979 * (3)any sequence of whitespace characters preceding
980 * a segment separator or paragraph separator,
981 * (4)and any sequence of white space characters
982 * at the end of the line.
983 * The types of characters used here are the original types, not those
984 * modified by the previous phase.
985 */
986 j=count-1;
987 while (j>0 && (getType(line[j].wc) == WS)) {
988 j--;
989 }
990 if (j < (count-1)) {
991 for (j++; j<count; j++)
992 levels[j] = paragraphLevel;
993 }
994 for (i=0; i<count; i++) {
995 tempType = getType(line[i].wc);
996 if (tempType == WS) {
997 j=i;
998 while (j<count && (getType(line[j].wc) == WS)) {
999 j++;
1000 }
1001 if (j==count || getType(line[j].wc) == B ||
1002 getType(line[j].wc) == S) {
1003 for (j--; j>=i ; j--) {
1004 levels[j] = paragraphLevel;
1005 }
1006 }
1007 } else if (tempType == B || tempType == S) {
1008 levels[i] = paragraphLevel;
1009 }
1010 }
1011
1012 /* Rule (L4) NOT IMPLEMENTED
1013 * L4. A character that possesses the mirrored property as specified by
1014 * Section 4.7, Mirrored, must be depicted by a mirrored glyph if the
1015 * resolved directionality of that character is R.
1016 */
1017 /* Note: this is implemented before L2 for efficiency */
1018 for (i=0; i<count; i++)
1019 if ((levels[i] % 2) == 1)
1020 doMirror(&line[i].wc);
1021
1022 /* Rule (L2)
1023 * L2. From the highest level found in the text to the lowest odd level on
1024 * each line, including intermediate levels not actually present in the
1025 * text, reverse any contiguous sequence of characters that are at that
1026 * level or higher
1027 */
1028 /* we flip the character string and leave the level array */
1029 imax = 0;
1030 i=0;
1031 tempType = levels[0];
1032 while (i < count) {
1033 if (levels[i] > tempType) {
1034 tempType = levels[i];
1035 imax=i;
1036 }
1037 i++;
1038 }
1039 /* maximum level in tempType, its index in imax. */
1040 while (tempType > 0) { /* loop from highest level to the least odd, */
1041 /* which i assume is 1 */
1042 flipThisRun(line, levels, tempType, count);
1043 tempType--;
1044 }
1045
1046 /* Rule (L3) NOT IMPLEMENTED
1047 * L3. Combining marks applied to a right-to-left base character will at
1048 * this point precede their base character. If the rendering engine
1049 * expects them to follow the base characters in the final display
1050 * process, then the ordering of the marks and the base character must
1051 * be reversed.
1052 */
1053 sfree(types);
1054 sfree(levels);
1055 return R;
1056 }
1057
1058
1059 /*
1060 * Bad, Horrible function
1061 * takes a pointer to a character that is checked for
1062 * having a mirror glyph.
1063 */
1064 void doMirror(wchar_t* ch)
1065 {
1066 if ((*ch & 0xFF00) == 0) {
1067 switch (*ch) {
1068 case 0x0028: *ch = 0x0029; break;
1069 case 0x0029: *ch = 0x0028; break;
1070 case 0x003C: *ch = 0x003E; break;
1071 case 0x003E: *ch = 0x003C; break;
1072 case 0x005B: *ch = 0x005D; break;
1073 case 0x005D: *ch = 0x005B; break;
1074 case 0x007B: *ch = 0x007D; break;
1075 case 0x007D: *ch = 0x007B; break;
1076 case 0x00AB: *ch = 0x00BB; break;
1077 case 0x00BB: *ch = 0x00AB; break;
1078 }
1079 } else if ((*ch & 0xFF00) == 0x2000) {
1080 switch (*ch) {
1081 case 0x2039: *ch = 0x203A; break;
1082 case 0x203A: *ch = 0x2039; break;
1083 case 0x2045: *ch = 0x2046; break;
1084 case 0x2046: *ch = 0x2045; break;
1085 case 0x207D: *ch = 0x207E; break;
1086 case 0x207E: *ch = 0x207D; break;
1087 case 0x208D: *ch = 0x208E; break;
1088 case 0x208E: *ch = 0x208D; break;
1089 }
1090 } else if ((*ch & 0xFF00) == 0x2200) {
1091 switch (*ch) {
1092 case 0x2208: *ch = 0x220B; break;
1093 case 0x2209: *ch = 0x220C; break;
1094 case 0x220A: *ch = 0x220D; break;
1095 case 0x220B: *ch = 0x2208; break;
1096 case 0x220C: *ch = 0x2209; break;
1097 case 0x220D: *ch = 0x220A; break;
1098 case 0x2215: *ch = 0x29F5; break;
1099 case 0x223C: *ch = 0x223D; break;
1100 case 0x223D: *ch = 0x223C; break;
1101 case 0x2243: *ch = 0x22CD; break;
1102 case 0x2252: *ch = 0x2253; break;
1103 case 0x2253: *ch = 0x2252; break;
1104 case 0x2254: *ch = 0x2255; break;
1105 case 0x2255: *ch = 0x2254; break;
1106 case 0x2264: *ch = 0x2265; break;
1107 case 0x2265: *ch = 0x2264; break;
1108 case 0x2266: *ch = 0x2267; break;
1109 case 0x2267: *ch = 0x2266; break;
1110 case 0x2268: *ch = 0x2269; break;
1111 case 0x2269: *ch = 0x2268; break;
1112 case 0x226A: *ch = 0x226B; break;
1113 case 0x226B: *ch = 0x226A; break;
1114 case 0x226E: *ch = 0x226F; break;
1115 case 0x226F: *ch = 0x226E; break;
1116 case 0x2270: *ch = 0x2271; break;
1117 case 0x2271: *ch = 0x2270; break;
1118 case 0x2272: *ch = 0x2273; break;
1119 case 0x2273: *ch = 0x2272; break;
1120 case 0x2274: *ch = 0x2275; break;
1121 case 0x2275: *ch = 0x2274; break;
1122 case 0x2276: *ch = 0x2277; break;
1123 case 0x2277: *ch = 0x2276; break;
1124 case 0x2278: *ch = 0x2279; break;
1125 case 0x2279: *ch = 0x2278; break;
1126 case 0x227A: *ch = 0x227B; break;
1127 case 0x227B: *ch = 0x227A; break;
1128 case 0x227C: *ch = 0x227D; break;
1129 case 0x227D: *ch = 0x227C; break;
1130 case 0x227E: *ch = 0x227F; break;
1131 case 0x227F: *ch = 0x227E; break;
1132 case 0x2280: *ch = 0x2281; break;
1133 case 0x2281: *ch = 0x2280; break;
1134 case 0x2282: *ch = 0x2283; break;
1135 case 0x2283: *ch = 0x2282; break;
1136 case 0x2284: *ch = 0x2285; break;
1137 case 0x2285: *ch = 0x2284; break;
1138 case 0x2286: *ch = 0x2287; break;
1139 case 0x2287: *ch = 0x2286; break;
1140 case 0x2288: *ch = 0x2289; break;
1141 case 0x2289: *ch = 0x2288; break;
1142 case 0x228A: *ch = 0x228B; break;
1143 case 0x228B: *ch = 0x228A; break;
1144 case 0x228F: *ch = 0x2290; break;
1145 case 0x2290: *ch = 0x228F; break;
1146 case 0x2291: *ch = 0x2292; break;
1147 case 0x2292: *ch = 0x2291; break;
1148 case 0x2298: *ch = 0x29B8; break;
1149 case 0x22A2: *ch = 0x22A3; break;
1150 case 0x22A3: *ch = 0x22A2; break;
1151 case 0x22A6: *ch = 0x2ADE; break;
1152 case 0x22A8: *ch = 0x2AE4; break;
1153 case 0x22A9: *ch = 0x2AE3; break;
1154 case 0x22AB: *ch = 0x2AE5; break;
1155 case 0x22B0: *ch = 0x22B1; break;
1156 case 0x22B1: *ch = 0x22B0; break;
1157 case 0x22B2: *ch = 0x22B3; break;
1158 case 0x22B3: *ch = 0x22B2; break;
1159 case 0x22B4: *ch = 0x22B5; break;
1160 case 0x22B5: *ch = 0x22B4; break;
1161 case 0x22B6: *ch = 0x22B7; break;
1162 case 0x22B7: *ch = 0x22B6; break;
1163 case 0x22C9: *ch = 0x22CA; break;
1164 case 0x22CA: *ch = 0x22C9; break;
1165 case 0x22CB: *ch = 0x22CC; break;
1166 case 0x22CC: *ch = 0x22CB; break;
1167 case 0x22CD: *ch = 0x2243; break;
1168 case 0x22D0: *ch = 0x22D1; break;
1169 case 0x22D1: *ch = 0x22D0; break;
1170 case 0x22D6: *ch = 0x22D7; break;
1171 case 0x22D7: *ch = 0x22D6; break;
1172 case 0x22D8: *ch = 0x22D9; break;
1173 case 0x22D9: *ch = 0x22D8; break;
1174 case 0x22DA: *ch = 0x22DB; break;
1175 case 0x22DB: *ch = 0x22DA; break;
1176 case 0x22DC: *ch = 0x22DD; break;
1177 case 0x22DD: *ch = 0x22DC; break;
1178 case 0x22DE: *ch = 0x22DF; break;
1179 case 0x22DF: *ch = 0x22DE; break;
1180 case 0x22E0: *ch = 0x22E1; break;
1181 case 0x22E1: *ch = 0x22E0; break;
1182 case 0x22E2: *ch = 0x22E3; break;
1183 case 0x22E3: *ch = 0x22E2; break;
1184 case 0x22E4: *ch = 0x22E5; break;
1185 case 0x22E5: *ch = 0x22E4; break;
1186 case 0x22E6: *ch = 0x22E7; break;
1187 case 0x22E7: *ch = 0x22E6; break;
1188 case 0x22E8: *ch = 0x22E9; break;
1189 case 0x22E9: *ch = 0x22E8; break;
1190 case 0x22EA: *ch = 0x22EB; break;
1191 case 0x22EB: *ch = 0x22EA; break;
1192 case 0x22EC: *ch = 0x22ED; break;
1193 case 0x22ED: *ch = 0x22EC; break;
1194 case 0x22F0: *ch = 0x22F1; break;
1195 case 0x22F1: *ch = 0x22F0; break;
1196 case 0x22F2: *ch = 0x22FA; break;
1197 case 0x22F3: *ch = 0x22FB; break;
1198 case 0x22F4: *ch = 0x22FC; break;
1199 case 0x22F6: *ch = 0x22FD; break;
1200 case 0x22F7: *ch = 0x22FE; break;
1201 case 0x22FA: *ch = 0x22F2; break;
1202 case 0x22FB: *ch = 0x22F3; break;
1203 case 0x22FC: *ch = 0x22F4; break;
1204 case 0x22FD: *ch = 0x22F6; break;
1205 case 0x22FE: *ch = 0x22F7; break;
1206 }
1207 } else if ((*ch & 0xFF00) == 0x2300) {
1208 switch (*ch) {
1209 case 0x2308: *ch = 0x2309; break;
1210 case 0x2309: *ch = 0x2308; break;
1211 case 0x230A: *ch = 0x230B; break;
1212 case 0x230B: *ch = 0x230A; break;
1213 case 0x2329: *ch = 0x232A; break;
1214 case 0x232A: *ch = 0x2329; break;
1215 }
1216 } else if ((*ch & 0xFF00) == 0x2700) {
1217 switch (*ch) {
1218 case 0x2768: *ch = 0x2769; break;
1219 case 0x2769: *ch = 0x2768; break;
1220 case 0x276A: *ch = 0x276B; break;
1221 case 0x276B: *ch = 0x276A; break;
1222 case 0x276C: *ch = 0x276D; break;
1223 case 0x276D: *ch = 0x276C; break;
1224 case 0x276E: *ch = 0x276F; break;
1225 case 0x276F: *ch = 0x276E; break;
1226 case 0x2770: *ch = 0x2771; break;
1227 case 0x2771: *ch = 0x2770; break;
1228 case 0x2772: *ch = 0x2773; break;
1229 case 0x2773: *ch = 0x2772; break;
1230 case 0x2774: *ch = 0x2775; break;
1231 case 0x2775: *ch = 0x2774; break;
1232 case 0x27D5: *ch = 0x27D6; break;
1233 case 0x27D6: *ch = 0x27D5; break;
1234 case 0x27DD: *ch = 0x27DE; break;
1235 case 0x27DE: *ch = 0x27DD; break;
1236 case 0x27E2: *ch = 0x27E3; break;
1237 case 0x27E3: *ch = 0x27E2; break;
1238 case 0x27E4: *ch = 0x27E5; break;
1239 case 0x27E5: *ch = 0x27E4; break;
1240 case 0x27E6: *ch = 0x27E7; break;
1241 case 0x27E7: *ch = 0x27E6; break;
1242 case 0x27E8: *ch = 0x27E9; break;
1243 case 0x27E9: *ch = 0x27E8; break;
1244 case 0x27EA: *ch = 0x27EB; break;
1245 case 0x27EB: *ch = 0x27EA; break;
1246 }
1247 } else if ((*ch & 0xFF00) == 0x2900) {
1248 switch (*ch) {
1249 case 0x2983: *ch = 0x2984; break;
1250 case 0x2984: *ch = 0x2983; break;
1251 case 0x2985: *ch = 0x2986; break;
1252 case 0x2986: *ch = 0x2985; break;
1253 case 0x2987: *ch = 0x2988; break;
1254 case 0x2988: *ch = 0x2987; break;
1255 case 0x2989: *ch = 0x298A; break;
1256 case 0x298A: *ch = 0x2989; break;
1257 case 0x298B: *ch = 0x298C; break;
1258 case 0x298C: *ch = 0x298B; break;
1259 case 0x298D: *ch = 0x2990; break;
1260 case 0x298E: *ch = 0x298F; break;
1261 case 0x298F: *ch = 0x298E; break;
1262 case 0x2990: *ch = 0x298D; break;
1263 case 0x2991: *ch = 0x2992; break;
1264 case 0x2992: *ch = 0x2991; break;
1265 case 0x2993: *ch = 0x2994; break;
1266 case 0x2994: *ch = 0x2993; break;
1267 case 0x2995: *ch = 0x2996; break;
1268 case 0x2996: *ch = 0x2995; break;
1269 case 0x2997: *ch = 0x2998; break;
1270 case 0x2998: *ch = 0x2997; break;
1271 case 0x29B8: *ch = 0x2298; break;
1272 case 0x29C0: *ch = 0x29C1; break;
1273 case 0x29C1: *ch = 0x29C0; break;
1274 case 0x29C4: *ch = 0x29C5; break;
1275 case 0x29C5: *ch = 0x29C4; break;
1276 case 0x29CF: *ch = 0x29D0; break;
1277 case 0x29D0: *ch = 0x29CF; break;
1278 case 0x29D1: *ch = 0x29D2; break;
1279 case 0x29D2: *ch = 0x29D1; break;
1280 case 0x29D4: *ch = 0x29D5; break;
1281 case 0x29D5: *ch = 0x29D4; break;
1282 case 0x29D8: *ch = 0x29D9; break;
1283 case 0x29D9: *ch = 0x29D8; break;
1284 case 0x29DA: *ch = 0x29DB; break;
1285 case 0x29DB: *ch = 0x29DA; break;
1286 case 0x29F5: *ch = 0x2215; break;
1287 case 0x29F8: *ch = 0x29F9; break;
1288 case 0x29F9: *ch = 0x29F8; break;
1289 case 0x29FC: *ch = 0x29FD; break;
1290 case 0x29FD: *ch = 0x29FC; break;
1291 }
1292 } else if ((*ch & 0xFF00) == 0x2A00) {
1293 switch (*ch) {
1294 case 0x2A2B: *ch = 0x2A2C; break;
1295 case 0x2A2C: *ch = 0x2A2B; break;
1296 case 0x2A2D: *ch = 0x2A2C; break;
1297 case 0x2A2E: *ch = 0x2A2D; break;
1298 case 0x2A34: *ch = 0x2A35; break;
1299 case 0x2A35: *ch = 0x2A34; break;
1300 case 0x2A3C: *ch = 0x2A3D; break;
1301 case 0x2A3D: *ch = 0x2A3C; break;
1302 case 0x2A64: *ch = 0x2A65; break;
1303 case 0x2A65: *ch = 0x2A64; break;
1304 case 0x2A79: *ch = 0x2A7A; break;
1305 case 0x2A7A: *ch = 0x2A79; break;
1306 case 0x2A7D: *ch = 0x2A7E; break;
1307 case 0x2A7E: *ch = 0x2A7D; break;
1308 case 0x2A7F: *ch = 0x2A80; break;
1309 case 0x2A80: *ch = 0x2A7F; break;
1310 case 0x2A81: *ch = 0x2A82; break;
1311 case 0x2A82: *ch = 0x2A81; break;
1312 case 0x2A83: *ch = 0x2A84; break;
1313 case 0x2A84: *ch = 0x2A83; break;
1314 case 0x2A8B: *ch = 0x2A8C; break;
1315 case 0x2A8C: *ch = 0x2A8B; break;
1316 case 0x2A91: *ch = 0x2A92; break;
1317 case 0x2A92: *ch = 0x2A91; break;
1318 case 0x2A93: *ch = 0x2A94; break;
1319 case 0x2A94: *ch = 0x2A93; break;
1320 case 0x2A95: *ch = 0x2A96; break;
1321 case 0x2A96: *ch = 0x2A95; break;
1322 case 0x2A97: *ch = 0x2A98; break;
1323 case 0x2A98: *ch = 0x2A97; break;
1324 case 0x2A99: *ch = 0x2A9A; break;
1325 case 0x2A9A: *ch = 0x2A99; break;
1326 case 0x2A9B: *ch = 0x2A9C; break;
1327 case 0x2A9C: *ch = 0x2A9B; break;
1328 case 0x2AA1: *ch = 0x2AA2; break;
1329 case 0x2AA2: *ch = 0x2AA1; break;
1330 case 0x2AA6: *ch = 0x2AA7; break;
1331 case 0x2AA7: *ch = 0x2AA6; break;
1332 case 0x2AA8: *ch = 0x2AA9; break;
1333 case 0x2AA9: *ch = 0x2AA8; break;
1334 case 0x2AAA: *ch = 0x2AAB; break;
1335 case 0x2AAB: *ch = 0x2AAA; break;
1336 case 0x2AAC: *ch = 0x2AAD; break;
1337 case 0x2AAD: *ch = 0x2AAC; break;
1338 case 0x2AAF: *ch = 0x2AB0; break;
1339 case 0x2AB0: *ch = 0x2AAF; break;
1340 case 0x2AB3: *ch = 0x2AB4; break;
1341 case 0x2AB4: *ch = 0x2AB3; break;
1342 case 0x2ABB: *ch = 0x2ABC; break;
1343 case 0x2ABC: *ch = 0x2ABB; break;
1344 case 0x2ABD: *ch = 0x2ABE; break;
1345 case 0x2ABE: *ch = 0x2ABD; break;
1346 case 0x2ABF: *ch = 0x2AC0; break;
1347 case 0x2AC0: *ch = 0x2ABF; break;
1348 case 0x2AC1: *ch = 0x2AC2; break;
1349 case 0x2AC2: *ch = 0x2AC1; break;
1350 case 0x2AC3: *ch = 0x2AC4; break;
1351 case 0x2AC4: *ch = 0x2AC3; break;
1352 case 0x2AC5: *ch = 0x2AC6; break;
1353 case 0x2AC6: *ch = 0x2AC5; break;
1354 case 0x2ACD: *ch = 0x2ACE; break;
1355 case 0x2ACE: *ch = 0x2ACD; break;
1356 case 0x2ACF: *ch = 0x2AD0; break;
1357 case 0x2AD0: *ch = 0x2ACF; break;
1358 case 0x2AD1: *ch = 0x2AD2; break;
1359 case 0x2AD2: *ch = 0x2AD1; break;
1360 case 0x2AD3: *ch = 0x2AD4; break;
1361 case 0x2AD4: *ch = 0x2AD3; break;
1362 case 0x2AD5: *ch = 0x2AD6; break;
1363 case 0x2AD6: *ch = 0x2AD5; break;
1364 case 0x2ADE: *ch = 0x22A6; break;
1365 case 0x2AE3: *ch = 0x22A9; break;
1366 case 0x2AE4: *ch = 0x22A8; break;
1367 case 0x2AE5: *ch = 0x22AB; break;
1368 case 0x2AEC: *ch = 0x2AED; break;
1369 case 0x2AED: *ch = 0x2AEC; break;
1370 case 0x2AF7: *ch = 0x2AF8; break;
1371 case 0x2AF8: *ch = 0x2AF7; break;
1372 case 0x2AF9: *ch = 0x2AFA; break;
1373 case 0x2AFA: *ch = 0x2AF9; break;
1374 }
1375 } else if ((*ch & 0xFF00) == 0x3000) {
1376 switch (*ch) {
1377 case 0x3008: *ch = 0x3009; break;
1378 case 0x3009: *ch = 0x3008; break;
1379 case 0x300A: *ch = 0x300B; break;
1380 case 0x300B: *ch = 0x300A; break;
1381 case 0x300C: *ch = 0x300D; break;
1382 case 0x300D: *ch = 0x300C; break;
1383 case 0x300E: *ch = 0x300F; break;
1384 case 0x300F: *ch = 0x300E; break;
1385 case 0x3010: *ch = 0x3011; break;
1386 case 0x3011: *ch = 0x3010; break;
1387 case 0x3014: *ch = 0x3015; break;
1388 case 0x3015: *ch = 0x3014; break;
1389 case 0x3016: *ch = 0x3017; break;
1390 case 0x3017: *ch = 0x3016; break;
1391 case 0x3018: *ch = 0x3019; break;
1392 case 0x3019: *ch = 0x3018; break;
1393 case 0x301A: *ch = 0x301B; break;
1394 case 0x301B: *ch = 0x301A; break;
1395 }
1396 } else if ((*ch & 0xFF00) == 0xFF00) {
1397 switch (*ch) {
1398 case 0xFF08: *ch = 0xFF09; break;
1399 case 0xFF09: *ch = 0xFF08; break;
1400 case 0xFF1C: *ch = 0xFF1E; break;
1401 case 0xFF1E: *ch = 0xFF1C; break;
1402 case 0xFF3B: *ch = 0xFF3D; break;
1403 case 0xFF3D: *ch = 0xFF3B; break;
1404 case 0xFF5B: *ch = 0xFF5D; break;
1405 case 0xFF5D: *ch = 0xFF5B; break;
1406 case 0xFF5F: *ch = 0xFF60; break;
1407 case 0xFF60: *ch = 0xFF5F; break;
1408 case 0xFF62: *ch = 0xFF63; break;
1409 case 0xFF63: *ch = 0xFF62; break;
1410 }
1411 }
1412 }