X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/putty/blobdiff_plain/7bc1ffdf483a88503d15cbc657de6fbc945bc3f0..b4bc538452c92b6a2f9c935028461f5c774a4f1f:/minibidi.c diff --git a/minibidi.c b/minibidi.c index c13276d0..85a0c9c4 100644 --- a/minibidi.c +++ b/minibidi.c @@ -38,6 +38,14 @@ #define OISL 0x80 /* Override is L */ #define OISR 0x40 /* Override is R */ +/* For standalone compilation in a testing mode. + * Still depends on the PuTTY headers for snewn and sfree, but can avoid + * _linking_ with any other PuTTY code. */ +#ifdef TEST_GETTYPE +#define safemalloc malloc +#define safefree free +#endif + /* Shaping Helpers */ #define STYPE(xh) ((((xh) >= SHAPE_FIRST) && ((xh) <= SHAPE_LAST)) ? \ shapetypes[(xh)-SHAPE_FIRST].type : SU) /*))*/ @@ -50,7 +58,7 @@ shapetypes[(xh)-SHAPE_FIRST].type : SU) /*))*/ #define leastGreaterEven(x) ( ((x)+2) &~ 1 ) typedef struct bidi_char { - wchar_t origwc, wc; + unsigned int origwc, wc; unsigned short index; } bidi_char; @@ -62,7 +70,7 @@ unsigned char setOverrideBits(unsigned char level, unsigned char override); int getPreviousLevel(unsigned char* level, int from); int do_shape(bidi_char *line, bidi_char *to, int count); int do_bidi(bidi_char *line, int count); -void doMirror(wchar_t* ch); +void doMirror(unsigned int *ch); /* character types */ enum { @@ -84,7 +92,7 @@ enum { B, S, WS, - ON, + ON }; /* Shaping Types */ @@ -103,7 +111,7 @@ typedef struct { /* Kept near the actual table, for verification. 
*/ #define SHAPE_FIRST 0x621 -#define SHAPE_LAST 0x64A +#define SHAPE_LAST (SHAPE_FIRST + lenof(shapetypes) - 1) const shape_node shapetypes[] = { /* index, Typ, Iso, Ligature Index*/ @@ -149,6 +157,142 @@ const shape_node shapetypes[] = { /* 648 */ {SR, 0xFEED}, /* 649 */ {SR, 0xFEEF}, /* SD */ /* 64A */ {SD, 0xFEF1}, + /* 64B */ {SU, 0x0}, + /* 64C */ {SU, 0x0}, + /* 64D */ {SU, 0x0}, + /* 64E */ {SU, 0x0}, + /* 64F */ {SU, 0x0}, + /* 650 */ {SU, 0x0}, + /* 651 */ {SU, 0x0}, + /* 652 */ {SU, 0x0}, + /* 653 */ {SU, 0x0}, + /* 654 */ {SU, 0x0}, + /* 655 */ {SU, 0x0}, + /* 656 */ {SU, 0x0}, + /* 657 */ {SU, 0x0}, + /* 658 */ {SU, 0x0}, + /* 659 */ {SU, 0x0}, + /* 65A */ {SU, 0x0}, + /* 65B */ {SU, 0x0}, + /* 65C */ {SU, 0x0}, + /* 65D */ {SU, 0x0}, + /* 65E */ {SU, 0x0}, + /* 65F */ {SU, 0x0}, + /* 660 */ {SU, 0x0}, + /* 661 */ {SU, 0x0}, + /* 662 */ {SU, 0x0}, + /* 663 */ {SU, 0x0}, + /* 664 */ {SU, 0x0}, + /* 665 */ {SU, 0x0}, + /* 666 */ {SU, 0x0}, + /* 667 */ {SU, 0x0}, + /* 668 */ {SU, 0x0}, + /* 669 */ {SU, 0x0}, + /* 66A */ {SU, 0x0}, + /* 66B */ {SU, 0x0}, + /* 66C */ {SU, 0x0}, + /* 66D */ {SU, 0x0}, + /* 66E */ {SU, 0x0}, + /* 66F */ {SU, 0x0}, + /* 670 */ {SU, 0x0}, + /* 671 */ {SR, 0xFB50}, + /* 672 */ {SU, 0x0}, + /* 673 */ {SU, 0x0}, + /* 674 */ {SU, 0x0}, + /* 675 */ {SU, 0x0}, + /* 676 */ {SU, 0x0}, + /* 677 */ {SU, 0x0}, + /* 678 */ {SU, 0x0}, + /* 679 */ {SD, 0xFB66}, + /* 67A */ {SD, 0xFB5E}, + /* 67B */ {SD, 0xFB52}, + /* 67C */ {SU, 0x0}, + /* 67D */ {SU, 0x0}, + /* 67E */ {SD, 0xFB56}, + /* 67F */ {SD, 0xFB62}, + /* 680 */ {SD, 0xFB5A}, + /* 681 */ {SU, 0x0}, + /* 682 */ {SU, 0x0}, + /* 683 */ {SD, 0xFB76}, + /* 684 */ {SD, 0xFB72}, + /* 685 */ {SU, 0x0}, + /* 686 */ {SD, 0xFB7A}, + /* 687 */ {SD, 0xFB7E}, + /* 688 */ {SR, 0xFB88}, + /* 689 */ {SU, 0x0}, + /* 68A */ {SU, 0x0}, + /* 68B */ {SU, 0x0}, + /* 68C */ {SR, 0xFB84}, + /* 68D */ {SR, 0xFB82}, + /* 68E */ {SR, 0xFB86}, + /* 68F */ {SU, 0x0}, + /* 690 */ {SU, 0x0}, + /* 691 */ {SR, 
0xFB8C}, + /* 692 */ {SU, 0x0}, + /* 693 */ {SU, 0x0}, + /* 694 */ {SU, 0x0}, + /* 695 */ {SU, 0x0}, + /* 696 */ {SU, 0x0}, + /* 697 */ {SU, 0x0}, + /* 698 */ {SR, 0xFB8A}, + /* 699 */ {SU, 0x0}, + /* 69A */ {SU, 0x0}, + /* 69B */ {SU, 0x0}, + /* 69C */ {SU, 0x0}, + /* 69D */ {SU, 0x0}, + /* 69E */ {SU, 0x0}, + /* 69F */ {SU, 0x0}, + /* 6A0 */ {SU, 0x0}, + /* 6A1 */ {SU, 0x0}, + /* 6A2 */ {SU, 0x0}, + /* 6A3 */ {SU, 0x0}, + /* 6A4 */ {SD, 0xFB6A}, + /* 6A5 */ {SU, 0x0}, + /* 6A6 */ {SD, 0xFB6E}, + /* 6A7 */ {SU, 0x0}, + /* 6A8 */ {SU, 0x0}, + /* 6A9 */ {SD, 0xFB8E}, + /* 6AA */ {SU, 0x0}, + /* 6AB */ {SU, 0x0}, + /* 6AC */ {SU, 0x0}, + /* 6AD */ {SD, 0xFBD3}, + /* 6AE */ {SU, 0x0}, + /* 6AF */ {SD, 0xFB92}, + /* 6B0 */ {SU, 0x0}, + /* 6B1 */ {SD, 0xFB9A}, + /* 6B2 */ {SU, 0x0}, + /* 6B3 */ {SD, 0xFB96}, + /* 6B4 */ {SU, 0x0}, + /* 6B5 */ {SU, 0x0}, + /* 6B6 */ {SU, 0x0}, + /* 6B7 */ {SU, 0x0}, + /* 6B8 */ {SU, 0x0}, + /* 6B9 */ {SU, 0x0}, + /* 6BA */ {SR, 0xFB9E}, + /* 6BB */ {SD, 0xFBA0}, + /* 6BC */ {SU, 0x0}, + /* 6BD */ {SU, 0x0}, + /* 6BE */ {SD, 0xFBAA}, + /* 6BF */ {SU, 0x0}, + /* 6C0 */ {SR, 0xFBA4}, + /* 6C1 */ {SD, 0xFBA6}, + /* 6C2 */ {SU, 0x0}, + /* 6C3 */ {SU, 0x0}, + /* 6C4 */ {SU, 0x0}, + /* 6C5 */ {SR, 0xFBE0}, + /* 6C6 */ {SR, 0xFBD9}, + /* 6C7 */ {SR, 0xFBD7}, + /* 6C8 */ {SR, 0xFBDB}, + /* 6C9 */ {SR, 0xFBE2}, + /* 6CA */ {SU, 0x0}, + /* 6CB */ {SR, 0xFBDE}, + /* 6CC */ {SD, 0xFBFC}, + /* 6CD */ {SU, 0x0}, + /* 6CE */ {SU, 0x0}, + /* 6CF */ {SU, 0x0}, + /* 6D0 */ {SU, 0x0}, + /* 6D1 */ {SU, 0x0}, + /* 6D2 */ {SR, 0xFBAE}, }; /* @@ -840,7 +984,7 @@ unsigned char getType(int ch) {0xe0020, 0xe007f, BN}, {0xe0100, 0xe01ef, NSM}, {0xf0000, 0xffffd, L}, - {0x100000, 0x10fffd, L}, + {0x100000, 0x10fffd, L} }; int i, j, k; @@ -848,7 +992,7 @@ unsigned char getType(int ch) i = -1; j = lenof(lookup); - while (j - i > 2) { + while (j - i > 1) { k = (i + j) / 2; if (ch < lookup[k].first) j = k; @@ -870,6 +1014,40 @@ unsigned char getType(int ch) } /* + * 
Function exported to front ends to allow them to identify + * bidi-active characters (in case, for example, the platform's + * text display function can't conveniently be prevented from doing + * its own bidi and so special treatment is required for characters + * that would cause the bidi algorithm to activate). + * + * This function is passed a single Unicode code point, and returns + * nonzero if the presence of this code point can possibly cause + * the bidi algorithm to do any reordering. Thus, any string + * composed entirely of characters for which is_rtl() returns zero + * should be safe to pass to a bidi-active platform display + * function without fear. + * + * (is_rtl() must therefore also return true for any character + * which would be affected by Arabic shaping, but this isn't + * important because all such characters are right-to-left so it + * would have flagged them anyway.) + */ +int is_rtl(int c) +{ + /* + * After careful reading of the Unicode bidi algorithm (URL as + * given at the top of this file) I believe that the only + * character classes which can possibly cause trouble are R, + * AL, RLE and RLO. I think that any string containing no + * character in any of those classes will be displayed + * uniformly left-to-right by the Unicode bidi algorithm. + */ + const int mask = (1< tempType) { + if (levels[i] > tempType) tempType = levels[i]; - imax=i; - } i++; } - /* maximum level in tempType, its index in imax. */ + /* maximum level in tempType. */ while (tempType > 0) { /* loop from highest level to the least odd, */ /* which i assume is 1 */ flipThisRun(line, levels, tempType, count); @@ -1461,7 +1636,7 @@ int do_bidi(bidi_char *line, int count) * takes a pointer to a character that is checked for * having a mirror glyph. 
*/ -void doMirror(wchar_t* ch) +void doMirror(unsigned int *ch) { if ((*ch & 0xFF00) == 0) { switch (*ch) { @@ -1810,3 +1985,47 @@ void doMirror(wchar_t* ch) } } } + +#ifdef TEST_GETTYPE + +#include <stdio.h> +#include <assert.h> + +int main(int argc, char **argv) +{ + static const struct { int type; char *name; } typetoname[] = { +#define TYPETONAME(X) { X , #X } + TYPETONAME(L), + TYPETONAME(LRE), + TYPETONAME(LRO), + TYPETONAME(R), + TYPETONAME(AL), + TYPETONAME(RLE), + TYPETONAME(RLO), + TYPETONAME(PDF), + TYPETONAME(EN), + TYPETONAME(ES), + TYPETONAME(ET), + TYPETONAME(AN), + TYPETONAME(CS), + TYPETONAME(NSM), + TYPETONAME(BN), + TYPETONAME(B), + TYPETONAME(S), + TYPETONAME(WS), + TYPETONAME(ON), +#undef TYPETONAME + }; + int i; + + for (i = 1; i < argc; i++) { + unsigned long chr = strtoul(argv[i], NULL, 0); + int type = getType(chr); + assert(typetoname[type].type == type); + printf("U+%04x: %s\n", chr, typetoname[type].name); + } + + return 0; +} + +#endif