The COMPOUND_TEXT encoding used by some X applications to transfer
[sgt/charset] / sbcsgen.pl
1 #!/usr/bin/env perl -w
2
3 # This script generates sbcsdat.c (the data for all the SBCSes) from its
4 # source form sbcs.dat.
5
6 $infile = "sbcs.dat";
7 $infile = shift @ARGV if defined $ARGV[0];
8 $outfile = "sbcsdat.c";
9 $outfile = shift @ARGV if defined $ARGV[0];
10 $outheader = "sbcsdat.h";
11 $outheader = shift @ARGV if defined $ARGV[0];
12
13 open FOO, $infile;
14 open BAR, ">$outfile";
15 select BAR;
16
17 print "/*\n";
18 print " * sbcsdat.c - data definitions for single-byte character sets.\n";
19 print " *\n";
20 print " * Generated by sbcsgen.pl from sbcs.dat.\n";
21 print " * You should edit those files rather than editing this one.\n";
22 print " */\n";
23 print "\n";
24 print "#ifndef ENUM_CHARSETS\n";
25 print "\n";
26 print "#include \"charset.h\"\n";
27 print "#include \"internal.h\"\n";
28 print "\n";
29
30 my $charsetname = undef;
31 my @vals = ();
32
33 my @charsetnames = ();
34 my @sortpriority = ();
35
36 while (<FOO>) {
37 chomp;
38 if (/^charset (.*)$/) {
39 $charsetname = $1;
40 @vals = ();
41 @sortpriority = map { 0 } 0..255;
42 } elsif (/^sortpriority ([^-]*)-([^-]*) (.*)$/) {
43 for ($i = hex $1; $i <= hex $2; $i++) {
44 $sortpriority[$i] += $3;
45 }
46 } elsif (/^[0-9a-fA-FX]/) {
47 push @vals, map { $_ eq "XXXX" ? -1 : hex $_ } split / +/, $_;
48 if (scalar @vals > 256) {
49 die "$infile:$.: charset $charsetname has more than 256 values\n";
50 } elsif (scalar @vals == 256) {
51 &outcharset($charsetname, \@vals, \@sortpriority);
52 push @charsetnames, $charsetname;
53 $charsetname = undef;
54 @vals = ();
55 @sortpriority = map { 0 } 0..255;
56 }
57 }
58 }
59
60 print "#else /* ENUM_CHARSETS */\n";
61 print "\n";
62
63 foreach $i (@charsetnames) {
64 print "ENUM_CHARSET($i)\n";
65 }
66
67 print "\n";
68 print "#endif /* ENUM_CHARSETS */\n";
69
70 close BAR;
71
72 open BAR, ">$outheader";
73 select BAR;
74
75 print "/*\n";
76 print " * sbcsdat.h - header file for SBCS data structures.\n";
77 print " *\n";
78 print " * Generated by sbcsgen.pl from sbcs.dat.\n";
79 print " * You should edit those files rather than editing this one.\n";
80 print " */\n";
81 print "\n";
82 print "#ifndef charset_sbcsdat_h\n";
83 print "#define charset_sbcsdat_h\n";
84 print "\n";
85 print "#include \"charset.h\"\n";
86 print "#include \"internal.h\"\n";
87 print "\n";
88 foreach $i (@charsetnames) {
89 print "extern const sbcs_data sbcsdata_$i;\n";
90 }
91 print "\n";
92 print "#endif /* charset_sbcsdat_h */\n";
93
94 close BAR;
95
96 sub outcharset($$$) {
97 my ($name, $vals, $sortpriority) = @_;
98 my ($prefix, $i, @sorted);
99
100 print "const sbcs_data sbcsdata_$name = {\n";
101 print " {\n";
102 $prefix = " ";
103 @sorted = ();
104 for ($i = 0; $i < 256; $i++) {
105 if ($vals->[$i] < 0) {
106 printf "%sERROR ", $prefix;
107 } else {
108 printf "%s0x%04x", $prefix, $vals->[$i];
109 die "ooh? $i\n" unless defined $sortpriority->[$i];
110 push @sorted, [$i, $vals->[$i], 0+$sortpriority->[$i]];
111 }
112 if ($i % 8 == 7) {
113 $prefix = ",\n ";
114 } else {
115 $prefix = ", ";
116 }
117 }
118 print "\n },\n {\n";
119 @sorted = sort { $a->[1] == $b->[1] ?
120 $b->[2] <=> $a->[2] :
121 $a->[1] <=> $b->[1] } @sorted;
122 $prefix = " ";
123 $uval = -1;
124 for ($i = $j = 0; $i < scalar @sorted; $i++) {
125 next if ($uval == $sorted[$i]->[1]); # low-priority alternative
126 $uval = $sorted[$i]->[1];
127 printf "%s0x%02x", $prefix, $sorted[$i]->[0];
128 if ($j % 8 == 7) {
129 $prefix = ",\n ";
130 } else {
131 $prefix = ", ";
132 }
133 $j++;
134 }
135 printf "\n },\n %d\n", $j;
136 print "};\n";
137 print "const charset_spec charset_$name = {\n" .
138 " $name, read_sbcs, write_sbcs, &sbcsdata_$name\n};\n\n";
139 }