Silly of me to overlook it: another obvious way you might like to
[sgt/charset] / sbcsgen.pl
CommitLineData
#!/usr/bin/env perl

# This script generates sbcsdat.c (the data for all the SBCSes) from its
# source form sbcs.dat.
#
# Usage: sbcsgen.pl [infile [outfile [outheader]]]
#
# Warnings are enabled with the pragma rather than with "-w" on the #! line:
# many kernels pass everything after the interpreter path to env(1) as one
# single argument, so "env perl -w" goes looking for a program literally
# called "perl -w" and the script cannot be executed directly.
use warnings;

# File names: each default can be overridden by the next positional argument.
$infile = "sbcs.dat";
$infile = shift @ARGV if defined $ARGV[0];
$outfile = "sbcsdat.c";
$outfile = shift @ARGV if defined $ARGV[0];
$outheader = "sbcsdat.h";
$outheader = shift @ARGV if defined $ARGV[0];

# FOO (input) and BAR (output) remain bareword handles because the rest of
# the script refers to them by name.  Three-argument open keeps characters
# in the file names from being interpreted as an open mode, and a failure
# to open either file is now fatal instead of being silently ignored.
open(FOO, '<', $infile)
    or die "$infile: unable to open for reading: $!\n";
open(BAR, '>', $outfile)
    or die "$outfile: unable to open for writing: $!\n";

# Every bare print from here on goes to the output file.
select BAR;

# Banner and opening preprocessor lines of the generated C source, written
# to the currently selected handle.  The heredoc is single-quoted so nothing
# in it is interpolated.
print <<'END_OF_BANNER';
/*
 * sbcsdat.c - data definitions for single-byte character sets.
 *
 * Generated by sbcsgen.pl from sbcs.dat.
 * You should edit those files rather than editing this one.
 */

#ifndef ENUM_CHARSETS

#include "charset.h"
#include "internal.h"

END_OF_BANNER

# Parser state.
#   $charsetname  - name given by the most recent "charset" line
#   @vals         - values collected so far for that charset
#   @sortpriority - one priority per byte value for that charset
#   @charsetnames - every charset emitted so far, in input order
my $charsetname = undef;
my @vals = ();

my @charsetnames = ();
my @sortpriority = ();

# Input format, one directive per line:
#   charset NAME             start a new charset
#   sortpriority LO-HI N     add N to the priority of bytes LO..HI (hex)
#   <hex or XXXX> ...        space-separated values; XXXX means "no mapping"
# Any other line is ignored.  A charset is emitted as soon as its 256th
# value has been read.
while (<FOO>) {
    chomp;
    # Robustness in the face of strange line endings.  The /d is essential:
    # without it tr/// with an empty replacement list only counts the
    # characters and deletes nothing, leaving a CR in the charset name and
    # in the last value of every row.
    y/\r\n//d;
    if (/^charset (.*)$/) {
        $charsetname = $1;
        @vals = ();
        @sortpriority = map { 0 } 0..255;
    } elsif (/^sortpriority ([^-]*)-([^-]*) (.*)$/) {
        my ($lo, $hi, $delta) = ($1, $2, $3);
        for my $byte (hex($lo) .. hex($hi)) {
            $sortpriority[$byte] += $delta;
        }
    } elsif (/^[0-9a-fA-FX]/) {
        push @vals, map { $_ eq "XXXX" ? -1 : hex $_ } split / +/, $_;
        if (scalar @vals > 256) {
            die "$infile:$.: charset $charsetname has more than 256 values\n";
        } elsif (scalar @vals == 256) {
            # Called with & because outcharset is defined further down the
            # file; this form never attempts a prototype check.
            &outcharset($charsetname, \@vals, \@sortpriority);
            push @charsetnames, $charsetname;
            $charsetname = undef;
            @vals = ();
            @sortpriority = map { 0 } 0..255;
        }
    }
}

# Alternative branch of the generated file: when ENUM_CHARSETS is defined,
# the file expands to one ENUM_CHARSET(name) line per charset emitted above.
print "#else /* ENUM_CHARSETS */\n\n",
      (map { "ENUM_CHARSET($_)\n" } @charsetnames),
      "\n#endif /* ENUM_CHARSETS */\n";

# Finish the C source.  The result of close is checked because buffered
# write errors (a full disk, for instance) are only reported at this point.
close(BAR) or die "$outfile: error closing file: $!\n";

# Reuse the BAR handle for the header file and select it again so the bare
# print statements below write there.
open(BAR, '>', $outheader)
    or die "$outheader: unable to open for writing: $!\n";
select BAR;

print <<'END_OF_PROLOGUE';
/*
 * sbcsdat.h - header file for SBCS data structures.
 *
 * Generated by sbcsgen.pl from sbcs.dat.
 * You should edit those files rather than editing this one.
 */

#ifndef charset_sbcsdat_h
#define charset_sbcsdat_h

#include "charset.h"
#include "internal.h"

END_OF_PROLOGUE

# One extern declaration per charset that was emitted into the C file.
foreach my $name (@charsetnames) {
    print "extern const sbcs_data sbcsdata_$name;\n";
}

print "\n";
print "#endif /* charset_sbcsdat_h */\n";

close(BAR) or die "$outheader: error closing file: $!\n";

# outcharset NAME, VALS, SORTPRIORITY
#
# Print the C definitions for one single-byte charset to the currently
# selected output handle.
#
#   NAME          charset identifier; used to build the C symbol names
#                 sbcsdata_NAME and charset_NAME
#   VALS          reference to an array of 256 numbers, one per byte value;
#                 a negative number marks a byte with no mapping
#   SORTPRIORITY  reference to an array of 256 numbers, one per byte value
#
# Three initialiser members are written for sbcsdata_NAME:
#   1. the 256 values in byte order, with ERROR for negative entries;
#   2. byte values ordered by the value they map to, one byte per distinct
#      value (when several bytes share a value, the one with the highest
#      sort priority is kept, ties going to the lowest byte);
#   3. the number of entries in the second list.
# A charset_spec definition referring to that data follows.
#
# Dies if a mapped byte has no defined sort priority.
sub outcharset($$$) {
    my ($name, $vals, $sortpriority) = @_;
    my ($prefix, @sorted);

    print "const sbcs_data sbcsdata_$name = {\n";
    print " {\n";
    $prefix = " ";
    for my $i (0 .. 255) {
        if ($vals->[$i] < 0) {
            printf "%sERROR ", $prefix;
        } else {
            printf "%s0x%04x", $prefix, $vals->[$i];
            die "ooh? $i\n" unless defined $sortpriority->[$i];
            # Each record is [byte, value, priority]; 0+ forces the
            # priority to be stored as a number.
            push @sorted, [$i, $vals->[$i], 0+$sortpriority->[$i]];
        }
        # Eight entries per output line.
        $prefix = ($i % 8 == 7) ? ",\n " : ", ";
    }
    print "\n },\n {\n";

    # Order by value ascending, then priority descending, then byte
    # ascending, so the preferred byte for each value comes first.
    @sorted = sort { $a->[1] <=> $b->[1] ||
                     $b->[2] <=> $a->[2] ||
                     $a->[0] <=> $b->[0] } @sorted;

    $prefix = " ";
    # These two were undeclared package globals, overwritten on every call;
    # they are lexical now so the sub no longer touches caller state.
    my $uval = -1;      # value emitted last; -1 never matches a real value
    my $count = 0;      # number of entries written to the second list
    for my $entry (@sorted) {
        my ($byte, $value) = @$entry;
        next if ($uval == $value); # low-priority alternative
        $uval = $value;
        printf "%s0x%02x", $prefix, $byte;
        $prefix = ($count % 8 == 7) ? ",\n " : ", ";
        $count++;
    }
    printf "\n },\n %d\n", $count;
    print "};\n";
    print "const charset_spec charset_$name = {\n" .
        " $name, read_sbcs, write_sbcs, &sbcsdata_$name\n};\n\n";
}