Just in case sbcsgen.pl is fed an sbcs.dat with the wrong line
[sgt/charset] / sbcsgen.pl
1 #!/usr/bin/env perl -w
2
3 # This script generates sbcsdat.c (the data for all the SBCSes) from its
4 # source form sbcs.dat.
5
6 $infile = "sbcs.dat";
7 $infile = shift @ARGV if defined $ARGV[0];
8 $outfile = "sbcsdat.c";
9 $outfile = shift @ARGV if defined $ARGV[0];
10 $outheader = "sbcsdat.h";
11 $outheader = shift @ARGV if defined $ARGV[0];
12
13 open FOO, $infile;
14 open BAR, ">$outfile";
15 select BAR;
16
17 print "/*\n";
18 print " * sbcsdat.c - data definitions for single-byte character sets.\n";
19 print " *\n";
20 print " * Generated by sbcsgen.pl from sbcs.dat.\n";
21 print " * You should edit those files rather than editing this one.\n";
22 print " */\n";
23 print "\n";
24 print "#ifndef ENUM_CHARSETS\n";
25 print "\n";
26 print "#include \"charset.h\"\n";
27 print "#include \"internal.h\"\n";
28 print "\n";
29
30 my $charsetname = undef;
31 my @vals = ();
32
33 my @charsetnames = ();
34 my @sortpriority = ();
35
36 while (<FOO>) {
37 chomp;
38 y/\r\n//; # robustness in the face of strange line endings
39 if (/^charset (.*)$/) {
40 $charsetname = $1;
41 @vals = ();
42 @sortpriority = map { 0 } 0..255;
43 } elsif (/^sortpriority ([^-]*)-([^-]*) (.*)$/) {
44 for ($i = hex $1; $i <= hex $2; $i++) {
45 $sortpriority[$i] += $3;
46 }
47 } elsif (/^[0-9a-fA-FX]/) {
48 push @vals, map { $_ eq "XXXX" ? -1 : hex $_ } split / +/, $_;
49 if (scalar @vals > 256) {
50 die "$infile:$.: charset $charsetname has more than 256 values\n";
51 } elsif (scalar @vals == 256) {
52 &outcharset($charsetname, \@vals, \@sortpriority);
53 push @charsetnames, $charsetname;
54 $charsetname = undef;
55 @vals = ();
56 @sortpriority = map { 0 } 0..255;
57 }
58 }
59 }
60
61 print "#else /* ENUM_CHARSETS */\n";
62 print "\n";
63
64 foreach $i (@charsetnames) {
65 print "ENUM_CHARSET($i)\n";
66 }
67
68 print "\n";
69 print "#endif /* ENUM_CHARSETS */\n";
70
71 close BAR;
72
73 open BAR, ">$outheader";
74 select BAR;
75
76 print "/*\n";
77 print " * sbcsdat.h - header file for SBCS data structures.\n";
78 print " *\n";
79 print " * Generated by sbcsgen.pl from sbcs.dat.\n";
80 print " * You should edit those files rather than editing this one.\n";
81 print " */\n";
82 print "\n";
83 print "#ifndef charset_sbcsdat_h\n";
84 print "#define charset_sbcsdat_h\n";
85 print "\n";
86 print "#include \"charset.h\"\n";
87 print "#include \"internal.h\"\n";
88 print "\n";
89 foreach $i (@charsetnames) {
90 print "extern const sbcs_data sbcsdata_$i;\n";
91 }
92 print "\n";
93 print "#endif /* charset_sbcsdat_h */\n";
94
95 close BAR;
96
97 sub outcharset($$$) {
98 my ($name, $vals, $sortpriority) = @_;
99 my ($prefix, $i, @sorted);
100
101 print "const sbcs_data sbcsdata_$name = {\n";
102 print " {\n";
103 $prefix = " ";
104 @sorted = ();
105 for ($i = 0; $i < 256; $i++) {
106 if ($vals->[$i] < 0) {
107 printf "%sERROR ", $prefix;
108 } else {
109 printf "%s0x%04x", $prefix, $vals->[$i];
110 die "ooh? $i\n" unless defined $sortpriority->[$i];
111 push @sorted, [$i, $vals->[$i], 0+$sortpriority->[$i]];
112 }
113 if ($i % 8 == 7) {
114 $prefix = ",\n ";
115 } else {
116 $prefix = ", ";
117 }
118 }
119 print "\n },\n {\n";
120 @sorted = sort { ($a->[1] == $b->[1] ?
121 $b->[2] <=> $a->[2] :
122 $a->[1] <=> $b->[1]) ||
123 $a->[0] <=> $b->[0] } @sorted;
124 $prefix = " ";
125 $uval = -1;
126 for ($i = $j = 0; $i < scalar @sorted; $i++) {
127 next if ($uval == $sorted[$i]->[1]); # low-priority alternative
128 $uval = $sorted[$i]->[1];
129 printf "%s0x%02x", $prefix, $sorted[$i]->[0];
130 if ($j % 8 == 7) {
131 $prefix = ",\n ";
132 } else {
133 $prefix = ", ";
134 }
135 $j++;
136 }
137 printf "\n },\n %d\n", $j;
138 print "};\n";
139 print "const charset_spec charset_$name = {\n" .
140 " $name, read_sbcs, write_sbcs, &sbcsdata_$name\n};\n\n";
141 }