+# read_prop_with_ranges(PATH, PROPKEY)
+#
+# Reads the property file selected by input(PATH) and, for every code point
+# it lists that already has an entry in %data, stores the listed value as
+# $data{CODEPOINT}->{PROPKEY}.  Code points without a %data entry are left
+# alone; no new entries are created.
+#
+# After '#' comments are stripped, each non-blank line must look like
+#     XXXX ; Value      or      XXXX..YYYY ; Value
+# where XXXX and YYYY are hexadecimal code points.  Anything else is
+# reported with warn and skipped, rather than being passed through hex()
+# and stored as an undefined value against the wrong code point.
+sub read_prop_with_ranges {
+  my $path = shift;
+  my $propkey = shift;
+  input($path);
+  # while(<>) assigns to the global $_; keep the caller's copy intact.
+  local $_;
+  while(<>) {
+    chomp;
+    s/\s*\#.*//;
+    # Trim both ends so whitespace-only lines count as blank and a stray
+    # trailing space or CR never ends up inside the property value.
+    s/^\s+//;
+    s/\s+$//;
+    next if $_ eq '';
+    my ($range, $propval) = split(/\s*;\s*/, $_);
+    if(!defined $propval
+       or $range !~ /\A([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\z/) {
+      warn "$path: ignoring malformed line: $_\n";
+      next;
+    }
+    # A single code point is handled as a range of length one.
+    my $first = hex($1);
+    my $last = defined $2 ? hex($2) : $first;
+    for my $c ($first .. $last) {
+      if(exists $data{$c}) {
+        $data{$c}->{$propkey} = $propval;
+      }
+    }
+  }
+}
+
+# Load Grapheme_Break, Word_Break and Sentence_Break, in that order.
+# NB this has to happen BEFORE the blanks are filled in, so that the Hangul
+# characters are not filled in; their properties can be computed
+# mechanically instead.
+for my $source (["auxiliary/GraphemeBreakProperty.txt", "gbreak"],
+                ["auxiliary/WordBreakProperty.txt", "wbreak"],
+                ["auxiliary/SentenceBreakProperty.txt", "sbreak"]) {
+  my ($file, $key) = @$source;
+  read_prop_with_ranges($file, $key);
+}
+
+# Give every entry in %data a value for each of the three break properties
+# and collect, per property, the set of values actually in use.
+#   gbreak: defaults to 'Other'.
+#   wbreak, sbreak: default to 'Extend' when the entry's gbreak is 'Extend',
+#   otherwise to 'Other'.
+my %gbreak = ();
+my %wbreak = ();
+my %sbreak = ();
+for my $props (values %data) {
+  # $props aliases the %data element, so assignments land in %data itself.
+  $props->{gbreak} = 'Other'
+    unless exists $props->{gbreak};
+  $gbreak{$props->{gbreak}} = 1;
+
+  $props->{wbreak} = ($props->{gbreak} eq 'Extend' ? 'Extend' : 'Other')
+    unless exists $props->{wbreak};
+  $wbreak{$props->{wbreak}} = 1;
+
+  $props->{sbreak} = ($props->{gbreak} eq 'Extend' ? 'Extend' : 'Other')
+    unless exists $props->{sbreak};
+  $sbreak{$props->{sbreak}} = 1;
+}
+