1*b1cdbd2cSJim Jagielski#!/usr/bin/perl 2*b1cdbd2cSJim Jagielski#************************************************************** 3*b1cdbd2cSJim Jagielski# 4*b1cdbd2cSJim Jagielski# Licensed to the Apache Software Foundation (ASF) under one 5*b1cdbd2cSJim Jagielski# or more contributor license agreements. See the NOTICE file 6*b1cdbd2cSJim Jagielski# distributed with this work for additional information 7*b1cdbd2cSJim Jagielski# regarding copyright ownership. The ASF licenses this file 8*b1cdbd2cSJim Jagielski# to you under the Apache License, Version 2.0 (the 9*b1cdbd2cSJim Jagielski# "License"); you may not use this file except in compliance 10*b1cdbd2cSJim Jagielski# with the License. You may obtain a copy of the License at 11*b1cdbd2cSJim Jagielski# 12*b1cdbd2cSJim Jagielski# http://www.apache.org/licenses/LICENSE-2.0 13*b1cdbd2cSJim Jagielski# 14*b1cdbd2cSJim Jagielski# Unless required by applicable law or agreed to in writing, 15*b1cdbd2cSJim Jagielski# software distributed under the License is distributed on an 16*b1cdbd2cSJim Jagielski# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17*b1cdbd2cSJim Jagielski# KIND, either express or implied. See the License for the 18*b1cdbd2cSJim Jagielski# specific language governing permissions and limitations 19*b1cdbd2cSJim Jagielski# under the License. 
#
#**************************************************************

# Generates the C conversion tables between CNS 11643-1992 and Unicode.
#
# The script reads the mapping sources listed below from ./input, merges
# them into @cns_map (CNS plane/row/column -> UTF-32) and @uni_map
# (Unicode plane/page/index -> packed CNS position), and writes the
# resulting C arrays to lc($id) . ".tab".  Progress and conflict
# diagnostics go to standard output.
#
# NOTE: the leading comment header of this file (everything up to the first
# line that does not start with '#') is copied into the generated file as a
# C comment, so script documentation must stay below that first blank line.

# The following files must be available in a ./input subdir:

# <http://www.unicode.org/Public/UNIDATA/Unihan.txt>:
# "Unicode version: 3.1.1 Table version: 1.1 Date: 28 June 2001"
# contains descriptions for:
# U+3400..4DFF CJK Unified Ideographs Extension A
# U+4E00..9FFF CJK Unified Ideographs
# U+F900..FAFF CJK Compatibility Ideographs
# U+20000..2F7FF CJK Unified Ideographs Extension B
# U+2F800..2FFFF CJK Compatibility Ideographs Supplement

# <http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/CNS11643.TXT>:
# "Unicode version: 1.1 Table version: 0.0d1 Date: 21 October 1994"
# contains mappings for CNS 11643-1986

# <http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/ftp/CJKtable/Uni2CNS.Z>:
# "Unicode version: 1.1 Table version: 0.49 Date: 26 March 1998"
# contains mappings for CNS 11643-1992 that are incompatible with
# CNS11643.TXT

# Identifier used in the generated C array names and (lower-cased) in the
# names of the output file and of this script.
$id = "Cns116431992";

# Returns true if the argument lies in 0..0x10FFFF and is neither in
# 0xD800..0xDFFF, nor in 0xFDD0..0xFDEF, nor has 0xFFFE/0xFFFF as its low
# 16 bits.
sub isValidUtf32 {
    my ($utf32) = @_;
    return $utf32 >= 0 && $utf32 <= 0x10FFFF
        && !($utf32 >= 0xD800 && $utf32 <= 0xDFFF)
        && !($utf32 >= 0xFDD0 && $utf32 <= 0xFDEF)
        && ($utf32 & 0xFFFF) < 0xFFFE;
}

# Formats a code point as "U+XXXX" (at least four upper-case hex digits).
sub printUtf32 {
    my ($utf32) = @_;
    return sprintf("U+%04X", $utf32);
}

# Returns true if plane is in 1..16 and row and column are both in 1..94.
sub isValidCns116431992 {
    my ($plane, $row, $column) = @_;
    return $plane >= 1 && $plane <= 16
        && $row >= 1 && $row <= 94
        && $column >= 1 && $column <= 94;
}

# Formats a CNS position as "plane-row/column" with two-digit row/column.
sub printCns116431992 {
    my ($plane, $row, $column) = @_;
    return sprintf("%d-%02d/%02d", $plane, $row, $column);
}

# Formats a usage statistic as "used/space bytes (percent%)".  A zero
# $space yields 0.0% instead of a division-by-zero error.
sub printStats {
    my ($used, $space) = @_;
    my $percent = $space ? $used * 100 / $space : 0;
    return sprintf("%d/%d bytes (%.1f%%)", $used, $space, $percent);
}

# Returns the blank padding for a table line that starts at entry $end:
# $column_width spaces for every entry between the preceding multiple of
# $columns_per_line and $end.  Uses only lexicals, so no package globals
# are touched.
sub printSpaces {
    my ($column_width, $columns_per_line, $end) = @_;
    my $skipped = $end - int($end / $columns_per_line) * $columns_per_line;
    return "" if $skipped <= 0 || $column_width <= 0;
    return " " x ($column_width * $skipped);
}

# Private helper: converts the hexadecimal fields of one input entry to
# numbers (row and column are given with an offset of 0x20), validates
# them, and returns ($utf32, $plane, $row, $column).  Dies on an invalid
# code point or CNS position.
sub decodeMapping {
    my ($utf32_hex, $plane_hex, $row_hex, $column_hex) = @_;
    my $utf32 = hex($utf32_hex);
    my $plane = hex($plane_hex);
    my $row = hex($row_hex) - 0x20;
    my $column = hex($column_hex) - 0x20;
    # printUtf32 already emits the "U+" prefix, so it is not repeated here.
    isValidUtf32($utf32)
        or die "Bad UTF32 char " . printUtf32($utf32);
    isValidCns116431992($plane, $row, $column)
        or die "Bad CNS11643-1992 char "
            . printCns116431992($plane, $row, $column);
    return ($utf32, $plane, $row, $column);
}

# Private helper: stores a mapping in @cns_map if the position is still
# free and marks the plane as used.  Returns 1 if the mapping was stored,
# 0 if the identical mapping was already present, and -1 if the position
# is already mapped to a different code point.
sub addCnsMapping {
    my ($utf32, $plane, $row, $column) = @_;
    if (!defined($cns_map[$plane][$row][$column])) {
        $cns_map[$plane][$row][$column] = $utf32;
        $cns_plane_used[$plane] = 1;
        return 1;
    }
    return $cns_map[$plane][$row][$column] == $utf32 ? 0 : -1;
}

# Private helper: builds the "Mapping <cns> to <stored>, NOT <new>" text
# for a position that is already mapped to another code point.
sub describeConflict {
    my ($utf32, $plane, $row, $column) = @_;
    return "Mapping "
        . printCns116431992($plane, $row, $column)
        . " to "
        . printUtf32($cns_map[$plane][$row][$column])
        . ", NOT "
        . printUtf32($utf32);
}

# Private helper: true for plane 3 positions after 66/38, i.e. the
# Fictitious Character Set Extensions (Lunde, p. 131).
sub isFictitiousCns {
    my ($plane, $row, $column) = @_;
    return $plane == 3 && ($row == 66 && $column > 38 || $row > 66);
}

$count_Unihan_txt = 0;
$count_CNS11643_TXT = 0;
$count_Uni2CNS = 0;

# Source 1: Unihan.txt.  kCNS1992 entries must be consistent (a conflict is
# fatal); conflicting kIRG_TSource entries only produce a warning.
if (1) {
    $filename = "Unihan.txt";
    open IN, '<', "input/" . $filename
        or die "Cannot read " . $filename . ": $!";
    while (<IN>) {
        if (/^U\+([0-9A-F]+)\tkCNS1992\t([0-9A-F])-([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])$/) {
            ($utf32, $cns_plane, $cns_row, $cns_column)
                = decodeMapping($1, $2, $3, $4);
            my $added
                = addCnsMapping($utf32, $cns_plane, $cns_row, $cns_column);
            ++$count_Unihan_txt if $added > 0;
            die describeConflict($utf32, $cns_plane, $cns_row, $cns_column)
                if $added < 0;
        }
        elsif (/^U\+([0-9A-F]+)\tkIRG_TSource\t([0-9A-F])-([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])$/) {
            ($utf32, $cns_plane, $cns_row, $cns_column)
                = decodeMapping($1, $2, $3, $4);
            my $added
                = addCnsMapping($utf32, $cns_plane, $cns_row, $cns_column);
            ++$count_Unihan_txt if $added > 0;
            print "WARNING! ",
                  describeConflict($utf32, $cns_plane, $cns_row, $cns_column),
                  "\n"
                if $added < 0;
        }
        elsif (/^U\+([0-9A-F]+)\tkCNS1992\t.*$/) {
            # A kCNS1992 line that did not match the strict pattern above.
            die "Bad format";
        }
    }
    close IN;
}

# Source 2: CNS11643.TXT.  Every entry is validated, but only planes 1 and
# 2 are taken over; a conflict with an existing mapping is fatal.
if (1) {
    $filename = "CNS11643.TXT";
    open IN, '<', "input/" . $filename
        or die "Cannot read " . $filename . ": $!";
    while (<IN>) {
        if (/0x([0-9A-F])([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])\t0x([0-9A-F]+)\t\#.*$/) {
            ($utf32, $cns_plane, $cns_row, $cns_column)
                = decodeMapping($4, $1, $2, $3);
            if ($cns_plane <= 2) {
                my $added = addCnsMapping($utf32,
                                          $cns_plane,
                                          $cns_row,
                                          $cns_column);
                ++$count_CNS11643_TXT if $added > 0;
                die describeConflict($utf32,
                                     $cns_plane,
                                     $cns_row,
                                     $cns_column)
                    if $added < 0;
            }
        }
    }
    close IN;
}

# Source 3: Uni2CNS (disabled).  Conflicts with existing mappings are
# silently ignored here; plane 1 positions are listed on standard output.
if (0) {
    $filename = "Uni2CNS";
    open IN, '<', "input/" . $filename
        or die "Cannot read " . $filename . ": $!";
    while (<IN>) {
        if (/([0-9A-F]+)\t([0-9A-F])-([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])\t.*$/) {
            ($utf32, $cns_plane, $cns_row, $cns_column)
                = decodeMapping($1, $2, $3, $4);
            my $added
                = addCnsMapping($utf32, $cns_plane, $cns_row, $cns_column);
            ++$count_Uni2CNS if $added > 0;
            if ($cns_plane == 1) {
                print printCns116431992($cns_plane, $cns_row, $cns_column),
                      "\n";
            }
        }
    }
    close IN;
}

# Build the reverse (Unicode -> CNS) map.  Each entry packs the CNS
# position as (plane << 16) | (row << 8) | column.
for ($cns_plane = 1; $cns_plane <= 16; ++$cns_plane) {
    next unless defined($cns_plane_used[$cns_plane]);
    for ($cns_row = 1; $cns_row <= 94; ++$cns_row) {
        for ($cns_column = 1; $cns_column <= 94; ++$cns_column) {
            next unless defined($cns_map[$cns_plane][$cns_row][$cns_column]);

            $utf32 = $cns_map[$cns_plane][$cns_row][$cns_column];
            $uni_plane = $utf32 >> 16;
            $uni_page = ($utf32 >> 8) & 0xFF;
            $uni_index = $utf32 & 0xFF;
            my $packed = ($cns_plane << 16) | ($cns_row << 8) | $cns_column;

            if (!defined($uni_plane_used[$uni_plane])
                || !defined($uni_page_used[$uni_plane][$uni_page])
                || !defined($uni_map[$uni_plane][$uni_page][$uni_index]))
            {
                $uni_map[$uni_plane][$uni_page][$uni_index] = $packed;
                $uni_plane_used[$uni_plane] = 1;
                $uni_page_used[$uni_plane][$uni_page] = 1;
                next;
            }

            # The code point already has a CNS position; decide which of
            # the two positions is kept.
            $cns1 = $uni_map[$uni_plane][$uni_page][$uni_index];
            $cns1_plane = $cns1 >> 16;
            $cns1_row = ($cns1 >> 8) & 0xFF;
            $cns1_column = $cns1 & 0xFF;

            # Do not map from Unicode to Fictitious Character Set
            # Extensions (Lunde, p. 131), if possible:
            if (isFictitiousCns($cns_plane, $cns_row, $cns_column)) {
                print " (",
                      printUtf32($utf32),
                      " to fictious ",
                      printCns116431992($cns_plane, $cns_row, $cns_column),
                      " ignored, favouring ",
                      printCns116431992($cns1_plane,
                                        $cns1_row,
                                        $cns1_column),
                      ")\n";
            }
            elsif (isFictitiousCns($cns1_plane, $cns1_row, $cns1_column)) {
                $uni_map[$uni_plane][$uni_page][$uni_index] = $packed;
                print " (",
                      printUtf32($utf32),
                      " to fictious ",
                      printCns116431992($cns1_plane,
                                        $cns1_row,
                                        $cns1_column),
                      " ignored, favouring ",
                      printCns116431992($cns_plane, $cns_row, $cns_column),
                      ")\n";
            }
            else {
                print "WARNING! Mapping ",
                      printUtf32($utf32),
                      " to ",
                      printCns116431992($cns1_plane,
                                        $cns1_row,
                                        $cns1_column),
                      ", NOT ",
                      printCns116431992($cns_plane, $cns_row, $cns_column),
                      "\n";
            }
        }
    }
}

# Sanity check: no code point in U+0000..U+007F may be mapped.  The test
# has to index by the loop variable $utf32; it previously used $uni_index,
# a leftover from the loop above, so the range was never really examined.
if (defined($uni_plane_used[0]) && defined($uni_page_used[0][0])) {
    for ($utf32 = 0; $utf32 <= 0x7F; ++$utf32) {
        if (defined($uni_map[0][0][$utf32])) {
            $cns = $uni_map[0][0][$utf32];
            die "Mapping "
                . printUtf32($utf32)
                . " to "
                . printCns116431992($cns >> 16,
                                    ($cns >> 8) & 0xFF,
                                    $cns & 0xFF);
        }
    }
}

# OUT stays a bareword handle because the code following this section
# keeps printing to it.
$filename = lc($id) . ".tab";
open OUT, '>', $filename or die "Cannot write " . $filename . ": $!";

# Copy this script's leading comment header into the output as a C
# comment: the "#!" line is skipped, the first "#***" line opens the
# comment, a later one closes it, and copying stops at the first line that
# is not a comment.
{
    $filename = lc($id). ".pl";
    open IN, '<', $filename or die "Cannot read ". $filename . ": $!";
    $first = 1;
    while (<IN>) {
        if (/^\#!.*$/) {
            # interpreter line: not copied
        }
        elsif (/^\#(\*.*)$/) {
            if ($first == 1) {
                print OUT "/", $1, "\n";
                $first = 0;
            }
            else {
                print OUT " ", substr($1, 0, length($1) - 1), "/\n";
            }
        }
        elsif (/^\# (.*)$/) {
            print OUT " *", $1, "\n";
        }
        elsif (/^\#(.*)$/) {
            print OUT " *", $1, "\n";
        }
        else {
            last;
        }
    }
    close IN;
}

print OUT "\n",
          "#ifndef _SAL_TYPES_H_\n",
          "#include \"sal/types.h\"\n",
          "#endif\n",
          "\n";

# CNS -> Unicode data.  Per used row: one "first | (last << 8)" entry, one
# entry per column in first..last (0xffff for unmapped columns, the high
# surrogate for code points above 0xFFFF), and, if the row contains such
# code points, the first surrogate column followed by the low surrogates
# (0 where none applies).
print OUT "static sal_uInt16 const aImpl", $id, "ToUnicodeData[] = {\n";
$cns_data_index = 0;
for ($cns_plane = 1; $cns_plane <= 16; ++$cns_plane) {
    next unless defined($cns_plane_used[$cns_plane]);

    $cns_rows = 0;
    $cns_chars = 0;
    for ($cns_row = 1; $cns_row <= 94; ++$cns_row) {
        # Find the first and last mapped column of this row.
        $cns_row_first = -1;
        for ($cns_column = 1; $cns_column <= 94; ++$cns_column) {
            if (defined($cns_map[$cns_plane][$cns_row][$cns_column])) {
                if ($cns_row_first == -1) {
                    $cns_row_first = $cns_column;
                }
                $cns_row_last = $cns_column;
            }
        }

        if ($cns_row_first == -1) {
            print OUT " /* plane ", $cns_plane, ", row ", $cns_row,
                      ": --- */\n";
            $cns_data_offsets[$cns_plane][$cns_row] = -1;
            next;
        }

        $cns_data_offsets[$cns_plane][$cns_row] = $cns_data_index;
        ++$cns_rows;
        print OUT " /* plane ", $cns_plane, ", row ", $cns_row,
                  " */\n";

        $cns_row_surrogates_first = -1;
        $cns_row_chars = 0;
        $cns_row_surrogates = 0;

        print OUT " ", $cns_row_first, " | (", $cns_row_last,
                  " << 8), /* first, last */\n";
        ++$cns_data_index;

        print OUT " ", printSpaces(7, 10, $cns_row_first);
        $bol = 0;
        for ($cns_column = $cns_row_first;
             $cns_column <= $cns_row_last;
             ++$cns_column)
        {
            if ($bol == 1) {
                print OUT " ";
                $bol = 0;
            }
            if (defined($cns_map[$cns_plane][$cns_row][$cns_column])) {
                $utf32 = $cns_map[$cns_plane][$cns_row][$cns_column];
                ++$cns_row_chars;
                if ($utf32 <= 0xFFFF) {
                    printf OUT "0x%04X,", $utf32;
                }
                else {
                    ++$cns_row_surrogates;
                    printf OUT "0x%04X,",
                               (0xD800 | (($utf32 - 0x10000) >> 10));
                    if ($cns_row_surrogates_first == -1) {
                        $cns_row_surrogates_first = $cns_column;
                    }
                    $cns_row_surrogates_last = $cns_column;
                }
            }
            else {
                print OUT "0xffff,";
            }
            ++$cns_data_index;
            if ($cns_column % 10 == 9) {
                print OUT "\n";
                $bol = 1;
            }
        }
        if ($bol == 0) {
            print OUT "\n";
        }

        if ($cns_row_surrogates_first != -1) {
            print OUT " ", $cns_row_surrogates_first,
                      ", /* first low-surrogate */\n";
            ++$cns_data_index;

            print OUT " ",
                      printSpaces(7, 10, $cns_row_surrogates_first);
            $bol = 0;
            for ($cns_column = $cns_row_surrogates_first;
                 $cns_column <= $cns_row_surrogates_last;
                 ++$cns_column)
            {
                if ($bol == 1) {
                    print OUT " ";
                    $bol = 0;
                }
                $utf32 = 0;
                if (defined($cns_map[$cns_plane][$cns_row][$cns_column])) {
                    $utf32 = $cns_map[$cns_plane][$cns_row][$cns_column];
                }
                if ($utf32 <= 0xFFFF) {
                    print OUT " 0,";
                }
                else {
                    printf OUT "0x%04X,",
                               (0xDC00 | (($utf32 - 0x10000) & 0x3FF));
                }
                ++$cns_data_index;
                if ($cns_column % 10 == 9) {
                    print OUT "\n";
                    $bol = 1;
                }
            }
            if ($bol == 0) {
                print OUT "\n";
            }
        }

        # Statistics in bytes (two per table entry): space actually
        # emitted versus space taken by real mappings.
        $cns_chars += $cns_row_chars;
        $cns_data_space[$cns_plane][$cns_row]
            = ($cns_data_index
               - $cns_data_offsets[$cns_plane][$cns_row]) * 2;
        $cns_data_used[$cns_plane][$cns_row]
            = (1 + $cns_row_chars
               + ($cns_row_surrogates == 0 ?
                  0 : 1 + $cns_row_surrogates)) * 2;
    }
    print "cns plane ",
          $cns_plane,
          ": rows = ",
          $cns_rows,
          ", chars = ",
          $cns_chars,
          "\n";
}
print OUT "};\n\n";

# CNS -> Unicode row offsets: for every row of a used plane the index of
# its data in the array above, or -1 for a row without mappings.
print OUT "static sal_Int32 const aImpl", $id, "ToUnicodeRowOffsets[] = {\n";
for ($cns_plane = 1; $cns_plane <= 16; ++$cns_plane) {
    if (defined ($cns_plane_used[$cns_plane])) {
        $cns_rowoffsets_used[$cns_plane] = 0;
        for ($cns_row = 1; $cns_row <= 94; ++$cns_row) {
            if ($cns_data_offsets[$cns_plane][$cns_row] == -1) {
                print OUT " -1, /* plane ",
                          $cns_plane,
                          ", row ",
                          $cns_row,
                          " */\n";
            }
            else {
                print OUT " ",
                          $cns_data_offsets[$cns_plane][$cns_row],
                          ", /* plane ",
                          $cns_plane,
                          ", row ",
                          $cns_row,
                          "; ",
                          printStats($cns_data_used[$cns_plane][$cns_row],
                                     $cns_data_space[$cns_plane][$cns_row]),
                          " */\n";
                $cns_rowoffsets_used[$cns_plane] += 4;
            }
        }
    }
    else {
        print OUT " /* plane ", $cns_plane, ": --- */\n";
    }
}
print OUT "};\n\n";

# CNS -> Unicode plane offsets: "n * 94" for the n-th used plane, -1 for an
# unused plane.
print OUT "static sal_Int32 const aImpl",
          $id,
          "ToUnicodePlaneOffsets[] = {\n";
$cns_row_offset = 0;
for ($cns_plane = 1; $cns_plane <= 16; ++$cns_plane) {
    if (defined ($cns_plane_used[$cns_plane])) {
        print OUT " ",
                  $cns_row_offset++,
                  " * 94, /* plane ",
                  $cns_plane,
                  "; ",
                  printStats($cns_rowoffsets_used[$cns_plane], 94 * 4),
                  " */\n";
    }
    else {
        print OUT " -1, /* plane ", $cns_plane, " */\n";
    }
}
print OUT "};\n\n";

# Unicode -> CNS data.  Per used page: the first and last mapped index,
# then three bytes (plane, row, column) for every index in first..last,
# with 0, 0, 0 for unmapped indexes.
print OUT "static sal_uInt8 const aImplUnicodeTo", $id, "Data[] = {\n";
$uni_data_index = 0;
for ($uni_plane = 0; $uni_plane <= 16; ++$uni_plane) {
    if (!defined($uni_plane_used[$uni_plane])) {
        print OUT " /* plane ", $uni_plane, ": --- */\n";
        next;
    }
    for ($uni_page = 0; $uni_page <= 255; ++$uni_page) {
        if (!defined($uni_page_used[$uni_plane][$uni_page])) {
            $uni_data_offsets[$uni_plane][$uni_page] = -1;
            print OUT " /* plane ", $uni_plane, ", page ", $uni_page,
                      ": --- */\n";
            next;
        }

        $uni_data_offsets[$uni_plane][$uni_page] = $uni_data_index;
        print OUT " /* plane ", $uni_plane, ", page ", $uni_page,
                  " */\n";

        # Find the first and last mapped index of this page.
        $uni_page_first = -1;
        for ($uni_index = 0; $uni_index <= 255; ++$uni_index) {
            if (defined($uni_map[$uni_plane][$uni_page][$uni_index])) {
                if ($uni_page_first == -1) {
                    $uni_page_first = $uni_index;
                }
                $uni_page_last = $uni_index;
            }
        }

        $uni_data_used[$uni_plane][$uni_page] = 0;

        print OUT " ", $uni_page_first, ", ", $uni_page_last,
                  ", /* first, last */\n";
        $uni_data_index += 2;
        $uni_data_used[$uni_plane][$uni_page] += 2;

        print OUT " ", printSpaces(9, 8, $uni_page_first);
        $bol = 0;
        for ($uni_index = $uni_page_first;
             $uni_index <= $uni_page_last;
             ++$uni_index)
        {
            if ($bol == 1) {
                print OUT " ";
                $bol = 0;
            }
            if (defined($uni_map[$uni_plane][$uni_page][$uni_index])) {
                $cns = $uni_map[$uni_plane][$uni_page][$uni_index];
                printf OUT "%2d,%2d,%2d,",
                           $cns >> 16,
                           ($cns >> 8) & 0xFF,
                           $cns & 0xFF;
                $uni_data_used[$uni_plane][$uni_page] += 3;
            }
            else {
                print OUT " 0, 0, 0,";
            }
            $uni_data_index += 3;
            if ($uni_index % 8 == 7) {
                print OUT "\n";
                $bol = 1;
            }
        }
        if ($bol == 0) {
            print OUT "\n";
        }

        $uni_data_space[$uni_plane][$uni_page]
            = $uni_data_index
              - $uni_data_offsets[$uni_plane][$uni_page];
    }
}
print OUT "};\n\n";

# Unicode -> CNS page offsets: for every page of a used plane the index of
# its data in the array above, or -1 for a page without mappings.  The
# per-plane sums of used and reserved bytes are collected alongside.
print OUT "static sal_Int32 const aImplUnicodeTo", $id, "PageOffsets[] = {\n";
for ($uni_plane = 0; $uni_plane <= 16; ++$uni_plane) {
    if (defined($uni_plane_used[$uni_plane])) {
        $uni_pageoffsets_used[$uni_plane] = 0;
        $uni_data_used_sum[$uni_plane] = 0;
        $uni_data_space_sum[$uni_plane] = 0;
        for ($uni_page = 0; $uni_page <= 255; ++$uni_page) {
            $offset = $uni_data_offsets[$uni_plane][$uni_page];
            if ($offset == -1) {
                print OUT " -1, /* plane ",
                          $uni_plane,
                          ", page ",
                          $uni_page,
                          " */\n";
            }
            else {
                print OUT " ",
                          $offset,
                          ", /* plane ",
                          $uni_plane,
                          ", page ",
                          $uni_page,
                          "; ",
                          printStats($uni_data_used[$uni_plane][$uni_page],
                                     $uni_data_space[$uni_plane][$uni_page]),
                          " */\n";
                $uni_pageoffsets_used[$uni_plane] += 4;
                $uni_data_used_sum[$uni_plane]
                    += $uni_data_used[$uni_plane][$uni_page];
                $uni_data_space_sum[$uni_plane]
                    += $uni_data_space[$uni_plane][$uni_page];
            }
        }
    }
    else {
        print OUT " /* plane ", $uni_plane, ": --- */\n";
    }
}
print OUT "};\n\n";

# The argument list of the following statement continues beyond this
# excerpt; the statement opener is kept unchanged.
print OUT
"static sal_Int32 const aImplUnicodeTo", 804*b1cdbd2cSJim Jagielski $id, 805*b1cdbd2cSJim Jagielski "PlaneOffsets[] = {\n"; 806*b1cdbd2cSJim Jagielski$uni_page_offset = 0; 807*b1cdbd2cSJim Jagielski$uni_planeoffsets_used = 0; 808*b1cdbd2cSJim Jagielski$uni_pageoffsets_used_sum = 0; 809*b1cdbd2cSJim Jagielski$uni_pageoffsets_space_sum = 0; 810*b1cdbd2cSJim Jagielski$uni_data_used_sum2 = 0; 811*b1cdbd2cSJim Jagielski$uni_data_space_sum2 = 0; 812*b1cdbd2cSJim Jagielskifor ($uni_plane = 0; $uni_plane <= 16; ++$uni_plane) 813*b1cdbd2cSJim Jagielski{ 814*b1cdbd2cSJim Jagielski if (defined ($uni_plane_used[$uni_plane])) 815*b1cdbd2cSJim Jagielski { 816*b1cdbd2cSJim Jagielski print OUT " ", 817*b1cdbd2cSJim Jagielski $uni_page_offset++, 818*b1cdbd2cSJim Jagielski " * 256, /* plane ", 819*b1cdbd2cSJim Jagielski $uni_plane, 820*b1cdbd2cSJim Jagielski "; ", 821*b1cdbd2cSJim Jagielski printStats($uni_pageoffsets_used[$uni_plane], 256 * 4), 822*b1cdbd2cSJim Jagielski ", ", 823*b1cdbd2cSJim Jagielski printStats($uni_data_used_sum[$uni_plane], 824*b1cdbd2cSJim Jagielski $uni_data_space_sum[$uni_plane]), 825*b1cdbd2cSJim Jagielski " */\n"; 826*b1cdbd2cSJim Jagielski $uni_planeoffsets_used += 4; 827*b1cdbd2cSJim Jagielski $uni_pageoffsets_used_sum += $uni_pageoffsets_used[$uni_plane]; 828*b1cdbd2cSJim Jagielski $uni_pageoffsets_space_sum += 256 * 4; 829*b1cdbd2cSJim Jagielski $uni_data_used_sum2 += $uni_data_used_sum[$uni_plane]; 830*b1cdbd2cSJim Jagielski $uni_data_space_sum2 += $uni_data_space_sum[$uni_plane]; 831*b1cdbd2cSJim Jagielski } 832*b1cdbd2cSJim Jagielski else 833*b1cdbd2cSJim Jagielski { 834*b1cdbd2cSJim Jagielski print OUT " -1, /* plane ", $uni_plane, " */\n"; 835*b1cdbd2cSJim Jagielski } 836*b1cdbd2cSJim Jagielski} 837*b1cdbd2cSJim Jagielskiprint OUT " /* ", 838*b1cdbd2cSJim Jagielski printStats($uni_planeoffsets_used, 17 * 4), 839*b1cdbd2cSJim Jagielski ", ", 840*b1cdbd2cSJim Jagielski printStats($uni_pageoffsets_used_sum, $uni_pageoffsets_space_sum), 
841*b1cdbd2cSJim Jagielski ", ", 842*b1cdbd2cSJim Jagielski printStats($uni_data_used_sum2, $uni_data_space_sum2), 843*b1cdbd2cSJim Jagielski " */\n};\n"; 844*b1cdbd2cSJim Jagielski 845*b1cdbd2cSJim Jagielskiclose OUT; 846*b1cdbd2cSJim Jagielski 847*b1cdbd2cSJim Jagielskiprint "Unihan.txt = ", $count_Unihan_txt, 848*b1cdbd2cSJim Jagielski ", CNS11643.TXT = ", $count_CNS11643_TXT, 849*b1cdbd2cSJim Jagielski ", Uni2CNS = ", $count_Uni2CNS, 850*b1cdbd2cSJim Jagielski ", total = ", 851*b1cdbd2cSJim Jagielski ($count_Unihan_txt + $count_CNS11643_TXT + $count_Uni2CNS), 852*b1cdbd2cSJim Jagielski "\n"; 853