#!/usr/bin/perl
#**************************************************************
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#**************************************************************

# NOTE: this script reads its own leading comment block (everything above,
# up to the first line that does not start with '#') and copies it, as a C
# comment, into the generated .tab file.  Keep the blank line above so that
# the comments below are not copied as well.

use strict;
use warnings;

# The following files must be available in a ./input subdir:

# <http://www.unicode.org/Public/UNIDATA/Unihan.txt>:
#  "Unicode version: 3.1.1    Table version: 1.1    Date: 28 June 2001"
#  contains descriptions for:
#   U+3400..4DFF CJK Unified Ideographs Extension A
#   U+4E00..9FFF CJK Unified Ideographs
#   U+F900..FAFF CJK Compatibility Ideographs
#   U+20000..2F7FF CJK Unified Ideographs Extension B
#   U+2F800..2FFFF CJK Compatibility Ideographs Supplement

# <http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/CNS11643.TXT>:
#  "Unicode version: 1.1    Table version: 0.0d1    Date: 21 October 1994"
#  contains mappings for CNS 11643-1986

# <http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/ftp/CJKtable/Uni2CNS.Z>:
#  "Unicode version: 1.1    Table version: 0.49    Date: 26 March 1998"
#  contains mappings for CNS 11643-1992 that are incompatible with
#  CNS11643.TXT

# Base name of the generated tables; lc($id) . ".tab" is the output file and
# lc($id) . ".pl" is the file whose leading comment is copied into it.
my $id = "Cns116431992";

# Set to 1 to additionally read input/Uni2CNS (disabled, as it was in the
# original script).
my $read_Uni2CNS = 0;

# $cns_map[plane][row][column] is the UTF-32 code point a CNS character maps
# to; $cns_plane_used[plane] is set for every plane with at least one entry.
my @cns_map;
my @cns_plane_used;

# $uni_map[plane][page][index] is the CNS character a code point maps to,
# packed as (plane << 16) | (row << 8) | column.  The *_used arrays flag the
# Unicode planes and pages that have at least one entry.
my @uni_map;
my @uni_plane_used;
my @uni_page_used;

# Returns true if the argument is a Unicode scalar value that is not a
# noncharacter (U+FDD0..FDEF, U+xxFFFE, U+xxFFFF).
sub isValidUtf32
{
    my ($utf32) = @_;
    return $utf32 >= 0 && $utf32 <= 0x10FFFF
        && !($utf32 >= 0xD800 && $utf32 <= 0xDFFF)
        && !($utf32 >= 0xFDD0 && $utf32 <= 0xFDEF)
        && ($utf32 & 0xFFFF) < 0xFFFE;
}

# Formats a code point as "U+XXXX" (at least four hex digits).
sub printUtf32
{
    my ($utf32) = @_;
    return sprintf("U+%04X", $utf32);
}

# Returns true if plane is within 1..16 and row and column are within 1..94.
sub isValidCns116431992
{
    my ($plane, $row, $column) = @_;
    return $plane >= 1 && $plane <= 16
        && $row >= 1 && $row <= 94
        && $column >= 1 && $column <= 94;
}

# Formats a CNS character as "plane-row/column".
sub printCns116431992
{
    my ($plane, $row, $column) = @_;
    return sprintf("%d-%02d/%02d", $plane, $row, $column);
}

# Formats a "used/space bytes (percent%)" statistic.  A zero $space is
# reported as 0.0% instead of dividing by zero.
sub printStats
{
    my ($used, $space) = @_;
    my $percent = $space ? $used * 100 / $space : 0;
    return sprintf("%d/%d bytes (%.1f%%)", $used, $space, $percent);
}

# Returns the padding that stands in for the table entries preceding $end on
# its output line: ($end modulo $columns_per_line) entries of $column_width
# blanks each.
sub printSpaces
{
    my ($column_width, $columns_per_line, $end) = @_;
    return " " x ($column_width * ($end % $columns_per_line));
}

# Returns true for the range of CNS plane 3 that the original script calls
# Fictitious Character Set Extensions (Lunde, p. 131).
sub isFictitiousCns
{
    my ($plane, $row, $column) = @_;
    return $plane == 3 && ($row == 66 && $column > 38 || $row > 66);
}

# Converts the hex strings captured from one input line into the list
# (utf32, plane, row, column); row and column are rebased by 0x20.  Dies if
# either the code point or the CNS character is out of range.
sub decodeMapping
{
    my ($utf32_hex, $plane_hex, $row_hex, $column_hex) = @_;
    my $utf32 = hex($utf32_hex);
    my $cns_plane = hex($plane_hex);
    my $cns_row = hex($row_hex) - 0x20;
    my $cns_column = hex($column_hex) - 0x20;
    # printUtf32 already supplies the "U+" prefix.
    isValidUtf32($utf32)
        or die "Bad UTF32 char " . printUtf32($utf32);
    isValidCns116431992($cns_plane, $cns_row, $cns_column)
        or die "Bad CNS11643-1992 char "
            . printCns116431992($cns_plane, $cns_row, $cns_column);
    return ($utf32, $cns_plane, $cns_row, $cns_column);
}

# Enters a CNS -> UTF-32 mapping into @cns_map unless that CNS character
# already has one.  Returns 1 if the mapping was entered, 0 otherwise.
# $on_conflict selects what happens when the character is already mapped to
# a different code point: 'die' aborts, 'warn' prints a warning to STDOUT,
# 'ignore' does nothing.  The existing mapping always wins.
sub recordCnsMapping
{
    my ($utf32, $cns_plane, $cns_row, $cns_column, $on_conflict) = @_;
    my $known = $cns_map[$cns_plane][$cns_row][$cns_column];
    if (!defined($known))
    {
        $cns_map[$cns_plane][$cns_row][$cns_column] = $utf32;
        $cns_plane_used[$cns_plane] = 1;
        return 1;
    }
    return 0 if $known == $utf32 || $on_conflict eq 'ignore';
    my $message = "Mapping "
        . printCns116431992($cns_plane, $cns_row, $cns_column)
        . " to "
        . printUtf32($known)
        . ", NOT "
        . printUtf32($utf32);
    die $message if $on_conflict eq 'die';
    print "WARNING! ", $message, "\n";
    return 0;
}

# Opens input/$name for reading and returns the handle; dies on failure.
sub openInput
{
    my ($name) = @_;
    open my $in, '<', "input/" . $name
        or die "Cannot read " . $name . ": $!";
    return $in;
}

# Writes the table entries $first..$last to $out, $columns_per_line entries
# per line.  The first line is padded with printSpaces so that entries line
# up by index; $format_entry->($index) returns the text of one entry.
sub printEntries
{
    my ($out, $column_width, $columns_per_line, $first, $last,
        $format_entry) = @_;
    print {$out} " ", printSpaces($column_width, $columns_per_line, $first);
    my $bol = 0;
    for my $index ($first .. $last)
    {
        if ($bol)
        {
            print {$out} " ";
            $bol = 0;
        }
        print {$out} $format_entry->($index);
        if ($index % $columns_per_line == $columns_per_line - 1)
        {
            print {$out} "\n";
            $bol = 1;
        }
    }
    print {$out} "\n" if !$bol;
    return;
}

my $count_Unihan_txt = 0;
my $count_CNS11643_TXT = 0;
my $count_Uni2CNS = 0;

# Unihan.txt: kCNS1992 entries must be consistent with each other;
# kIRG_TSource entries that disagree with an earlier mapping only produce a
# warning.
{
    my $in = openInput("Unihan.txt");
    while (my $line = <$in>)
    {
        if ($line =~ /^U\+([0-9A-F]+)\tkCNS1992\t([0-9A-F])-([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])$/)
        {
            $count_Unihan_txt
                += recordCnsMapping(decodeMapping($1, $2, $3, $4), 'die');
        }
        elsif ($line =~ /^U\+([0-9A-F]+)\tkIRG_TSource\t([0-9A-F])-([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])$/)
        {
            $count_Unihan_txt
                += recordCnsMapping(decodeMapping($1, $2, $3, $4), 'warn');
        }
        elsif ($line =~ /^U\+([0-9A-F]+)\tkCNS1992\t.*$/)
        {
            die "Bad format";
        }
    }
    close $in;
}

# CNS11643.TXT: every line is validated, but only planes 1 and 2 are used.
{
    my $in = openInput("CNS11643.TXT");
    while (my $line = <$in>)
    {
        if ($line =~ /0x([0-9A-F])([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])\t0x([0-9A-F]+)\t\#.*$/)
        {
            my ($utf32, $cns_plane, $cns_row, $cns_column)
                = decodeMapping($4, $1, $2, $3);
            if ($cns_plane <= 2)
            {
                $count_CNS11643_TXT
                    += recordCnsMapping($utf32, $cns_plane, $cns_row,
                                        $cns_column, 'die');
            }
        }
    }
    close $in;
}

# Uni2CNS (optional): conflicts with earlier mappings are silently ignored,
# and every plane 1 character that is seen is listed on STDOUT.
if ($read_Uni2CNS)
{
    my $in = openInput("Uni2CNS");
    while (my $line = <$in>)
    {
        if ($line =~ /([0-9A-F]+)\t([0-9A-F])-([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])\t.*$/)
        {
            my ($utf32, $cns_plane, $cns_row, $cns_column)
                = decodeMapping($1, $2, $3, $4);
            $count_Uni2CNS
                += recordCnsMapping($utf32, $cns_plane, $cns_row,
                                    $cns_column, 'ignore');
            if ($cns_plane == 1)
            {
                my $text
                    = printCns116431992($cns_plane, $cns_row, $cns_column);
                print $text, "\n";
            }
        }
    }
    close $in;
}

# Invert @cns_map into @uni_map.  When several CNS characters map to the same
# code point, the one visited first (lowest plane/row/column) is kept, except
# that a fictitious-extension character gives way to a regular one.
for my $cns_plane (1 .. 16)
{
    next if !defined($cns_plane_used[$cns_plane]);
    for my $cns_row (1 .. 94)
    {
        for my $cns_column (1 .. 94)
        {
            my $utf32 = $cns_map[$cns_plane][$cns_row][$cns_column];
            next if !defined($utf32);
            my $uni_plane = $utf32 >> 16;
            my $uni_page = ($utf32 >> 8) & 0xFF;
            my $uni_index = $utf32 & 0xFF;
            my $cns = ($cns_plane << 16) | ($cns_row << 8) | $cns_column;
            my $cns1 = $uni_map[$uni_plane][$uni_page][$uni_index];
            if (!defined($cns1))
            {
                $uni_map[$uni_plane][$uni_page][$uni_index] = $cns;
                $uni_plane_used[$uni_plane] = 1;
                $uni_page_used[$uni_plane][$uni_page] = 1;
                next;
            }

            my @old = ($cns1 >> 16, ($cns1 >> 8) & 0xFF, $cns1 & 0xFF);
            my @new = ($cns_plane, $cns_row, $cns_column);

            # Do not map from Unicode to Fictitious Character Set
            # Extensions (Lunde, p. 131), if possible:
            if (isFictitiousCns(@new))
            {
                print " (",
                      printUtf32($utf32),
                      " to fictitious ",
                      printCns116431992(@new),
                      " ignored, favouring ",
                      printCns116431992(@old),
                      ")\n";
            }
            elsif (isFictitiousCns(@old))
            {
                $uni_map[$uni_plane][$uni_page][$uni_index] = $cns;
                print " (",
                      printUtf32($utf32),
                      " to fictitious ",
                      printCns116431992(@old),
                      " ignored, favouring ",
                      printCns116431992(@new),
                      ")\n";
            }
            else
            {
                print "WARNING! Mapping ",
                      printUtf32($utf32),
                      " to ",
                      printCns116431992(@old),
                      ", NOT ",
                      printCns116431992(@new),
                      "\n";
            }
        }
    }
}

# No code point in U+0000..007F may have a CNS mapping.  (The original
# indexed @uni_map with a stale variable left over from the loop above, so
# this check never looked at the code point being iterated.)
if (defined($uni_plane_used[0]) && defined($uni_page_used[0][0]))
{
    for my $utf32 (0 .. 0x7F)
    {
        my $cns = $uni_map[0][0][$utf32];
        next if !defined($cns);
        die "Mapping "
            . printUtf32($utf32)
            . " to "
            . printCns116431992($cns >> 16,
                                ($cns >> 8) & 0xFF,
                                $cns & 0xFF);
    }
}

my $tab_name = lc($id) . ".tab";
open my $out, '>', $tab_name or die "Cannot write " . $tab_name . ": $!";

# Copy this script's leading comment block into the output as a C comment:
# the "#!" line is dropped, the first "#***" line opens the comment and any
# later "#***" line closes it; copying stops at the first non-comment line.
{
    my $script_name = lc($id) . ".pl";
    open my $in, '<', $script_name
        or die "Cannot read " . $script_name . ": $!";
    my $first = 1;
    while (my $line = <$in>)
    {
        if ($line =~ /^\#!.*$/)
        {
            next;
        }
        elsif ($line =~ /^\#(\*.*)$/)
        {
            if ($first)
            {
                print {$out} "/", $1, "\n";
                $first = 0;
            }
            else
            {
                print {$out} " ", substr($1, 0, length($1) - 1), "/\n";
            }
        }
        elsif ($line =~ /^\# (.*)$/)
        {
            print {$out} " *", $1, "\n";
        }
        elsif ($line =~ /^\#(.*)$/)
        {
            print {$out} " *", $1, "\n";
        }
        else
        {
            last;
        }
    }
    close $in;
}

print {$out} "\n",
             "#ifndef _SAL_TYPES_H_\n",
             "#include \"sal/types.h\"\n",
             "#endif\n",
             "\n";

# CNS -> Unicode data: per used row, one "first | (last << 8)" entry, then
# the UTF-16 units for columns first..last (high surrogates for code points
# above U+FFFF, 0xffff for unmapped columns).  If the row contains any
# surrogates, the index of the first one and the low surrogates for the
# columns between the first and last surrogate follow.
my $cns_data_index = 0;
my @cns_data_offsets;
my @cns_data_space;
my @cns_data_used;
print {$out} "static sal_uInt16 const aImpl", $id, "ToUnicodeData[] = {\n";
for my $cns_plane (1 .. 16)
{
    next if !defined($cns_plane_used[$cns_plane]);
    my $cns_rows = 0;
    my $cns_chars = 0;
    for my $cns_row (1 .. 94)
    {
        my @columns
            = grep { defined($cns_map[$cns_plane][$cns_row][$_]) } 1 .. 94;
        if (!@columns)
        {
            print {$out} " /* plane ", $cns_plane, ", row ", $cns_row,
                         ": --- */\n";
            $cns_data_offsets[$cns_plane][$cns_row] = -1;
            next;
        }
        my ($cns_row_first, $cns_row_last) = @columns[0, -1];

        $cns_data_offsets[$cns_plane][$cns_row] = $cns_data_index;
        ++$cns_rows;
        print {$out} " /* plane ", $cns_plane, ", row ", $cns_row,
                     " */\n";

        print {$out} " ", $cns_row_first, " | (", $cns_row_last,
                     " << 8), /* first, last */\n";
        ++$cns_data_index;

        my $cns_row_chars = 0;
        my @surrogate_columns;
        printEntries($out, 7, 10, $cns_row_first, $cns_row_last, sub {
            my ($cns_column) = @_;
            my $utf32 = $cns_map[$cns_plane][$cns_row][$cns_column];
            return "0xffff," if !defined($utf32);
            ++$cns_row_chars;
            return sprintf("0x%04X,", $utf32) if $utf32 <= 0xFFFF;
            push @surrogate_columns, $cns_column;
            return sprintf("0x%04X,",
                           (0xD800 | (($utf32 - 0x10000) >> 10)));
        });
        $cns_data_index += $cns_row_last - $cns_row_first + 1;

        my $cns_row_surrogates = scalar @surrogate_columns;
        if ($cns_row_surrogates != 0)
        {
            my ($surrogates_first, $surrogates_last)
                = @surrogate_columns[0, -1];
            print {$out} " ", $surrogates_first,
                         ", /* first low-surrogate */\n";
            ++$cns_data_index;

            printEntries($out, 7, 10, $surrogates_first, $surrogates_last,
                         sub {
                my ($cns_column) = @_;
                my $utf32 = $cns_map[$cns_plane][$cns_row][$cns_column];
                return " 0," if !defined($utf32) || $utf32 <= 0xFFFF;
                return sprintf("0x%04X,",
                               (0xDC00 | (($utf32 - 0x10000) & 0x3FF)));
            });
            $cns_data_index += $surrogates_last - $surrogates_first + 1;
        }

        $cns_chars += $cns_row_chars;
        $cns_data_space[$cns_plane][$cns_row]
            = ($cns_data_index
               - $cns_data_offsets[$cns_plane][$cns_row]) * 2;
        $cns_data_used[$cns_plane][$cns_row]
            = (1 + $cns_row_chars
               + ($cns_row_surrogates == 0 ?
                  0 : 1 + $cns_row_surrogates)) * 2;
    }
    print "cns plane ",
          $cns_plane,
          ": rows = ",
          $cns_rows,
          ", chars = ",
          $cns_chars,
          "\n";
}
print {$out} "};\n\n";

# CNS -> Unicode row offsets: for every used plane, 94 offsets into the data
# array above (-1 for rows without mappings).
my @cns_rowoffsets_used;
print {$out} "static sal_Int32 const aImpl", $id,
             "ToUnicodeRowOffsets[] = {\n";
for my $cns_plane (1 .. 16)
{
    if (!defined($cns_plane_used[$cns_plane]))
    {
        print {$out} " /* plane ", $cns_plane, ": --- */\n";
        next;
    }
    $cns_rowoffsets_used[$cns_plane] = 0;
    for my $cns_row (1 .. 94)
    {
        my $offset = $cns_data_offsets[$cns_plane][$cns_row];
        if ($offset == -1)
        {
            print {$out} " -1, /* plane ",
                         $cns_plane,
                         ", row ",
                         $cns_row,
                         " */\n";
        }
        else
        {
            print {$out} " ",
                         $offset,
                         ", /* plane ",
                         $cns_plane,
                         ", row ",
                         $cns_row,
                         "; ",
                         printStats($cns_data_used[$cns_plane][$cns_row],
                                    $cns_data_space[$cns_plane][$cns_row]),
                         " */\n";
            $cns_rowoffsets_used[$cns_plane] += 4;
        }
    }
}
print {$out} "};\n\n";

# CNS -> Unicode plane offsets: the n-th used plane starts at n * 94 in the
# row offsets array; unused planes get -1.
print {$out} "static sal_Int32 const aImpl",
             $id,
             "ToUnicodePlaneOffsets[] = {\n";
my $cns_row_offset = 0;
for my $cns_plane (1 .. 16)
{
    if (defined($cns_plane_used[$cns_plane]))
    {
        print {$out} " ",
                     $cns_row_offset++,
                     " * 94, /* plane ",
                     $cns_plane,
                     "; ",
                     printStats($cns_rowoffsets_used[$cns_plane], 94 * 4),
                     " */\n";
    }
    else
    {
        print {$out} " -1, /* plane ", $cns_plane, " */\n";
    }
}
print {$out} "};\n\n";

# Unicode -> CNS data: per used page, the first and last mapped index, then
# a plane/row/column byte triple for every index first..last (0, 0, 0 for
# unmapped code points).
my $uni_data_index = 0;
my @uni_data_offsets;
my @uni_data_space;
my @uni_data_used;
print {$out} "static sal_uInt8 const aImplUnicodeTo", $id, "Data[] = {\n";
for my $uni_plane (0 .. 16)
{
    if (!defined($uni_plane_used[$uni_plane]))
    {
        print {$out} " /* plane ", $uni_plane, ": --- */\n";
        next;
    }
    for my $uni_page (0 .. 255)
    {
        if (!defined($uni_page_used[$uni_plane][$uni_page]))
        {
            $uni_data_offsets[$uni_plane][$uni_page] = -1;
            print {$out} " /* plane ", $uni_plane, ", page ", $uni_page,
                         ": --- */\n";
            next;
        }
        $uni_data_offsets[$uni_plane][$uni_page] = $uni_data_index;
        print {$out} " /* plane ", $uni_plane, ", page ", $uni_page,
                     " */\n";

        my @indices
            = grep { defined($uni_map[$uni_plane][$uni_page][$_]) } 0 .. 255;
        my ($uni_page_first, $uni_page_last) = @indices[0, -1];

        print {$out} " ", $uni_page_first, ", ", $uni_page_last,
                     ", /* first, last */\n";
        $uni_data_index += 2;
        my $used = 2;

        printEntries($out, 9, 8, $uni_page_first, $uni_page_last, sub {
            my ($uni_index) = @_;
            my $cns = $uni_map[$uni_plane][$uni_page][$uni_index];
            return " 0, 0, 0," if !defined($cns);
            $used += 3;
            return sprintf("%2d,%2d,%2d,",
                           $cns >> 16,
                           ($cns >> 8) & 0xFF,
                           $cns & 0xFF);
        });
        $uni_data_index += 3 * ($uni_page_last - $uni_page_first + 1);

        $uni_data_used[$uni_plane][$uni_page] = $used;
        $uni_data_space[$uni_plane][$uni_page]
            = $uni_data_index
              - $uni_data_offsets[$uni_plane][$uni_page];
    }
}
print {$out} "};\n\n";

# Unicode -> CNS page offsets: for every used plane, 256 offsets into the
# data array above (-1 for pages without mappings).
my @uni_pageoffsets_used;
my @uni_data_used_sum;
my @uni_data_space_sum;
print {$out} "static sal_Int32 const aImplUnicodeTo", $id,
             "PageOffsets[] = {\n";
for my $uni_plane (0 .. 16)
{
    if (!defined($uni_plane_used[$uni_plane]))
    {
        print {$out} " /* plane ", $uni_plane, ": --- */\n";
        next;
    }
    $uni_pageoffsets_used[$uni_plane] = 0;
    $uni_data_used_sum[$uni_plane] = 0;
    $uni_data_space_sum[$uni_plane] = 0;
    for my $uni_page (0 .. 255)
    {
        my $offset = $uni_data_offsets[$uni_plane][$uni_page];
        if ($offset == -1)
        {
            print {$out} " -1, /* plane ",
                         $uni_plane,
                         ", page ",
                         $uni_page,
                         " */\n";
        }
        else
        {
            print {$out} " ",
                         $offset,
                         ", /* plane ",
                         $uni_plane,
                         ", page ",
                         $uni_page,
                         "; ",
                         printStats($uni_data_used[$uni_plane][$uni_page],
                                    $uni_data_space[$uni_plane][$uni_page]),
                         " */\n";
            $uni_pageoffsets_used[$uni_plane] += 4;
            $uni_data_used_sum[$uni_plane]
                += $uni_data_used[$uni_plane][$uni_page];
            $uni_data_space_sum[$uni_plane]
                += $uni_data_space[$uni_plane][$uni_page];
        }
    }
}
print {$out} "};\n\n";

# Unicode -> CNS plane offsets: the n-th used plane starts at n * 256 in the
# page offsets array; unused planes get -1.  The closing comment carries the
# overall size statistics.
print {$out} "static sal_Int32 const aImplUnicodeTo",
             $id,
             "PlaneOffsets[] = {\n";
my $uni_page_offset = 0;
my $uni_planeoffsets_used = 0;
my $uni_pageoffsets_used_sum = 0;
my $uni_pageoffsets_space_sum = 0;
my $uni_data_used_sum2 = 0;
my $uni_data_space_sum2 = 0;
for my $uni_plane (0 .. 16)
{
    if (defined($uni_plane_used[$uni_plane]))
    {
        print {$out} " ",
                     $uni_page_offset++,
                     " * 256, /* plane ",
                     $uni_plane,
                     "; ",
                     printStats($uni_pageoffsets_used[$uni_plane], 256 * 4),
                     ", ",
                     printStats($uni_data_used_sum[$uni_plane],
                                $uni_data_space_sum[$uni_plane]),
                     " */\n";
        $uni_planeoffsets_used += 4;
        $uni_pageoffsets_used_sum += $uni_pageoffsets_used[$uni_plane];
        $uni_pageoffsets_space_sum += 256 * 4;
        $uni_data_used_sum2 += $uni_data_used_sum[$uni_plane];
        $uni_data_space_sum2 += $uni_data_space_sum[$uni_plane];
    }
    else
    {
        print {$out} " -1, /* plane ", $uni_plane, " */\n";
    }
}
print {$out} " /* ",
             printStats($uni_planeoffsets_used, 17 * 4),
             ", ",
             printStats($uni_pageoffsets_used_sum,
                        $uni_pageoffsets_space_sum),
             ", ",
             printStats($uni_data_used_sum2, $uni_data_space_sum2),
             " */\n};\n";

# Buffered write errors only surface when the handle is closed.
close $out or die "Cannot write " . $tab_name . ": $!";

print "Unihan.txt = ", $count_Unihan_txt,
      ", CNS11643.TXT = ", $count_CNS11643_TXT,
      ", Uni2CNS = ", $count_Uni2CNS,
      ", total = ",
      ($count_Unihan_txt + $count_CNS11643_TXT + $count_Uni2CNS),
      "\n";