#!/usr/bin/perl
#**************************************************************
#
#  Licensed to the Apache Software Foundation (ASF) under one
#  or more contributor license agreements.  See the NOTICE file
#  distributed with this work for additional information
#  regarding copyright ownership.  The ASF licenses this file
#  to you under the Apache License, Version 2.0 (the
#  "License"); you may not use this file except in compliance
#  with the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing,
#  software distributed under the License is distributed on an
#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
#  KIND, either express or implied.  See the License for the
#  specific language governing permissions and limitations
#  under the License.
#
#**************************************************************

# NOTE: the comment block above is read back by this script (see "Copy the
# licence header" below) and converted into the C comment at the top of the
# generated file.  It must stay at the very top of the file and must be
# followed by a line that does not start with '#'.

# The following files must be available in a ./input subdir:

# <http://www.unicode.org/Public/UNIDATA/Unihan.txt>:
#  "Unicode version: 3.1.1    Table version: 1.1    Date: 28 June 2001"
#  contains descriptions for:
#   U+3400..4DFF CJK Unified Ideographs Extension A
#   U+4E00..9FFF CJK Unified Ideographs
#   U+F900..FAFF CJK Compatibility Ideographs
#   U+20000..2F7FF CJK Unified Ideographs Extension B
#   U+2F800..2FFFF CJK Compatibility Ideographs Supplement

# <http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/CNS11643.TXT>:
#  "Unicode version: 1.1    Table version: 0.0d1    Date: 21 October 1994"
#  contains mappings for CNS 11643-1986

# <http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/ftp/CJKtable/Uni2CNS.Z>:
#  "Unicode version: 1.1    Table version: 0.49    Date: 26 March 1998"
#  contains mappings for CNS 11643-1992 that are incompatible with
#  CNS11643.TXT

use strict;
use warnings;

# Identifier used for the generated C symbol names and, lower-cased, for the
# names of the output file (<id>.tab) and of this script itself (<id>.pl).
my $id = "Cns116431992";

# $cns_map[plane][row][column] = UTF-32 code point, filled from the input
# files; $cns_plane_used[plane] is set for every plane with an entry.
my @cns_map;
my @cns_plane_used;

# Return true if the argument is a Unicode scalar value that is neither a
# surrogate (U+D800..DFFF) nor one of the values excluded below
# (U+FDD0..FDEF and the last two code points of every plane).
sub isValidUtf32
{
    my ($utf32) = @_;
    return $utf32 >= 0 && $utf32 <= 0x10FFFF
           && !($utf32 >= 0xD800 && $utf32 <= 0xDFFF)
           && !($utf32 >= 0xFDD0 && $utf32 <= 0xFDEF)
           && ($utf32 & 0xFFFF) < 0xFFFE;
}

# Format a code point as "U+XXXX" (at least four hex digits).
sub printUtf32
{
    my ($utf32) = @_;
    return sprintf("U+%04X", $utf32);
}

# Return true if (plane, row, column) lies within 1..16 / 1..94 / 1..94.
sub isValidCns116431992
{
    my ($plane, $row, $column) = @_;
    return $plane >= 1 && $plane <= 16
           && $row >= 1 && $row <= 94
           && $column >= 1 && $column <= 94;
}

# Format a CNS position as "plane-row/column" (row and column two digits).
sub printCns116431992
{
    my ($plane, $row, $column) = @_;
    return sprintf("%d-%02d/%02d", $plane, $row, $column);
}

# Format "used/space bytes (percent%)".  A zero $space (possible in the
# final summary when no mapping at all was read) is reported as 0.0% instead
# of dying with a division by zero.
sub printStats
{
    my ($used, $space) = @_;
    my $percent = $space ? $used * 100 / $space : 0;
    return sprintf("%d/%d bytes (%.1f%%)", $used, $space, $percent);
}

# Return the blanks needed to indent the first cell of a table line: one
# group of $column_width blanks for every cell position that precedes $end
# on its line of $columns_per_line cells.
sub printSpaces
{
    my ($column_width, $columns_per_line, $end) = @_;
    return " " x ($column_width * ($end % $columns_per_line));
}

# Split a packed (plane << 16 | row << 8 | column) value into its parts.
sub unpackCns
{
    my ($cns) = @_;
    return ($cns >> 16, ($cns >> 8) & 0xFF, $cns & 0xFF);
}

# True for positions in plane 3 from row 66, column 39 onwards, which the
# reverse mapping tries to avoid (see the comment at the call site).
sub isFictitiousExtension
{
    my ($plane, $row, $column) = @_;
    return $plane == 3 && ($row == 66 && $column > 38 || $row > 66);
}

# Convert the hexadecimal fields captured from an input line into
# (utf32, plane, row, column).  Row and column are stored in the input with
# an offset of 0x20, which is removed here.  Dies on out-of-range values.
sub decodeMapping
{
    my ($utf32_hex, $plane_hex, $row_hex, $column_hex) = @_;
    my $utf32 = hex($utf32_hex);
    my $plane = hex($plane_hex);
    my $row = hex($row_hex) - 0x20;
    my $column = hex($column_hex) - 0x20;
    isValidUtf32($utf32)
        or die "Bad UTF32 char " . printUtf32($utf32);
    isValidCns116431992($plane, $row, $column)
        or die "Bad CNS11643-1992 char "
               . printCns116431992($plane, $row, $column);
    return ($utf32, $plane, $row, $column);
}

# Enter a mapping into @cns_map unless the position is already taken.
# Returns "new" if the mapping was stored, "same" if an identical mapping
# was already present, and "conflict" if the position maps to a different
# code point (the existing entry is kept in that case).
sub storeMapping
{
    my ($plane, $row, $column, $utf32) = @_;
    my $existing = $cns_map[$plane][$row][$column];
    if (!defined($existing))
    {
        $cns_map[$plane][$row][$column] = $utf32;
        $cns_plane_used[$plane] = 1;
        return "new";
    }
    return $existing == $utf32 ? "same" : "conflict";
}

# Describe a conflict between the stored mapping of a CNS position and a
# new candidate code point.
sub conflictMessage
{
    my ($plane, $row, $column, $utf32) = @_;
    return "Mapping "
           . printCns116431992($plane, $row, $column)
           . " to "
           . printUtf32($cns_map[$plane][$row][$column])
           . ", NOT "
           . printUtf32($utf32);
}

# Print the cells $first..$last of one table row to $fh, wrapping after
# every cell whose index is the last one of a group of $columns_per_line.
# $cell is called with each index in ascending order and returns the text of
# that cell; the first line is padded so that cells line up in columns.
sub printWrappedCells
{
    my ($fh, $column_width, $columns_per_line, $first, $last, $cell) = @_;
    print {$fh} " ", printSpaces($column_width, $columns_per_line, $first);
    my $at_line_start = 0;
    for my $index ($first .. $last)
    {
        if ($at_line_start)
        {
            print {$fh} " ";
            $at_line_start = 0;
        }
        print {$fh} $cell->($index);
        if ($index % $columns_per_line == $columns_per_line - 1)
        {
            print {$fh} "\n";
            $at_line_start = 1;
        }
    }
    print {$fh} "\n" if !$at_line_start;
    return;
}

my $count_Unihan_txt = 0;
my $count_CNS11643_TXT = 0;
my $count_Uni2CNS = 0;

# --- Read Unihan.txt ------------------------------------------------------
# kCNS1992 entries are authoritative (a conflict is fatal); kIRG_TSource
# entries only fill gaps (a conflict is reported and the entry ignored).
{
    my $filename = "Unihan.txt";
    open(my $in, '<', "input/" . $filename)
        or die "Cannot read " . $filename . ": $!";
    while (my $line = <$in>)
    {
        if ($line =~ /^U\+([0-9A-F]+)\tkCNS1992\t([0-9A-F])-([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])$/)
        {
            my ($utf32, $plane, $row, $column)
                = decodeMapping($1, $2, $3, $4);
            my $status = storeMapping($plane, $row, $column, $utf32);
            if ($status eq "new")
            {
                ++$count_Unihan_txt;
            }
            elsif ($status eq "conflict")
            {
                die conflictMessage($plane, $row, $column, $utf32);
            }
        }
        elsif ($line =~ /^U\+([0-9A-F]+)\tkIRG_TSource\t([0-9A-F])-([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])$/)
        {
            my ($utf32, $plane, $row, $column)
                = decodeMapping($1, $2, $3, $4);
            my $status = storeMapping($plane, $row, $column, $utf32);
            if ($status eq "new")
            {
                ++$count_Unihan_txt;
            }
            elsif ($status eq "conflict")
            {
                print "WARNING! ",
                      conflictMessage($plane, $row, $column, $utf32),
                      "\n";
            }
        }
        elsif ($line =~ /^U\+([0-9A-F]+)\tkCNS1992\t.*$/)
        {
            # A kCNS1992 line that did not match the strict pattern above.
            die "Bad format";
        }
    }
    close($in);
}

# --- Read CNS11643.TXT ----------------------------------------------------
# Every line is validated, but only planes 1 and 2 are taken from this file.
{
    my $filename = "CNS11643.TXT";
    open(my $in, '<', "input/" . $filename)
        or die "Cannot read " . $filename . ": $!";
    while (my $line = <$in>)
    {
        if ($line =~ /0x([0-9A-F])([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])\t0x([0-9A-F]+)\t\#.*$/)
        {
            my ($utf32, $plane, $row, $column)
                = decodeMapping($4, $1, $2, $3);
            if ($plane <= 2)
            {
                my $status = storeMapping($plane, $row, $column, $utf32);
                if ($status eq "new")
                {
                    ++$count_CNS11643_TXT;
                }
                elsif ($status eq "conflict")
                {
                    die conflictMessage($plane, $row, $column, $utf32);
                }
            }
        }
    }
    close($in);
}

# --- Read Uni2CNS (disabled) ----------------------------------------------
# Kept for reference; when enabled, conflicting entries are silently
# ignored and every plane 1 position found in the file is listed.
if (0)
{
    my $filename = "Uni2CNS";
    open(my $in, '<', "input/" . $filename)
        or die "Cannot read " . $filename . ": $!";
    while (my $line = <$in>)
    {
        if ($line =~ /([0-9A-F]+)\t([0-9A-F])-([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])\t.*$/)
        {
            my ($utf32, $plane, $row, $column)
                = decodeMapping($1, $2, $3, $4);
            if (storeMapping($plane, $row, $column, $utf32) eq "new")
            {
                ++$count_Uni2CNS;
            }
            if ($plane == 1)
            {
                print printCns116431992($plane, $row, $column), "\n";
            }
        }
    }
    close($in);
}

# --- Build the reverse (Unicode -> CNS) map --------------------------------
# $uni_map[plane][page][index] = packed CNS position, where page and index
# are the middle and low byte of the code point.
my @uni_map;
my @uni_plane_used;
my @uni_page_used;
for my $cns_plane (1 .. 16)
{
    next if !defined($cns_plane_used[$cns_plane]);
    for my $cns_row (1 .. 94)
    {
        for my $cns_column (1 .. 94)
        {
            my $utf32 = $cns_map[$cns_plane][$cns_row][$cns_column];
            next if !defined($utf32);

            my $uni_plane = $utf32 >> 16;
            my $uni_page = ($utf32 >> 8) & 0xFF;
            my $uni_index = $utf32 & 0xFF;
            my $cns = ($cns_plane << 16) | ($cns_row << 8) | $cns_column;
            my $cns1 = $uni_map[$uni_plane][$uni_page][$uni_index];
            if (!defined($cns1))
            {
                $uni_map[$uni_plane][$uni_page][$uni_index] = $cns;
                $uni_plane_used[$uni_plane] = 1;
                $uni_page_used[$uni_plane][$uni_page] = 1;
                next;
            }

            # Two CNS positions map to the same code point; decide which
            # one the reverse mapping should use.
            my ($cns1_plane, $cns1_row, $cns1_column) = unpackCns($cns1);

            # Do not map from Unicode to Fictitious Character Set
            # Extensions (Lunde, p. 131), if possible:
            if (isFictitiousExtension($cns_plane, $cns_row, $cns_column))
            {
                print " (",
                      printUtf32($utf32),
                      " to fictious ",
                      printCns116431992($cns_plane, $cns_row, $cns_column),
                      " ignored, favouring ",
                      printCns116431992($cns1_plane,
                                        $cns1_row,
                                        $cns1_column),
                      ")\n";
            }
            elsif (isFictitiousExtension($cns1_plane,
                                         $cns1_row,
                                         $cns1_column))
            {
                $uni_map[$uni_plane][$uni_page][$uni_index] = $cns;
                print " (",
                      printUtf32($utf32),
                      " to fictious ",
                      printCns116431992($cns1_plane,
                                        $cns1_row,
                                        $cns1_column),
                      " ignored, favouring ",
                      printCns116431992($cns_plane, $cns_row, $cns_column),
                      ")\n";
            }
            else
            {
                # Neither is an extension position: keep the first one.
                print "WARNING! Mapping ",
                      printUtf32($utf32),
                      " to ",
                      printCns116431992($cns1_plane,
                                        $cns1_row,
                                        $cns1_column),
                      ", NOT ",
                      printCns116431992($cns_plane, $cns_row, $cns_column),
                      "\n";
            }
        }
    }
}

# Sanity check: no code point in U+0000..007F may have a mapping.  (The
# index must be the loop variable itself; testing any other variable makes
# the check inspect a single unrelated slot.)
if (defined($uni_plane_used[0]) && defined($uni_page_used[0][0]))
{
    for my $utf32 (0 .. 0x7F)
    {
        my $cns = $uni_map[0][0][$utf32];
        if (defined($cns))
        {
            die "Mapping "
                . printUtf32($utf32)
                . " to "
                . printCns116431992(unpackCns($cns));
        }
    }
}

# --- Write the generated file ----------------------------------------------
my $out_filename = lc($id) . ".tab";
open(my $out, '>', $out_filename)
    or die "Cannot write " . $out_filename . ": $!";

# Copy the licence header of this script, turning the leading '#' comment
# block into a C comment.  Copying stops at the first non-comment line.
{
    my $script_filename = lc($id) . ".pl";
    open(my $in, '<', $script_filename)
        or die "Cannot read " . $script_filename . ": $!";
    my $first = 1;
    while (my $line = <$in>)
    {
        if ($line =~ /^\#!.*$/)
        {
            # Skip the interpreter line.
        }
        elsif ($line =~ /^\#(\*.*)$/)
        {
            if ($first == 1)
            {
                # Opening row of asterisks becomes "/***...".
                print {$out} "/", $1, "\n";
                $first = 0;
            }
            else
            {
                # Closing row of asterisks becomes " ***.../".
                print {$out} " ", substr($1, 0, length($1) - 1), "/\n";
            }
        }
        elsif ($line =~ /^\# (.*)$/)
        {
            print {$out} " *", $1, "\n";
        }
        elsif ($line =~ /^\#(.*)$/)
        {
            print {$out} " *", $1, "\n";
        }
        else
        {
            last;
        }
    }
    close($in);
}

print {$out} "\n",
             "#ifndef _SAL_TYPES_H_\n",
             "#include \"sal/types.h\"\n",
             "#endif\n",
             "\n";

# --- CNS -> Unicode: data --------------------------------------------------
# Per used row: one entry "first | (last << 8)", one UTF-16 unit per column
# in first..last (0xffff for gaps, the high surrogate for code points above
# U+FFFF), and, if the row contains such code points, one entry with the
# first surrogate column followed by the low surrogates (0 elsewhere).
# Entries are 16 bit wide, hence the factor 2 in the byte statistics.
my @cns_data_offsets;
my @cns_data_space;
my @cns_data_used;
print {$out} "static sal_uInt16 const aImpl", $id, "ToUnicodeData[] = {\n";
my $cns_data_index = 0;
for my $cns_plane (1 .. 16)
{
    next if !defined($cns_plane_used[$cns_plane]);

    my $cns_rows = 0;
    my $cns_chars = 0;
    for my $cns_row (1 .. 94)
    {
        # Find the first and last mapped column of this row.
        my $cns_row_first = -1;
        my $cns_row_last;
        for my $cns_column (1 .. 94)
        {
            if (defined($cns_map[$cns_plane][$cns_row][$cns_column]))
            {
                $cns_row_first = $cns_column if $cns_row_first == -1;
                $cns_row_last = $cns_column;
            }
        }
        if ($cns_row_first == -1)
        {
            print {$out} " /* plane ", $cns_plane, ", row ", $cns_row,
                         ": --- */\n";
            $cns_data_offsets[$cns_plane][$cns_row] = -1;
            next;
        }

        $cns_data_offsets[$cns_plane][$cns_row] = $cns_data_index;
        ++$cns_rows;
        print {$out} " /* plane ", $cns_plane, ", row ", $cns_row,
                     " */\n";

        my $cns_row_surrogates_first = -1;
        my $cns_row_surrogates_last;
        my $cns_row_chars = 0;
        my $cns_row_surrogates = 0;

        print {$out} " ", $cns_row_first, " | (", $cns_row_last,
                     " << 8), /* first, last */\n";
        ++$cns_data_index;

        printWrappedCells(
            $out, 7, 10, $cns_row_first, $cns_row_last,
            sub
            {
                my ($cns_column) = @_;
                my $utf32 = $cns_map[$cns_plane][$cns_row][$cns_column];
                return "0xffff," if !defined($utf32);
                ++$cns_row_chars;
                return sprintf("0x%04X,", $utf32) if $utf32 <= 0xFFFF;
                ++$cns_row_surrogates;
                if ($cns_row_surrogates_first == -1)
                {
                    $cns_row_surrogates_first = $cns_column;
                }
                $cns_row_surrogates_last = $cns_column;
                return sprintf("0x%04X,",
                               (0xD800 | (($utf32 - 0x10000) >> 10)));
            });
        $cns_data_index += $cns_row_last - $cns_row_first + 1;

        if ($cns_row_surrogates_first != -1)
        {
            print {$out} " ", $cns_row_surrogates_first,
                         ", /* first low-surrogate */\n";
            ++$cns_data_index;

            printWrappedCells(
                $out, 7, 10,
                $cns_row_surrogates_first, $cns_row_surrogates_last,
                sub
                {
                    my ($cns_column) = @_;
                    my $utf32
                        = $cns_map[$cns_plane][$cns_row][$cns_column];
                    if (!defined($utf32) || $utf32 <= 0xFFFF)
                    {
                        return " 0,";
                    }
                    return sprintf("0x%04X,",
                                   (0xDC00
                                    | (($utf32 - 0x10000) & 0x3FF)));
                });
            $cns_data_index
                += $cns_row_surrogates_last - $cns_row_surrogates_first + 1;
        }

        $cns_chars += $cns_row_chars;
        $cns_data_space[$cns_plane][$cns_row]
            = ($cns_data_index
               - $cns_data_offsets[$cns_plane][$cns_row]) * 2;
        $cns_data_used[$cns_plane][$cns_row]
            = (1 + $cns_row_chars
               + ($cns_row_surrogates == 0 ?
                  0 : 1 + $cns_row_surrogates)) * 2;
    }
    print "cns plane ",
          $cns_plane,
          ": rows = ",
          $cns_rows,
          ", chars = ",
          $cns_chars,
          "\n";
}
print {$out} "};\n\n";

# --- CNS -> Unicode: row offsets -------------------------------------------
# One entry per row of every used plane: the row's index into the data
# table, or -1 for an empty row.
my @cns_rowoffsets_used;
print {$out} "static sal_Int32 const aImpl", $id,
             "ToUnicodeRowOffsets[] = {\n";
for my $cns_plane (1 .. 16)
{
    if (!defined($cns_plane_used[$cns_plane]))
    {
        print {$out} " /* plane ", $cns_plane, ": --- */\n";
        next;
    }
    $cns_rowoffsets_used[$cns_plane] = 0;
    for my $cns_row (1 .. 94)
    {
        if ($cns_data_offsets[$cns_plane][$cns_row] == -1)
        {
            print {$out} " -1, /* plane ",
                         $cns_plane,
                         ", row ",
                         $cns_row,
                         " */\n";
        }
        else
        {
            print {$out} " ",
                         $cns_data_offsets[$cns_plane][$cns_row],
                         ", /* plane ",
                         $cns_plane,
                         ", row ",
                         $cns_row,
                         "; ",
                         printStats($cns_data_used[$cns_plane][$cns_row],
                                    $cns_data_space[$cns_plane][$cns_row]),
                         " */\n";
            $cns_rowoffsets_used[$cns_plane] += 4;
        }
    }
}
print {$out} "};\n\n";

# --- CNS -> Unicode: plane offsets -----------------------------------------
# One entry per plane: "n * 94" for the n-th used plane (its position in
# the row offset table), or -1 for an unused plane.
print {$out} "static sal_Int32 const aImpl",
             $id,
             "ToUnicodePlaneOffsets[] = {\n";
my $cns_row_offset = 0;
for my $cns_plane (1 .. 16)
{
    if (defined($cns_plane_used[$cns_plane]))
    {
        print {$out} " ",
                     $cns_row_offset++,
                     " * 94, /* plane ",
                     $cns_plane,
                     "; ",
                     printStats($cns_rowoffsets_used[$cns_plane], 94 * 4),
                     " */\n";
    }
    else
    {
        print {$out} " -1, /* plane ", $cns_plane, " */\n";
    }
}
print {$out} "};\n\n";

# --- Unicode -> CNS: data --------------------------------------------------
# Per used page: two bytes "first, last", then three bytes (plane, row,
# column) for every index in first..last, with "0, 0, 0" for gaps.
my @uni_data_offsets;
my @uni_data_space;
my @uni_data_used;
print {$out} "static sal_uInt8 const aImplUnicodeTo", $id, "Data[] = {\n";
my $uni_data_index = 0;
for my $uni_plane (0 .. 16)
{
    if (!defined($uni_plane_used[$uni_plane]))
    {
        print {$out} " /* plane ", $uni_plane, ": --- */\n";
        next;
    }
    for my $uni_page (0 .. 255)
    {
        if (!defined($uni_page_used[$uni_plane][$uni_page]))
        {
            $uni_data_offsets[$uni_plane][$uni_page] = -1;
            print {$out} " /* plane ", $uni_plane, ", page ", $uni_page,
                         ": --- */\n";
            next;
        }

        $uni_data_offsets[$uni_plane][$uni_page] = $uni_data_index;
        print {$out} " /* plane ", $uni_plane, ", page ", $uni_page,
                     " */\n";

        # Find the first and last mapped index of this page.
        my $uni_page_first = -1;
        my $uni_page_last;
        for my $uni_index (0 .. 255)
        {
            if (defined($uni_map[$uni_plane][$uni_page][$uni_index]))
            {
                $uni_page_first = $uni_index if $uni_page_first == -1;
                $uni_page_last = $uni_index;
            }
        }

        $uni_data_used[$uni_plane][$uni_page] = 0;

        print {$out} " ", $uni_page_first, ", ", $uni_page_last,
                     ", /* first, last */\n";
        $uni_data_index += 2;
        $uni_data_used[$uni_plane][$uni_page] += 2;

        printWrappedCells(
            $out, 9, 8, $uni_page_first, $uni_page_last,
            sub
            {
                my ($uni_index) = @_;
                my $cns = $uni_map[$uni_plane][$uni_page][$uni_index];
                return " 0, 0, 0," if !defined($cns);
                $uni_data_used[$uni_plane][$uni_page] += 3;
                return sprintf("%2d,%2d,%2d,", unpackCns($cns));
            });
        $uni_data_index += 3 * ($uni_page_last - $uni_page_first + 1);

        $uni_data_space[$uni_plane][$uni_page]
            = $uni_data_index - $uni_data_offsets[$uni_plane][$uni_page];
    }
}
print {$out} "};\n\n";

# --- Unicode -> CNS: page offsets ------------------------------------------
# One entry per page of every used plane: the page's index into the data
# table, or -1 for an empty page.  Per-plane totals are collected for the
# statistics printed with the plane offsets.
my @uni_pageoffsets_used;
my @uni_data_used_sum;
my @uni_data_space_sum;
print {$out} "static sal_Int32 const aImplUnicodeTo", $id,
             "PageOffsets[] = {\n";
for my $uni_plane (0 .. 16)
{
    if (!defined($uni_plane_used[$uni_plane]))
    {
        print {$out} " /* plane ", $uni_plane, ": --- */\n";
        next;
    }
    $uni_pageoffsets_used[$uni_plane] = 0;
    $uni_data_used_sum[$uni_plane] = 0;
    $uni_data_space_sum[$uni_plane] = 0;
    for my $uni_page (0 .. 255)
    {
        my $offset = $uni_data_offsets[$uni_plane][$uni_page];
        if ($offset == -1)
        {
            print {$out} " -1, /* plane ",
                         $uni_plane,
                         ", page ",
                         $uni_page,
                         " */\n";
        }
        else
        {
            print {$out} " ",
                         $offset,
                         ", /* plane ",
                         $uni_plane,
                         ", page ",
                         $uni_page,
                         "; ",
                         printStats($uni_data_used[$uni_plane][$uni_page],
                                    $uni_data_space[$uni_plane][$uni_page]),
                         " */\n";
            $uni_pageoffsets_used[$uni_plane] += 4;
            $uni_data_used_sum[$uni_plane]
                += $uni_data_used[$uni_plane][$uni_page];
            $uni_data_space_sum[$uni_plane]
                += $uni_data_space[$uni_plane][$uni_page];
        }
    }
}
print {$out} "};\n\n";

# --- Unicode -> CNS: plane offsets -----------------------------------------
# One entry per plane: "n * 256" for the n-th used plane (its position in
# the page offset table), or -1 for an unused plane; followed by a comment
# with the overall statistics.
print {$out} "static sal_Int32 const aImplUnicodeTo",
             $id,
             "PlaneOffsets[] = {\n";
my $uni_page_offset = 0;
my $uni_planeoffsets_used = 0;
my $uni_pageoffsets_used_sum = 0;
my $uni_pageoffsets_space_sum = 0;
my $uni_data_used_sum2 = 0;
my $uni_data_space_sum2 = 0;
for my $uni_plane (0 .. 16)
{
    if (defined($uni_plane_used[$uni_plane]))
    {
        print {$out} " ",
                     $uni_page_offset++,
                     " * 256, /* plane ",
                     $uni_plane,
                     "; ",
                     printStats($uni_pageoffsets_used[$uni_plane], 256 * 4),
                     ", ",
                     printStats($uni_data_used_sum[$uni_plane],
                                $uni_data_space_sum[$uni_plane]),
                     " */\n";
        $uni_planeoffsets_used += 4;
        $uni_pageoffsets_used_sum += $uni_pageoffsets_used[$uni_plane];
        $uni_pageoffsets_space_sum += 256 * 4;
        $uni_data_used_sum2 += $uni_data_used_sum[$uni_plane];
        $uni_data_space_sum2 += $uni_data_space_sum[$uni_plane];
    }
    else
    {
        print {$out} " -1, /* plane ", $uni_plane, " */\n";
    }
}
print {$out} " /* ",
             printStats($uni_planeoffsets_used, 17 * 4),
             ", ",
             printStats($uni_pageoffsets_used_sum,
                        $uni_pageoffsets_space_sum),
             ", ",
             printStats($uni_data_used_sum2, $uni_data_space_sum2),
             " */\n};\n";

# Buffered write errors only surface when the handle is closed.
close($out) or die "Cannot write " . $out_filename . ": $!";

print "Unihan.txt = ", $count_Unihan_txt,
      ", CNS11643.TXT = ", $count_CNS11643_TXT,
      ", Uni2CNS = ", $count_Uni2CNS,
      ", total = ",
      ($count_Unihan_txt + $count_CNS11643_TXT + $count_Uni2CNS),
      "\n";