#!/usr/bin/perl
#**************************************************************
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#**************************************************************



# The following files must be available in a ./input subdir:

# <http://www.info.gov.hk/digital21/eng/hkscs/download/big5-iso.txt>

# <http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT>:
# "Unicode version: 1.1 Table version: 0.0d3 Date: 11 February 1994"
# Only used to track Unicode characters that are mapped from both Big5 and
# HKSCS.

# <http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT>:
# "Unicode version: 2.0 Table version: 2.01 Date: 1/7/2000"
# Only used to track Unicode characters that are mapped from both CP950 and
# HKSCS.
$surrogates = 0; # set to 1 to allow mappings to Unicode beyond Plane 0

$id = "Big5Hkscs2001";

# Returns true if the given code point lies in 0..0x10FFFF and is neither a
# surrogate (U+D800..U+DFFF), nor in U+FDD0..U+FDEF, nor one of the last two
# code points of a plane (xxFFFE, xxFFFF).
sub isValidUtf32
{
    my ($utf32) = @_;
    return $utf32 >= 0 && $utf32 <= 0x10FFFF
        && !($utf32 >= 0xD800 && $utf32 <= 0xDFFF)
        && !($utf32 >= 0xFDD0 && $utf32 <= 0xFDEF)
        && ($utf32 & 0xFFFF) < 0xFFFE;
}

# Formats a code point as "U+XXXX" (at least four upper-case hex digits).
sub printUtf32
{
    my ($utf32) = @_;
    return sprintf("U+%04X", $utf32);
}

# Returns true if the given 16-bit value is a well-formed Big5 code: lead
# byte in 0x81..0xFE and trail byte in 0x40..0x7E or 0xA1..0xFE.
sub isValidBig5
{
    my ($big5) = @_;
    my $big5_row = $big5 >> 8;
    my $big5_column = $big5 & 0xFF;
    return $big5_row >= 0x81 && $big5_row <= 0xFE
        && ($big5_column >= 0x40 && $big5_column <= 0x7E
            || $big5_column >= 0xA1 && $big5_column <= 0xFE);
}

# Formats a Big5 code as at least four upper-case hex digits.
sub printBig5
{
    my ($big5) = @_;
    return sprintf("%04X", $big5);
}

# Formats a "used/space bytes (percent%)" summary.  $space must be non-zero.
sub printStats
{
    my ($used, $space) = @_;
    return sprintf("%d/%d bytes (%.1f%%)",
                   $used,
                   $space,
                   $used * 100 / $space);
}

# Returns the padding that has to precede the entry with index $end so that
# it lands in its own column of a table that is laid out with
# $columns_per_line entries per line, each $column_width characters wide.
sub printSpaces
{
    my ($column_width, $columns_per_line, $end) = @_;
    # Number of (absent) entries between the start of the line and $end; a
    # non-positive count yields the empty string.
    my $skipped = $end - int($end / $columns_per_line) * $columns_per_line;
    return " " x ($column_width * $skipped);
}

# Records the mapping from Unicode code point $utf32 to Big5 code $big5 in
# @uni_map (indexed by plane, page, index) and marks the plane and page as
# used.  $comp is -1 for a regular mapping, or the index into @compat (0 =
# 1993, 1 = 2000, 2 = 2001) of the compatibility counter to bump.  If $utf32
# is already mapped, the existing mapping is kept and a warning is printed.
sub addMapping
{
    my ($utf32, $big5, $comp) = @_;

    my $uni_plane = $utf32 >> 16;
    my $uni_page = ($utf32 >> 8) & 0xFF;
    my $uni_index = $utf32 & 0xFF;

    if (defined($uni_plane_used[$uni_plane])
        && defined($uni_page_used[$uni_plane][$uni_page])
        && defined($uni_map[$uni_plane][$uni_page][$uni_index]))
    {
        my $big5_1 = $uni_map[$uni_plane][$uni_page][$uni_index];
        # $comp is -1 (which is true in boolean context) for regular
        # mappings, so it has to be compared explicitly.
        print "WARNING! Mapping ", printUtf32($utf32), " to ",
            printBig5($big5_1), ", NOT ", ($comp != -1 ? "compat " : ""),
            printBig5($big5), "\n";
        return;
    }

    $uni_map[$uni_plane][$uni_page][$uni_index] = $big5;
    $uni_plane_used[$uni_plane] = 1;
    $uni_page_used[$uni_plane][$uni_page] = 1;
    if ($comp != -1)
    {
        ++$compat[$comp];
    }
}

# Reads input/$filename (a Unicode consortium style mapping table with lines
# of the form "0xBBBB 0xUUUU # comment") and stores, for every Unicode code
# point, the Big5 code that maps to it in the array referenced by $table.
# If $skip_replacement is true, mappings to U+FFFD are ignored.  A warning is
# printed when two Big5 codes map to the same code point; the first one wins.
sub loadUnderlyingMapping
{
    my ($filename, $table, $skip_replacement) = @_;

    open(my $in, '<', "input/" . $filename)
        or die "Cannot read " . $filename;
    while (my $line = <$in>)
    {
        next unless $line
            =~ /(0x[0-9A-F][0-9A-F][0-9A-F][0-9A-F])[ \t]+(0x[0-9A-F]+)[ \t]+\#.*$/;
        my $big5 = hex($1);
        my $utf32 = hex($2);
        isValidBig5($big5)
            or die "Bad Big5 char " . printBig5($big5);
        isValidUtf32($utf32)
            or die "Bad UTF32 char " . printUtf32($utf32);
        next if $skip_replacement && $utf32 == 0xFFFD;

        if (defined($table->[$utf32]))
        {
            print "WARNING! In ", $filename, ", both ",
                printBig5($table->[$utf32]), " and ",
                printBig5($big5), " map to ", printUtf32($utf32), "\n";
        }
        else
        {
            $table->[$utf32] = $big5;
        }
    }
    close $in;
}

# Build mappings to track Unicode characters that are mapped from both Big5/
# CP950 and HKSCS:
loadUnderlyingMapping("BIG5.TXT", \@underlying_big5, 1);
loadUnderlyingMapping("CP950.TXT", \@underlying_cp950, 0);

# The following are mapped by the underlying RTL_TEXTENCODING_BIG5 to some
# nonstandard Unicode points, so they are explicitly mentioned here to map
# to the standard Unicode PUA points.
# (In the other direction, the unofficial
# mappings from Unicode to RTL_TEXTENCODING_BIG5 C6A1--C7FE are harmless,
# since all Unicode characters involved are already covered by the official
# Big5-HKSCS mappings.)
my @big5_map; # $big5_map[row][column] is the code point of that Big5 cell
$big5_map[0xC6][0xCF] = 0xF6E0; addMapping(0xF6E0, 0xC6CF, -1);
$big5_map[0xC6][0xD3] = 0xF6E4; addMapping(0xF6E4, 0xC6D3, -1);
$big5_map[0xC6][0xD5] = 0xF6E6; addMapping(0xF6E6, 0xC6D5, -1);
$big5_map[0xC6][0xD7] = 0xF6E8; addMapping(0xF6E8, 0xC6D7, -1);
$big5_map[0xC6][0xDE] = 0xF6EF; addMapping(0xF6EF, 0xC6DE, -1);
$big5_map[0xC6][0xDF] = 0xF6F0; addMapping(0xF6F0, 0xC6DF, -1);

# The following implements the mapping of Big5-HKSCS compatibility points
# (GCCS characters unified with other HKSCS characters) to Unicode. In the
# other direction, characters from Unicode's PUA will map to these Big5-HKSCS
# compatibility points. (See the first list in <http://www.info.gov.hk/
# digital21/eng/hkscs/download/big5cmp.txt>.)
$big5_map[0x8E][0x69] = 0x7BB8;
$big5_map[0x8E][0x6F] = 0x7C06;
$big5_map[0x8E][0x7E] = 0x7CCE;
$big5_map[0x8E][0xAB] = 0x7DD2;
$big5_map[0x8E][0xB4] = 0x7E1D;
$big5_map[0x8E][0xCD] = 0x8005;
$big5_map[0x8E][0xD0] = 0x8028;
$big5_map[0x8F][0x57] = 0x83C1;
$big5_map[0x8F][0x69] = 0x84A8;
$big5_map[0x8F][0x6E] = 0x840F;
$big5_map[0x8F][0xCB] = 0x89A6;
$big5_map[0x8F][0xCC] = 0x89A9;
$big5_map[0x8F][0xFE] = 0x8D77;
$big5_map[0x90][0x6D] = 0x90FD;
$big5_map[0x90][0x7A] = 0x92B9;
$big5_map[0x90][0xDC] = 0x975C;
$big5_map[0x90][0xF1] = 0x97FF;
$big5_map[0x91][0xBF] = 0x9F16;
$big5_map[0x92][0x44] = 0x8503;
$big5_map[0x92][0xAF] = 0x5159;
$big5_map[0x92][0xB0] = 0x515B;
$big5_map[0x92][0xB1] = 0x515D;
$big5_map[0x92][0xB2] = 0x515E;
$big5_map[0x92][0xC8] = 0x936E;
$big5_map[0x92][0xD1] = 0x7479;
$big5_map[0x94][0x47] = 0x6D67;
$big5_map[0x94][0xCA] = 0x799B;
$big5_map[0x95][0xD9] = 0x9097;
$big5_map[0x96][0x44] = 0x975D;
$big5_map[0x96][0xED] = 0x701E;
$big5_map[0x96][0xFC] = 0x5B28;
$big5_map[0x9B][0x76] = 0x7201;
$big5_map[0x9B][0x78] = 0x77D7;
$big5_map[0x9B][0x7B] = 0x7E87;
$big5_map[0x9B][0xC6] = 0x99D6;
$big5_map[0x9B][0xDE] = 0x91D4;
$big5_map[0x9B][0xEC] = 0x60DE;
$big5_map[0x9B][0xF6] = 0x6FB6;
$big5_map[0x9C][0x42] = 0x8F36;
$big5_map[0x9C][0x53] = 0x4FBB;
$big5_map[0x9C][0x62] = 0x71DF;
$big5_map[0x9C][0x68] = 0x9104;
$big5_map[0x9C][0x6B] = 0x9DF0;
$big5_map[0x9C][0x77] = 0x83CF;
$big5_map[0x9C][0xBC] = 0x5C10;
$big5_map[0x9C][0xBD] = 0x79E3;
$big5_map[0x9C][0xD0] = 0x5A67;
$big5_map[0x9D][0x57] = 0x8F0B;
$big5_map[0x9D][0x5A] = 0x7B51;
$big5_map[0x9D][0xC4] = 0x62D0;
$big5_map[0x9E][0xA9] = 0x6062;
$big5_map[0x9E][0xEF] = 0x75F9;
$big5_map[0x9E][0xFD] = 0x6C4A;
$big5_map[0x9F][0x60] = 0x9B2E;
$big5_map[0x9F][0x66] = 0x9F17;
$big5_map[0x9F][0xCB] = 0x50ED;
$big5_map[0x9F][0xD8] = 0x5F0C;
$big5_map[0xA0][0x63] = 0x880F;
$big5_map[0xA0][0x77] = 0x62CE;
$big5_map[0xA0][0xD5] = 0x7468;
$big5_map[0xA0][0xDF] = 0x7162;
$big5_map[0xA0][0xE4] = 0x7250;
$big5_map[0xFA][0x5F] = 0x5029;
$big5_map[0xFA][0x66] = 0x507D;
$big5_map[0xFA][0xBD] = 0x5305;
$big5_map[0xFA][0xC5] = 0x5344;
$big5_map[0xFA][0xD5] = 0x537F;
$big5_map[0xFB][0x48] = 0x5605;
$big5_map[0xFB][0xB8] = 0x5A77;
$big5_map[0xFB][0xF3] = 0x5E75;
$big5_map[0xFB][0xF9] = 0x5ED0;
$big5_map[0xFC][0x4F] = 0x5F58;
$big5_map[0xFC][0x6C] = 0x60A4;
$big5_map[0xFC][0xB9] = 0x6490;
$big5_map[0xFC][0xE2] = 0x6674;
$big5_map[0xFC][0xF1] = 0x675E;
$big5_map[0xFD][0xB7] = 0x6C9C;
$big5_map[0xFD][0xB8] = 0x6E1D;
$big5_map[0xFD][0xBB] = 0x6E2F;
$big5_map[0xFD][0xF1] = 0x716E;
$big5_map[0xFE][0x52] = 0x732A;
$big5_map[0xFE][0x6F] = 0x745C;
$big5_map[0xFE][0xAA] = 0x74E9;
$big5_map[0xFE][0xDD] = 0x7809;

my $pua = 0;
$compat[0] = 0; # 1993
$compat[1] = 0; # 2000
$compat[2] = 0; # 2001

# Read the official HKSCS table.  Each data line holds four hex fields: the
# Big5 code followed by the code points assigned to it in the 1993, 2000 and
# 2001 columns.
{
    my $filename = "big5-iso.txt";
    open(my $in, '<', "input/" . $filename)
        or die "Cannot read " . $filename;
    while (my $line = <$in>)
    {
        next unless $line
            =~ /^([0-9A-F]+) +([0-9A-F]+) +([0-9A-F]+) +([0-9A-F]+)$/;
        my ($big5, $utf32_1993, $utf32_2000, $utf32_2001)
            = map { hex($_) } ($1, $2, $3, $4);

        isValidBig5($big5)
            or die "Bad Big5 char " . printBig5($big5);
        for my $candidate ($utf32_1993, $utf32_2000, $utf32_2001)
        {
            isValidUtf32($candidate)
                or die "Bad UTF32 char " . printUtf32($candidate);
        }

        # Only the 2001 column may contain code points beyond Plane 0.
        my $utf32 = $surrogates ? $utf32_2001 : $utf32_2000;

        # Report code points that the underlying Big5/CP950 tables reach
        # from a Big5 code other than the HKSCS one.  Code points that the
        # underlying tables map from the very same Big5 code are ignored:
        # depending on real underlying mapping (cf.
        # ../convertbig5hkscs.tab), it would be possible to save some table
        # space by dropping those HKSCS code points that are already covered
        # by the underlying mapping.
        my $from_big5 = $underlying_big5[$utf32];
        my $from_cp950 = $underlying_cp950[$utf32];
        my $conflict = defined($from_big5) && $from_big5 != $big5
            || defined($from_cp950) && $from_cp950 != $big5;
        if ($conflict)
        {
            print "XXX mapping underlying";
            if (defined($from_big5) && defined($from_cp950)
                && $from_big5 == $from_cp950)
            {
                print " Big5/CP950 ", printBig5($from_big5);
            }
            else
            {
                if (defined($from_big5))
                {
                    print " Big5 ", printBig5($from_big5);
                }
                if (defined($from_cp950))
                {
                    print " CP950 ", printBig5($from_cp950);
                }
            }
            print " and HKSCS ", printBig5($big5), " to ",
                printUtf32($utf32), "\n";
        }

        if ($utf32 >= 0xE000 && $utf32 <= 0xF8FF)
        {
            ++$pua;
        }

        my $row = $big5 >> 8;
        my $column = $big5 & 0xFF;
        if (defined($big5_map[$row][$column]))
        {
            die "Bad Big5 mapping " . printBig5($big5);
        }
        $big5_map[$row][$column] = $utf32;

        addMapping($utf32, $big5, -1);

        # Code points from the other columns that differ from the chosen
        # one are added as compatibility mappings back to the same Big5
        # code, newest column first.
        if ($utf32_2001 != $utf32)
        {
            addMapping($utf32_2001, $big5, 2);
        }
        if ($utf32_2000 != $utf32 && $utf32_2000 != $utf32_2001)
        {
            addMapping($utf32_2000, $big5, 1);
        }
        if ($utf32_1993 != $utf32 && $utf32_1993 != $utf32_2000
            && $utf32_1993 != $utf32_2001)
        {
            addMapping($utf32_1993, $big5, 0);
        }
    }
    close $in;
}

print $pua, " mappings to PUA\n";
print $compat[0], " 1993 compatibility mappings\n" if ($compat[0] != 0);
print $compat[1], " 2000 compatibility mappings\n" if ($compat[1] != 0);
print $compat[2], " 2001 compatibility mappings\n" if ($compat[2] != 0);

# Sanity check: no code point in U+0000..U+007F may be mapped.  The index
# must be the loop variable itself, not a value left over from elsewhere.
if (defined($uni_plane_used[0]) && defined($uni_page_used[0][0]))
{
    for my $ascii (0 .. 0x7F)
    {
        if (defined($uni_map[0][0][$ascii]))
        {
            die "Mapping " . printUtf32($ascii) . " to "
                . printBig5($uni_map[0][0][$ascii]);
        }
    }
}

my $out_name = lc($id) . ".tab";
open(my $out, '>', $out_name) or die "Cannot write " . $out_name;

# Copy the leading comment header of this script's own source file into the
# output as a C comment; copying stops at the first line that does not start
# with "#".
{
    my $source_name = lc($id) . ".pl";
    open(my $in, '<', $source_name) or die "Cannot read " . $source_name;
    my $first = 1;
    while (my $line = <$in>)
    {
        if ($line =~ /^\#!.*$/)
        {
            # The interpreter line is not copied.
        }
        elsif ($line =~ /^\#(\*.*)$/)
        {
            if ($first == 1)
            {
                # Opening rule: "#****" becomes "/****".
                print $out "/", $1, "\n";
                $first = 0;
            }
            else
            {
                # Closing rule: the last "*" is replaced by "/".
                print $out " ", substr($1, 0, length($1) - 1), "/\n";
            }
        }
        elsif ($line =~ /^\# (.*)$/)
        {
            print $out " *", $1, "\n";
        }
        elsif ($line =~ /^\#(.*)$/)
        {
            print $out " *", $1, "\n";
        }
        else
        {
            last;
        }
    }
    close $in;
}

print $out "\n",
    "#ifndef _SAL_TYPES_H_\n",
    "#include \"sal/types.h\"\n",
    "#endif\n",
    "\n";

# Big5 -> Unicode data.  For every used row: one "first | (last << 8)" entry,
# then one entry per column in first..last (0xffff for unmapped columns, the
# high surrogate for code points beyond Plane 0).  If the row contains
# code points beyond Plane 0, a second run follows with the first such column
# and the low surrogates (0 for columns that need none).
# NOTE(review): padding inside the emitted string literals is reproduced as
# found; confirm against a pristine copy if the layout of the generated
# table matters.
print $out "static sal_uInt16 const aImpl", $id, "ToUnicodeData[] = {\n";
my (@big5_data_offsets, @big5_data_space, @big5_data_used);
my $big5_data_index = 0;
my $big5_rows = 0;
my $big5_chars = 0;
for my $row (0 .. 255)
{
    my @columns = grep { defined($big5_map[$row][$_]) } 0 .. 255;
    if (!@columns)
    {
        print $out " /* row ", $row, ": --- */\n";
        $big5_data_offsets[$row] = -1;
        next;
    }
    my ($row_first, $row_last) = @columns[0, -1];

    $big5_data_offsets[$row] = $big5_data_index;
    ++$big5_rows;
    print $out " /* row ", $row, " */\n";

    my $surrogates_first = -1;
    my $surrogates_last;
    my $row_chars = 0;
    my $row_surrogates = 0;

    print $out " ", $row_first, " | (", $row_last,
        " << 8), /* first, last */\n";
    ++$big5_data_index;

    print $out " ", printSpaces(7, 10, $row_first);
    my $bol = 0;
    for my $column ($row_first .. $row_last)
    {
        if ($bol == 1)
        {
            print $out " ";
            $bol = 0;
        }
        my $utf32 = $big5_map[$row][$column];
        if (!defined($utf32))
        {
            print $out "0xffff,";
        }
        elsif ($utf32 <= 0xFFFF)
        {
            ++$row_chars;
            printf $out "0x%04X,", $utf32;
        }
        else
        {
            ++$row_chars;
            ++$row_surrogates;
            printf $out "0x%04X,",
                (0xD800 | (($utf32 - 0x10000) >> 10));
            if ($surrogates_first == -1)
            {
                $surrogates_first = $column;
            }
            $surrogates_last = $column;
        }
        ++$big5_data_index;
        if ($column % 10 == 9)
        {
            print $out "\n";
            $bol = 1;
        }
    }
    if ($bol == 0)
    {
        print $out "\n";
    }

    if ($surrogates_first != -1)
    {
        print $out " ", $surrogates_first,
            ", /* first low-surrogate */\n";
        ++$big5_data_index;

        print $out " ", printSpaces(7, 10, $surrogates_first);
        $bol = 0;
        for my $column ($surrogates_first .. $surrogates_last)
        {
            if ($bol == 1)
            {
                print $out " ";
                $bol = 0;
            }
            my $utf32 = $big5_map[$row][$column];
            if (defined($utf32) && $utf32 > 0xFFFF)
            {
                printf $out "0x%04X,",
                    (0xDC00 | (($utf32 - 0x10000) & 0x3FF));
            }
            else
            {
                print $out " 0,";
            }
            ++$big5_data_index;
            if ($column % 10 == 9)
            {
                print $out "\n";
                $bol = 1;
            }
        }
        if ($bol == 0)
        {
            print $out "\n";
        }
    }

    $big5_chars += $row_chars;
    # Both statistics are in bytes (two per sal_uInt16 entry).
    $big5_data_space[$row]
        = ($big5_data_index - $big5_data_offsets[$row]) * 2;
    $big5_data_used[$row]
        = (1 + $row_chars
           + ($row_surrogates == 0 ? 0 : 1 + $row_surrogates))
          * 2;
}
print $out "};\n\n";
print "big5 rows = ", $big5_rows, ", chars = ", $big5_chars, "\n";

# Per-row offsets into the data array above (-1 for unused rows).
print $out "static sal_Int32 const aImpl", $id, "ToUnicodeRowOffsets[] = {\n";
for my $row (0 .. 255)
{
    if ($big5_data_offsets[$row] == -1)
    {
        print $out " -1, /* row ", $row, " */\n";
    }
    else
    {
        print $out " ",
            $big5_data_offsets[$row],
            ", /* row ",
            $row,
            "; ",
            printStats($big5_data_used[$row], $big5_data_space[$row]),
            " */\n";
    }
}
print $out "};\n\n";

# Unicode -> Big5 data.  For every used page of every used plane: one
# "first | (last << 8)" entry, then one entry per index in first..last (0 for
# unmapped indices).
print $out "static sal_uInt16 const aImplUnicodeTo", $id, "Data[] = {\n";
my (@uni_data_offsets, @uni_data_used, @uni_data_space);
my $uni_data_index = 0;
for my $plane (0 .. 16)
{
    if (!defined($uni_plane_used[$plane]))
    {
        print $out " /* plane ", $plane, ": --- */\n";
        next;
    }
    for my $page (0 .. 255)
    {
        if (!defined($uni_page_used[$plane][$page]))
        {
            $uni_data_offsets[$plane][$page] = -1;
            print $out " /* plane ", $plane, ", page ", $page,
                ": --- */\n";
            next;
        }

        $uni_data_offsets[$plane][$page] = $uni_data_index;
        print $out " /* plane ", $plane, ", page ", $page,
            " */\n";

        # A page is only marked as used when an index in it is mapped, so
        # this list is never empty.
        my @indices
            = grep { defined($uni_map[$plane][$page][$_]) } 0 .. 255;
        my ($page_first, $page_last) = @indices[0, -1];

        print $out " ", $page_first, " | (", $page_last,
            " << 8), /* first, last */\n";
        ++$uni_data_index;
        $uni_data_used[$plane][$page] = 2;

        print $out " ", printSpaces(7, 10, $page_first);
        my $bol = 0;
        for my $index ($page_first .. $page_last)
        {
            if ($bol == 1)
            {
                print $out " ";
                $bol = 0;
            }
            my $big5 = $uni_map[$plane][$page][$index];
            if (defined($big5))
            {
                printf $out "0x%04X,", $big5;
                $uni_data_used[$plane][$page] += 2;
            }
            else
            {
                print $out " 0,";
            }
            ++$uni_data_index;
            if ($index % 10 == 9)
            {
                print $out "\n";
                $bol = 1;
            }
        }
        if ($bol == 0)
        {
            print $out "\n";
        }

        $uni_data_space[$plane][$page]
            = ($uni_data_index - $uni_data_offsets[$plane][$page]) * 2;
    }
}
print $out "};\n\n";

# Per-page offsets into the data array above, 256 entries for every used
# plane (-1 for unused pages); unused planes contribute no entries.
print $out "static sal_Int32 const aImplUnicodeTo", $id, "PageOffsets[] = {\n";
my (@uni_pageoffsets_used, @uni_data_used_sum, @uni_data_space_sum);
for my $plane (0 .. 16)
{
    if (!defined($uni_plane_used[$plane]))
    {
        print $out " /* plane ", $plane, ": --- */\n";
        next;
    }
    $uni_pageoffsets_used[$plane] = 0;
    $uni_data_used_sum[$plane] = 0;
    $uni_data_space_sum[$plane] = 0;
    for my $page (0 .. 255)
    {
        my $offset = $uni_data_offsets[$plane][$page];
        if ($offset == -1)
        {
            print $out " -1, /* plane ",
                $plane,
                ", page ",
                $page,
                " */\n";
        }
        else
        {
            print $out " ",
                $offset,
                ", /* plane ",
                $plane,
                ", page ",
                $page,
                "; ",
                printStats($uni_data_used[$plane][$page],
                           $uni_data_space[$plane][$page]),
                " */\n";
            $uni_pageoffsets_used[$plane] += 4;
            $uni_data_used_sum[$plane] += $uni_data_used[$plane][$page];
            $uni_data_space_sum[$plane] += $uni_data_space[$plane][$page];
        }
    }
}
print $out "};\n\n";

# Per-plane offsets into the page offsets array above (-1 for unused
# planes).  Used planes are numbered consecutively, 256 entries each.
print $out "static sal_Int32 const aImplUnicodeTo",
    $id,
    "PlaneOffsets[] = {\n";
my $uni_page_offset = 0;
my $uni_planeoffsets_used = 0;
my $uni_pageoffsets_used_sum = 0;
my $uni_pageoffsets_space_sum = 0;
my $uni_data_used_sum2 = 0;
my $uni_data_space_sum2 = 0;
for my $plane (0 .. 16)
{
    if (!defined($uni_plane_used[$plane]))
    {
        print $out " -1, /* plane ", $plane, " */\n";
        next;
    }
    print $out " ",
        $uni_page_offset,
        " * 256, /* plane ",
        $plane,
        "; ",
        printStats($uni_pageoffsets_used[$plane], 256 * 4),
        ", ",
        printStats($uni_data_used_sum[$plane],
                   $uni_data_space_sum[$plane]),
        " */\n";
    ++$uni_page_offset;
    $uni_planeoffsets_used += 4;
    $uni_pageoffsets_used_sum += $uni_pageoffsets_used[$plane];
    $uni_pageoffsets_space_sum += 256 * 4;
    $uni_data_used_sum2 += $uni_data_used_sum[$plane];
    $uni_data_space_sum2 += $uni_data_space_sum[$plane];
}
print $out " /* ",
    printStats($uni_planeoffsets_used, 17 * 4),
    ", ",
    printStats($uni_pageoffsets_used_sum, $uni_pageoffsets_space_sum),
    ", ",
    printStats($uni_data_used_sum2, $uni_data_space_sum2),
    " */\n};\n";

# Buffered write errors only surface when the handle is closed.
close($out) or die "Cannot write " . $out_name;