1#!/usr/bin/perl
2#*************************************************************************
3#
4# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5#
6# Copyright 2000, 2010 Oracle and/or its affiliates.
7#
8# OpenOffice.org - a multi-platform office productivity suite
9#
10# This file is part of OpenOffice.org.
11#
12# OpenOffice.org is free software: you can redistribute it and/or modify
13# it under the terms of the GNU Lesser General Public License version 3
14# only, as published by the Free Software Foundation.
15#
16# OpenOffice.org is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19# GNU Lesser General Public License version 3 for more details
20# (a copy is included in the LICENSE file that accompanied this code).
21#
22# You should have received a copy of the GNU Lesser General Public License
23# version 3 along with OpenOffice.org.  If not, see
24# <http://www.openoffice.org/license.html>
25# for a copy of the LGPLv3 License.
26#
27#*************************************************************************
28
29# The following files must be available in a ./input subdir:
30
31# <http://www.info.gov.hk/digital21/eng/hkscs/download/big5-iso.txt>
32
33# <http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT>:
34#  "Unicode version: 1.1    Table version: 0.0d3    Date: 11 February 1994"
35#  Only used to track Unicode characters that are mapped from both Big5 and
36#  HKSCS.
37
38# <http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT>:
39#  "Unicode version: 2.0    Table version: 2.01    Date: 1/7/2000"
40#  Only used to track Unicode characters that are mapped from both CP950 and
41#  HKSCS.
42
# Generator settings.
#
# $id          names the encoding; it is spliced into the generated C
#              identifiers (aImpl<id>...) and, lower-cased, into the names of
#              the files this script opens (<id>.tab and <id>.pl).
# $surrogates  set to 1 to allow mappings to Unicode beyond Plane 0.
$id = "Big5Hkscs2001";

$surrogates = 0;
46
# Returns true if the argument is a code point this script accepts as a
# mapping target: inside 0..0x10FFFF, not a surrogate (0xD800..0xDFFF), not
# in 0xFDD0..0xFDEF, and not one of the last two code points of a plane
# (low 16 bits 0xFFFE or 0xFFFF).
sub isValidUtf32
{
    my ($code_point) = @_;

    my $in_code_space = $code_point >= 0 && $code_point <= 0x10FFFF;
    my $is_surrogate = $code_point >= 0xD800 && $code_point <= 0xDFFF;
    my $in_fdd0_block = $code_point >= 0xFDD0 && $code_point <= 0xFDEF;
    my $offset_in_plane = $code_point & 0xFFFF;

    return $in_code_space && !$is_surrogate && !$in_fdd0_block
           && $offset_in_plane < 0xFFFE;
}
55
# Formats a code point for messages: "U+" followed by at least four
# upper-case hexadecimal digits.
sub printUtf32
{
    my ($code_point) = @_;
    return "U+" . sprintf("%04X", $code_point);
}
61
# Returns true if the argument is a two-byte code whose high byte ("row") is
# in 0x81..0xFE and whose low byte ("column") is in 0x40..0x7E or 0xA1..0xFE.
sub isValidBig5
{
    my ($code) = @_;

    my $row = $code >> 8;
    my $column = $code & 0xFF;

    my $row_ok = $row >= 0x81 && $row <= 0xFE;
    my $column_in_low_range = $column >= 0x40 && $column <= 0x7E;
    my $column_in_high_range = $column >= 0xA1 && $column <= 0xFE;

    return $row_ok && ($column_in_low_range || $column_in_high_range);
}
71
# Formats a Big5 code for messages as at least four upper-case hexadecimal
# digits.
sub printBig5
{
    my ($code) = @_;
    my $text = sprintf "%04X", $code;
    return $text;
}
77
# Formats a usage statistic as "<used>/<space> bytes (<percent>%)".
#
# Arguments:
#   $used   number of bytes that carry data
#   $space  number of bytes occupied in total
#
# Returns the formatted string.  When $space is 0 (or undefined) the
# percentage is reported as 0.0 instead of dying with a division by zero.
sub printStats
{
    my ($used, $space) = @_;
    my $percent = $space ? $used * 100 / $space : 0;
    return sprintf("%d/%d bytes (%.1f%%)", $used, $space, $percent);
}
87
# Returns the blank padding that stands in for the table cells skipped at the
# start of a line.
#
# Arguments:
#   $column_width      width of one cell in characters
#   $columns_per_line  number of cells per output line
#   $end               index of the first cell that is actually written
#
# The result holds $column_width blanks for every cell between the last
# multiple of $columns_per_line at or below $end and $end itself.
#
# Only lexical variables are used, so calling this no longer overwrites the
# package variables $output, $i and $j.
sub printSpaces
{
    my ($column_width, $columns_per_line, $end) = @_;

    my $line_start = int($end / $columns_per_line) * $columns_per_line;
    my $skipped_cells = $end - $line_start;

    # Mirror the loop bounds of a counting loop: nothing is emitted for
    # non-positive counts.
    return "" if $skipped_cells <= 0 || $column_width <= 0;
    return " " x ($column_width * $skipped_cells);
}
105
# Records that Unicode code point $utf32 converts to Big5 code $big5.
#
# Arguments:
#   $utf32  Unicode code point; it is split into plane, page and index to
#           address @uni_map
#   $big5   Big5 code to store
#   $comp   -1 for a regular mapping, otherwise an index into @compat whose
#           counter is incremented when the mapping is stored
#
# The first mapping stored for a code point wins.  A later call for the same
# code point changes nothing and only prints a warning to standard output.
sub addMapping
{
    my ($utf32, $big5, $comp) = @_;

    # These three are package variables, like the other state of this script.
    $uni_plane = $utf32 >> 16;
    $uni_page = ($utf32 >> 8) & 0xFF;
    $uni_index = $utf32 & 0xFF;

    my $already_mapped
        = defined($uni_plane_used[$uni_plane])
          && defined($uni_page_used[$uni_plane][$uni_page])
          && defined($uni_map[$uni_plane][$uni_page][$uni_index]);

    if ($already_mapped)
    {
        my $existing = $uni_map[$uni_plane][$uni_page][$uni_index];

        # "Not a compatibility mapping" is encoded as -1, and 0 is a valid
        # compatibility index, so compare against -1.  Testing $comp for
        # truth would label regular mappings "compat" and leave index 0
        # unlabelled.
        print "WARNING!  Mapping ", printUtf32($utf32), " to ",
              printBig5($existing), ", NOT ",
              ($comp != -1 ? "compat " : ""),
              printBig5($big5), "\n";
        return;
    }

    $uni_map[$uni_plane][$uni_page][$uni_index] = $big5;
    $uni_plane_used[$uni_plane] = 1;
    $uni_page_used[$uni_plane][$uni_page] = 1;
    if ($comp != -1)
    {
        ++$compat[$comp];
    }
    return;
}
136
137# Build mappings to track Unicode characters that are mapped from both Big5/
138# CP950 and HKSCS:
{
    # Each underlying table is described by its file name (below ./input),
    # the package array, indexed by Unicode code point, that receives the
    # Big5 code found for that code point, and a flag saying whether mappings
    # to U+FFFD are left out of that array.
    my @tables = (
        [ "BIG5.TXT",  \@underlying_big5,  1 ],
        [ "CP950.TXT", \@underlying_cp950, 0 ],
    );

    foreach my $table (@tables)
    {
        my ($name, $underlying, $skip_fffd) = @$table;
        my $path = "input/" . $name;

        # Three-argument open on a lexical handle: the handle stays private
        # to this loop, and $! reports why an open failed.
        open(my $in, "<", $path) or die "Cannot read " . $path . ": " . $!;
        while (my $line = <$in>)
        {
            # Only lines holding a four-hex-digit Big5 code, a Unicode code
            # point and a trailing "#" comment are used.
            next unless $line
                =~ /(0x[0-9A-F]{4})[ \t]+(0x[0-9A-F]+)[ \t]+\#.*$/;
            my $big5 = oct($1);
            my $utf32 = oct($2);

            isValidBig5($big5)
                or die "Bad Big5 char " . printBig5($big5);
            isValidUtf32($utf32)
                or die "Bad UTF32 char " . printUtf32($utf32);
            next if $skip_fffd && $utf32 == 0xFFFD;

            if (defined($underlying->[$utf32]))
            {
                # Keep the first Big5 code seen for a code point and report
                # every later one.
                print "WARNING!  In ", $name, ", both ",
                      printBig5($underlying->[$utf32]), " and ",
                      printBig5($big5), " map to ", printUtf32($utf32),
                      "\n";
            }
            else
            {
                $underlying->[$utf32] = $big5;
            }
        }
        close($in);
    }
}
196
197# The following are mapped by the underlying RTL_TEXTENCODING_BIG5 to some
198# nonstandard Unicode points, so they are explicitly mentioned here to map
199# to the standard Unicode PUA points.  (In the other direction, the unofficial
200# mappings from Unicode to RTL_TEXTENCODING_BIG5 C6A1--C7FE are harmless,
201# since all Unicode characters involved are already covered by the official
202# Big5-HKSCS mappings.)
# Each pair is a Big5 code and the Unicode PUA point it is mapped to; the
# pair is entered into @big5_map and, via addMapping, into the reverse table.
foreach my $override ([0xC6CF, 0xF6E0],
                      [0xC6D3, 0xF6E4],
                      [0xC6D5, 0xF6E6],
                      [0xC6D7, 0xF6E8],
                      [0xC6DE, 0xF6EF],
                      [0xC6DF, 0xF6F0])
{
    my ($code, $pua_point) = @$override;
    $big5_map[$code >> 8][$code & 0xFF] = $pua_point;
    addMapping($pua_point, $code, -1);
}
209
210# The following implements the mapping of Big5-HKSCS compatibility points
211# (GCCS characters unified with other HKSCS characters) to Unicode.  In the
212# other direction, characters from Unicode's PUA will map to these Big5-HKSCS
213# compatibility points.  (See the first list in <http://www.info.gov.hk/
214# digital21/eng/hkscs/download/big5cmp.txt>.)
{
    # Big5-HKSCS compatibility point => Unicode code point.  Only @big5_map
    # is filled here; no reverse (Unicode to Big5) entry is added for these.
    my @compatibility_points = (
        0x8E69 => 0x7BB8, 0x8E6F => 0x7C06, 0x8E7E => 0x7CCE,
        0x8EAB => 0x7DD2, 0x8EB4 => 0x7E1D, 0x8ECD => 0x8005,
        0x8ED0 => 0x8028,
        0x8F57 => 0x83C1, 0x8F69 => 0x84A8, 0x8F6E => 0x840F,
        0x8FCB => 0x89A6, 0x8FCC => 0x89A9, 0x8FFE => 0x8D77,
        0x906D => 0x90FD, 0x907A => 0x92B9, 0x90DC => 0x975C,
        0x90F1 => 0x97FF,
        0x91BF => 0x9F16,
        0x9244 => 0x8503, 0x92AF => 0x5159, 0x92B0 => 0x515B,
        0x92B1 => 0x515D, 0x92B2 => 0x515E, 0x92C8 => 0x936E,
        0x92D1 => 0x7479,
        0x9447 => 0x6D67, 0x94CA => 0x799B,
        0x95D9 => 0x9097,
        0x9644 => 0x975D, 0x96ED => 0x701E, 0x96FC => 0x5B28,
        0x9B76 => 0x7201, 0x9B78 => 0x77D7, 0x9B7B => 0x7E87,
        0x9BC6 => 0x99D6, 0x9BDE => 0x91D4, 0x9BEC => 0x60DE,
        0x9BF6 => 0x6FB6,
        0x9C42 => 0x8F36, 0x9C53 => 0x4FBB, 0x9C62 => 0x71DF,
        0x9C68 => 0x9104, 0x9C6B => 0x9DF0, 0x9C77 => 0x83CF,
        0x9CBC => 0x5C10, 0x9CBD => 0x79E3, 0x9CD0 => 0x5A67,
        0x9D57 => 0x8F0B, 0x9D5A => 0x7B51, 0x9DC4 => 0x62D0,
        0x9EA9 => 0x6062, 0x9EEF => 0x75F9, 0x9EFD => 0x6C4A,
        0x9F60 => 0x9B2E, 0x9F66 => 0x9F17, 0x9FCB => 0x50ED,
        0x9FD8 => 0x5F0C,
        0xA063 => 0x880F, 0xA077 => 0x62CE, 0xA0D5 => 0x7468,
        0xA0DF => 0x7162, 0xA0E4 => 0x7250,
        0xFA5F => 0x5029, 0xFA66 => 0x507D, 0xFABD => 0x5305,
        0xFAC5 => 0x5344, 0xFAD5 => 0x537F,
        0xFB48 => 0x5605, 0xFBB8 => 0x5A77, 0xFBF3 => 0x5E75,
        0xFBF9 => 0x5ED0,
        0xFC4F => 0x5F58, 0xFC6C => 0x60A4, 0xFCB9 => 0x6490,
        0xFCE2 => 0x6674, 0xFCF1 => 0x675E,
        0xFDB7 => 0x6C9C, 0xFDB8 => 0x6E1D, 0xFDBB => 0x6E2F,
        0xFDF1 => 0x716E,
        0xFE52 => 0x732A, 0xFE6F => 0x745C, 0xFEAA => 0x74E9,
        0xFEDD => 0x7809,
    );

    # Consume the flat list two values at a time, in list order.
    while (my ($code, $code_point) = splice(@compatibility_points, 0, 2))
    {
        $big5_map[$code >> 8][$code & 0xFF] = $code_point;
    }
}
299
# Counters reported after the input has been read: mappings into the BMP
# private use area, and compatibility mappings per HKSCS/ISO 10646 edition.
$pua = 0;
$compat[0] = 0; # 1993
$compat[1] = 0; # 2000
$compat[2] = 0; # 2001

$filename = "big5-iso.txt";
{
    # Three-argument open on a lexical handle; $! reports why it failed.
    open(my $in, "<", "input/" . $filename)
        or die "Cannot read input/" . $filename . ": " . $!;
    while (my $line = <$in>)
    {
        # Data lines consist of exactly four hexadecimal fields: the Big5
        # code and its Unicode value in the 1993, 2000 and 2001 columns.
        next unless $line
            =~ /^([0-9A-F]+) +([0-9A-F]+) +([0-9A-F]+) +([0-9A-F]+)$/;
        my ($big5, $utf32_1993, $utf32_2000, $utf32_2001)
            = map { oct("0x" . $_) } ($1, $2, $3, $4);

        isValidBig5($big5)
            or die "Bad Big5 char " . printBig5($big5);
        foreach my $candidate ($utf32_1993, $utf32_2000, $utf32_2001)
        {
            isValidUtf32($candidate)
                or die "Bad UTF32 char " . printUtf32($candidate);
        }

        # The 2001 column is only used as the primary mapping when mappings
        # beyond Plane 0 are allowed.
        my $utf32 = $surrogates ? $utf32_2001 : $utf32_2000;

        # Report code points that an underlying table already maps from a
        # different Big5 code.  If every underlying table that knows the
        # code point agrees with $big5 there is nothing to report.
        #
        # Depending on real underlying mapping (cf.
        # ../convertbig5hkscs.tab), it would be possible to save some
        # table space by dropping those HKSCS code points that are
        # already covered by the underlying mapping.
        my $from_big5 = $underlying_big5[$utf32];
        my $from_cp950 = $underlying_cp950[$utf32];
        my @known = grep { defined($_) } ($from_big5, $from_cp950);
        if (grep { $_ != $big5 } @known)
        {
            print "XXX mapping underlying";
            if (@known == 2 && $from_big5 == $from_cp950)
            {
                print " Big5/CP950 ", printBig5($from_big5);
            }
            else
            {
                print " Big5 ", printBig5($from_big5)
                    if defined($from_big5);
                print " CP950 ", printBig5($from_cp950)
                    if defined($from_cp950);
            }
            print " and HKSCS ", printBig5($big5), " to ",
                  printUtf32($utf32), "\n";
        }

        ++$pua if $utf32 >= 0xE000 && $utf32 <= 0xF8FF;

        # A Big5 code may be entered only once, including the entries made
        # before this file is read.
        my $row = $big5 >> 8;
        my $column = $big5 & 0xFF;
        die "Bad Big5 mapping " . printBig5($big5)
            if defined($big5_map[$row][$column]);
        $big5_map[$row][$column] = $utf32;

        addMapping($utf32, $big5, -1);

        # Every other edition's code point that differs from the ones
        # already handled also converts back to this Big5 code, counted as a
        # compatibility mapping of that edition.
        addMapping($utf32_2001, $big5, 2)
            if $utf32_2001 != $utf32;
        addMapping($utf32_2000, $big5, 1)
            if $utf32_2000 != $utf32 && $utf32_2000 != $utf32_2001;
        addMapping($utf32_1993, $big5, 0)
            if $utf32_1993 != $utf32 && $utf32_1993 != $utf32_2000
               && $utf32_1993 != $utf32_2001;
    }
    close($in);
}
405
# Summary of what was read.
print $pua, " mappings to PUA\n";
print $compat[0], " 1993 compatibility mappings\n" if ($compat[0] != 0);
print $compat[1], " 2000 compatibility mappings\n" if ($compat[1] != 0);
print $compat[2], " 2001 compatibility mappings\n" if ($compat[2] != 0);

# Sanity check: no code point in U+0000..U+007F may have received a mapping.
# The table is indexed with the loop variable itself, so each of the 128
# code points is really inspected.
if (defined($uni_plane_used[0]) && defined($uni_page_used[0][0]))
{
    for ($utf32 = 0; $utf32 <= 0x7F; ++$utf32)
    {
        if (defined($uni_map[0][0][$utf32]))
        {
            $big5 = $uni_map[0][0][$utf32];
            die "Mapping " . printUtf32($utf32) . " to " . printBig5($big5);
        }
    }
}
422
# The generated table is written through the package handle OUT, which the
# rest of the script keeps printing to.
$filename = lc($id) . ".tab";
open(OUT, ">", $filename) or die "Cannot write " . $filename . ": " . $!;

# Copy the leading comment block of <id>.pl into the output as a C comment.
{
    $filename = lc($id). ".pl";
    open(my $script, "<", $filename)
        or die "Cannot read ". $filename . ": " . $!;
    my $first = 1;
    while (my $line = <$script>)
    {
        if ($line =~ /^\#!.*$/)
        {
            # The interpreter line is not copied.
        }
        elsif ($line =~ /^\#(\*.*)$/)
        {
            # A "#****" rule opens the C comment the first time it is seen;
            # any later rule closes it, its last character becoming "/".
            if ($first == 1)
            {
                print OUT "/", $1, "\n";
                $first = 0;
            }
            else
            {
                print OUT " ", substr($1, 0, length($1) - 1), "/\n";
            }
        }
        elsif ($line =~ /^\# (.*)$/)
        {
            # NOTE(review): the blank after "#" is dropped, so "# text" is
            # written as " *text" -- confirm this is intended.
            print OUT " *", $1, "\n";
        }
        elsif ($line =~ /^\#(.*)$/)
        {
            print OUT " *", $1, "\n";
        }
        else
        {
            # The first line that is not a comment ends the header.
            last;
        }
    }
    # The handle is closed here instead of being left open for the rest of
    # the run.
    close($script);
}

print OUT "\n",
          "#ifndef _SAL_TYPES_H_\n",
          "#include \"sal/types.h\"\n",
          "#endif\n",
          "\n";
468
# Big5 -> Unicode data.  For every Big5 row with at least one mapping the
# table holds one "first | (last << 8)" word, then one word per column from
# first to last (0xffff for unmapped columns, the high surrogate for code
# points above 0xFFFF).  If the row has code points above 0xFFFF, one word
# with the first such column follows, then one word per column up to the
# last such column (the low surrogate, or 0).
print OUT "static sal_uInt16 const aImpl", $id, "ToUnicodeData[] = {\n";
$big5_data_index = 0;
$big5_rows = 0;
$big5_chars = 0;
{
    # Writes a run of cells to OUT, the first cell belonging to column
    # $first_column.  Lines are indented by two blanks and broken after
    # every column whose number ends in 9; the first line is padded so that
    # the cells line up under the columns of complete lines.
    my $write_cells = sub
    {
        my ($first_column, @cells) = @_;
        my $text = "  " . printSpaces(7, 10, $first_column);
        my $column = $first_column;
        my $at_line_start = 0;
        foreach my $cell (@cells)
        {
            $text .= "  " if $at_line_start;
            $text .= $cell;
            $at_line_start = ($column % 10 == 9) ? 1 : 0;
            $text .= "\n" if $at_line_start;
            ++$column;
        }
        $text .= "\n" unless $at_line_start;
        print OUT $text;
    };

    foreach my $row (0 .. 255)
    {
        my @mapped = grep { defined($big5_map[$row][$_]) } 0 .. 255;
        if (!@mapped)
        {
            print OUT " /* row ", $row, ": --- */\n";
            $big5_data_offsets[$row] = -1;
            next;
        }

        my ($first, $last) = @mapped[0, -1];
        $big5_data_offsets[$row] = $big5_data_index;
        ++$big5_rows;
        print OUT " /* row ", $row, " */\n";

        print OUT "  ", $first, " | (", $last,
                  " << 8), /* first, last */\n";
        ++$big5_data_index;

        # Columns whose code point does not fit into 16 bits.
        my @beyond_bmp = grep { $big5_map[$row][$_] > 0xFFFF } @mapped;

        my @cells;
        foreach my $column ($first .. $last)
        {
            my $utf32 = $big5_map[$row][$column];
            if (!defined($utf32))
            {
                push(@cells, "0xffff,");
            }
            elsif ($utf32 <= 0xFFFF)
            {
                push(@cells, sprintf("0x%04X,", $utf32));
            }
            else
            {
                push(@cells,
                     sprintf("0x%04X,",
                             (0xD800 | (($utf32 - 0x10000) >> 10))));
            }
        }
        $write_cells->($first, @cells);
        $big5_data_index += scalar(@cells);

        if (@beyond_bmp)
        {
            my ($low_first, $low_last) = @beyond_bmp[0, -1];
            print OUT "  ", $low_first,
                      ", /* first low-surrogate */\n";
            ++$big5_data_index;

            my @low_cells;
            foreach my $column ($low_first .. $low_last)
            {
                my $utf32 = $big5_map[$row][$column];
                $utf32 = 0 unless defined($utf32);
                if ($utf32 <= 0xFFFF)
                {
                    push(@low_cells, "     0,");
                }
                else
                {
                    push(@low_cells,
                         sprintf("0x%04X,",
                                 (0xDC00 | (($utf32 - 0x10000) & 0x3FF))));
                }
            }
            $write_cells->($low_first, @low_cells);
            $big5_data_index += scalar(@low_cells);
        }

        # Statistics for the row-offset table: bytes occupied by this row
        # and bytes of it that carry data (two bytes per word).
        $big5_chars += scalar(@mapped);
        $big5_data_space[$row]
            = ($big5_data_index - $big5_data_offsets[$row]) * 2;
        $big5_data_used[$row]
            = (1 + scalar(@mapped)
               + (@beyond_bmp ? 1 + scalar(@beyond_bmp) : 0))
              * 2;
    }
}
print OUT "};\n\n";
print "big5 rows = ", $big5_rows, ", chars = ", $big5_chars, "\n";
608
# Row-offset table: for each Big5 row either -1, or the index of the row's
# first word in the data table, annotated with its usage statistics.
print OUT "static sal_Int32 const aImpl", $id, "ToUnicodeRowOffsets[] = {\n";
$big5_rowoffsets_used = 0;
foreach my $row (0 .. 255)
{
    my $offset = $big5_data_offsets[$row];
    if ($offset == -1)
    {
        print OUT "  -1, /* row $row */\n";
        next;
    }

    my $stats = printStats($big5_data_used[$row], $big5_data_space[$row]);
    print OUT "  $offset, /* row $row; $stats */\n";
    $big5_rowoffsets_used += 4;
}
print OUT "};\n\n";
631
# Unicode -> Big5 data.  For every used page of every used plane the table
# holds one "first | (last << 8)" word followed by one word per index from
# first to last (the Big5 code, or 0 where nothing is mapped).
print OUT "static sal_uInt16 const aImplUnicodeTo", $id, "Data[] = {\n";
$uni_data_index = 0;
{
    # Writes a run of cells to OUT, the first cell belonging to index
    # $first_index.  Lines are indented by two blanks and broken after every
    # index whose number ends in 9; the first line is padded so that the
    # cells line up under the columns of complete lines.
    my $write_cells = sub
    {
        my ($first_index, @cells) = @_;
        my $text = "  " . printSpaces(7, 10, $first_index);
        my $index = $first_index;
        my $at_line_start = 0;
        foreach my $cell (@cells)
        {
            $text .= "  " if $at_line_start;
            $text .= $cell;
            $at_line_start = ($index % 10 == 9) ? 1 : 0;
            $text .= "\n" if $at_line_start;
            ++$index;
        }
        $text .= "\n" unless $at_line_start;
        print OUT $text;
    };

    foreach my $plane (0 .. 16)
    {
        if (!defined($uni_plane_used[$plane]))
        {
            print OUT " /* plane ", $plane, ": --- */\n";
            next;
        }

        foreach my $page (0 .. 255)
        {
            if (!defined($uni_page_used[$plane][$page]))
            {
                $uni_data_offsets[$plane][$page] = -1;
                print OUT " /* plane ", $plane, ", page ", $page,
                          ": --- */\n";
                next;
            }

            $uni_data_offsets[$plane][$page] = $uni_data_index;
            print OUT " /* plane ", $plane, ", page ", $page,
                      " */\n";

            # A page is marked used only when an entry is stored in it, so
            # this list is expected to be non-empty.
            my @mapped
                = grep { defined($uni_map[$plane][$page][$_]) } 0 .. 255;
            my ($first, $last) = @mapped[0, -1];

            print OUT "  ", $first, " | (", $last,
                      " << 8), /* first, last */\n";
            ++$uni_data_index;

            my @cells;
            foreach my $index ($first .. $last)
            {
                my $big5 = $uni_map[$plane][$page][$index];
                push(@cells,
                     defined($big5) ? sprintf("0x%04X,", $big5) : "     0,");
            }
            $write_cells->($first, @cells);
            $uni_data_index += scalar(@cells);

            # Two bytes per word: the first/last word plus every mapped
            # entry carry data; the space covers all words of the page.
            $uni_data_used[$plane][$page] = 2 + 2 * scalar(@mapped);
            $uni_data_space[$plane][$page]
                = ($uni_data_index
                   - $uni_data_offsets[$plane][$page]) * 2;
        }
    }
}
print OUT "};\n\n";
717
# Page-offset table: 256 entries for every used plane (nothing but a comment
# for an unused one).  Each entry is -1 or the index of the page's first
# word in the data table.  The per-plane sums gathered here are reported
# with the plane-offset table.
print OUT "static sal_Int32 const aImplUnicodeTo", $id, "PageOffsets[] = {\n";
foreach my $plane (0 .. 16)
{
    unless (defined($uni_plane_used[$plane]))
    {
        print OUT "  /* plane $plane: --- */\n";
        next;
    }

    $uni_pageoffsets_used[$plane] = 0;
    $uni_data_used_sum[$plane] = 0;
    $uni_data_space_sum[$plane] = 0;
    foreach my $page (0 .. 255)
    {
        my $offset = $uni_data_offsets[$plane][$page];
        if ($offset == -1)
        {
            print OUT "  -1, /* plane $plane, page $page */\n";
            next;
        }

        my $used = $uni_data_used[$plane][$page];
        my $space = $uni_data_space[$plane][$page];
        print OUT "  $offset, /* plane $plane, page $page; ",
                  printStats($used, $space),
                  " */\n";
        $uni_pageoffsets_used[$plane] += 4;
        $uni_data_used_sum[$plane] += $used;
        $uni_data_space_sum[$plane] += $space;
    }
}
print OUT "};\n\n";
763
# Plane-offset table: for each of the 17 planes either -1, or the offset of
# the plane's 256 entries in the page-offset table, written as "n * 256"
# where n counts the used planes so far.  Usage statistics go into the
# comments.
print OUT "static sal_Int32 const aImplUnicodeTo",
          $id,
          "PlaneOffsets[] = {\n";
$uni_page_offset = 0;
$uni_planeoffsets_used = 0;
$uni_pageoffsets_used_sum = 0;
$uni_pageoffsets_space_sum = 0;
$uni_data_used_sum2 = 0;
$uni_data_space_sum2 = 0;
foreach my $plane (0 .. 16)
{
    if (!defined($uni_plane_used[$plane]))
    {
        print OUT "  -1, /* plane ", $plane, " */\n";
        next;
    }

    print OUT "  ",
              $uni_page_offset++,
              " * 256, /* plane ",
              $plane,
              "; ",
              printStats($uni_pageoffsets_used[$plane], 256 * 4),
              ", ",
              printStats($uni_data_used_sum[$plane],
                         $uni_data_space_sum[$plane]),
              " */\n";
    $uni_planeoffsets_used += 4;
    $uni_pageoffsets_used_sum += $uni_pageoffsets_used[$plane];
    $uni_pageoffsets_space_sum += 256 * 4;
    $uni_data_used_sum2 += $uni_data_used_sum[$plane];
    $uni_data_space_sum2 += $uni_data_space_sum[$plane];
}

# Totals: plane-offset table, page-offset tables, data table.
print OUT " /* ",
          printStats($uni_planeoffsets_used, 17 * 4),
          ", ",
          printStats($uni_pageoffsets_used_sum, $uni_pageoffsets_space_sum),
          ", ",
          printStats($uni_data_used_sum2, $uni_data_space_sum2),
          " */\n};\n";

# Errors of buffered writes may only show up when the handle is closed, so
# the result of close is checked.
close(OUT) or die "Cannot write " . lc($id) . ".tab" . ": " . $!;
807