1*cdf0e10cSrcweir#!/usr/bin/perl
2*cdf0e10cSrcweir#*************************************************************************
3*cdf0e10cSrcweir#
4*cdf0e10cSrcweir# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5*cdf0e10cSrcweir#
6*cdf0e10cSrcweir# Copyright 2000, 2010 Oracle and/or its affiliates.
7*cdf0e10cSrcweir#
8*cdf0e10cSrcweir# OpenOffice.org - a multi-platform office productivity suite
9*cdf0e10cSrcweir#
10*cdf0e10cSrcweir# This file is part of OpenOffice.org.
11*cdf0e10cSrcweir#
12*cdf0e10cSrcweir# OpenOffice.org is free software: you can redistribute it and/or modify
13*cdf0e10cSrcweir# it under the terms of the GNU Lesser General Public License version 3
14*cdf0e10cSrcweir# only, as published by the Free Software Foundation.
15*cdf0e10cSrcweir#
16*cdf0e10cSrcweir# OpenOffice.org is distributed in the hope that it will be useful,
17*cdf0e10cSrcweir# but WITHOUT ANY WARRANTY; without even the implied warranty of
18*cdf0e10cSrcweir# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19*cdf0e10cSrcweir# GNU Lesser General Public License version 3 for more details
20*cdf0e10cSrcweir# (a copy is included in the LICENSE file that accompanied this code).
21*cdf0e10cSrcweir#
22*cdf0e10cSrcweir# You should have received a copy of the GNU Lesser General Public License
23*cdf0e10cSrcweir# version 3 along with OpenOffice.org.  If not, see
24*cdf0e10cSrcweir# <http://www.openoffice.org/license.html>
25*cdf0e10cSrcweir# for a copy of the LGPLv3 License.
26*cdf0e10cSrcweir#
27*cdf0e10cSrcweir#*************************************************************************
28*cdf0e10cSrcweir
29*cdf0e10cSrcweir# The following files must be available in a ./input subdir:
30*cdf0e10cSrcweir
31*cdf0e10cSrcweir# <http://www.info.gov.hk/digital21/eng/hkscs/download/big5-iso.txt>
32*cdf0e10cSrcweir
33*cdf0e10cSrcweir# <http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT>:
34*cdf0e10cSrcweir#  "Unicode version: 1.1    Table version: 0.0d3    Date: 11 February 1994"
35*cdf0e10cSrcweir#  Only used to track Unicode characters that are mapped from both Big5 and
36*cdf0e10cSrcweir#  HKSCS.
37*cdf0e10cSrcweir
38*cdf0e10cSrcweir# <http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT>:
39*cdf0e10cSrcweir#  "Unicode version: 2.0    Table version: 2.01    Date: 1/7/2000"
40*cdf0e10cSrcweir#  Only used to track Unicode characters that are mapped from both CP950 and
41*cdf0e10cSrcweir#  HKSCS.
42*cdf0e10cSrcweir
# Generator configuration.
#
# $surrogates selects which column of big5-iso.txt becomes the primary
# Unicode mapping in the main pass: when false the 2000 column is used,
# when true the 2001 column is used.
43*cdf0e10cSrcweir$surrogates = 0; # set to 1 to allow mappings to Unicode beyond Plane 0
44*cdf0e10cSrcweir
# $id names the generated tables: it is embedded in the emitted C identifiers
# and, lower-cased, forms the names of the output ".tab" file and of the
# ".pl" file whose header comment is copied into the output.
45*cdf0e10cSrcweir$id = "Big5Hkscs2001";
46*cdf0e10cSrcweir
# Returns true if the argument is a usable UTF-32 code point: it must lie in
# 0..0x10FFFF and must be neither a surrogate (0xD800..0xDFFF) nor one of the
# values excluded below (0xFDD0..0xFDEF, and any value whose low 16 bits are
# 0xFFFE or 0xFFFF).
sub isValidUtf32
{
    my ($code_point) = @_;

    # Outside the Unicode code space.
    return !1 if $code_point < 0 || $code_point > 0x10FFFF;
    # Surrogate range.
    return !1 if $code_point >= 0xD800 && $code_point <= 0xDFFF;
    # Excluded block inside the BMP.
    return !1 if $code_point >= 0xFDD0 && $code_point <= 0xFDEF;
    # The last two values of every plane are excluded as well.
    return ($code_point & 0xFFFF) < 0xFFFE;
}
55*cdf0e10cSrcweir
# Formats a code point in the conventional "U+XXXX" notation (upper-case hex,
# zero-padded to at least four digits).
sub printUtf32
{
    my ($code_point) = @_;
    my $hex_digits = sprintf("%04X", $code_point);
    return "U+" . $hex_digits;
}
61*cdf0e10cSrcweir
# Returns true if the argument is a well-formed two-byte Big5 code: the lead
# byte (high 8 bits) must be in 0x81..0xFE and the trail byte (low 8 bits) in
# 0x40..0x7E or 0xA1..0xFE.
sub isValidBig5
{
    my ($code) = @_;
    my $lead_byte = $code >> 8;
    my $trail_byte = $code & 0xFF;

    return !1 if $lead_byte < 0x81 || $lead_byte > 0xFE;
    return 1 if $trail_byte >= 0x40 && $trail_byte <= 0x7E;
    return $trail_byte >= 0xA1 && $trail_byte <= 0xFE;
}
71*cdf0e10cSrcweir
# Formats a Big5 code as upper-case hex, zero-padded to at least four digits.
sub printBig5
{
    my ($code) = @_;
    return uc(sprintf("%04x", $code));
}
77*cdf0e10cSrcweir
# Formats a usage figure as "USED/SPACE bytes (P%)", where P is USED as a
# percentage of SPACE with one decimal place.  SPACE must be non-zero.
sub printStats
{
    my ($used, $space) = @_;
    my $percentage = $used * 100 / $space;
    return sprintf("%d/%d bytes (%.1f%%)", $used, $space, $percentage);
}
87*cdf0e10cSrcweir
# Returns the blank padding that stands in for the table columns skipped at
# the start of an output line.
#
#   $column_width     - width in characters of one printed table entry
#   $columns_per_line - number of entries printed per output line
#   $end              - index of the first entry that will really be printed
#
# The result is $column_width spaces for each entry between the start of the
# line containing $end and $end itself, so the first real entry lands in the
# column matching its index.  All arguments are expected to be non-negative
# integers and $columns_per_line must be non-zero.
#
# Only lexical variables are used, so no package globals of the calling
# script are overwritten.
sub printSpaces
{
    my ($column_width, $columns_per_line, $end) = @_;

    # Entries between the start of the line and $end.
    my $line_start = int($end / $columns_per_line) * $columns_per_line;
    my $skipped = $end - $line_start;

    return "" if $skipped <= 0 || $column_width <= 0;
    return " " x ($column_width * $skipped);
}
105*cdf0e10cSrcweir
# Records a Unicode -> Big5 mapping in the global reverse tables.
#
#   $utf32 - Unicode code point (split into plane / page / index below)
#   $big5  - Big5 code the code point should map to
#   $comp  - -1 for a regular mapping; otherwise an index into @compat
#            (0 = 1993, 1 = 2000, 2 = 2001) marking a compatibility mapping
#
# The first mapping registered for a code point wins.  A new mapping is
# stored in @uni_map, its plane and page are flagged in @uni_plane_used and
# @uni_page_used, and for compatibility mappings the matching @compat counter
# is incremented.  If the code point is already mapped, nothing is changed
# and a warning naming both Big5 codes is printed.
sub addMapping
{
    my ($utf32, $big5, $comp) = @_;

    my $plane = $utf32 >> 16;
    my $page = ($utf32 >> 8) & 0xFF;
    my $index = $utf32 & 0xFF;

    # The tests run outermost first so the nested tables are not touched
    # for a plane or page that has never been used.
    if (defined($uni_plane_used[$plane])
        && defined($uni_page_used[$plane][$page])
        && defined($uni_map[$plane][$page][$index]))
    {
        # Only real compatibility mappings ($comp != -1) are labelled
        # "compat"; a plain truth test would mislabel -1 and miss 0.
        print "WARNING!  Mapping ", printUtf32($utf32), " to ",
              printBig5($uni_map[$plane][$page][$index]), ", NOT ",
              ($comp != -1 ? "compat " : ""),
              printBig5($big5), "\n";
        return;
    }

    $uni_map[$plane][$page][$index] = $big5;
    $uni_plane_used[$plane] = 1;
    $uni_page_used[$plane][$page] = 1;
    ++$compat[$comp] if $comp != -1;
    return;
}
136*cdf0e10cSrcweir
# Reads one mapping table from ./input and records, for every Unicode code
# point it mentions, the Big5 code that maps to it.
#
#   $filename                - file name inside the ./input directory
#   $underlying              - reference to the array (indexed by code point)
#                              that receives the Big5 codes
#   $ignore_replacement_char - when true, entries mapping to U+FFFD are not
#                              recorded
#
# Only lines of the form "0xXXXX<ws>0xXXXX<ws>#..." are considered.  Dies on
# an unreadable file or on a malformed Big5 / Unicode value.  When two Big5
# codes map to the same code point, the first one is kept and a warning is
# printed.
sub readUnderlyingMapping
{
    my ($filename, $underlying, $ignore_replacement_char) = @_;

    open my $in, '<', "input/" . $filename
        or die "Cannot read " . $filename . ": $!";
    while (my $line = <$in>)
    {
        next unless $line
            =~ /(0x[0-9A-F][0-9A-F][0-9A-F][0-9A-F])[ \t]+(0x[0-9A-F]+)[ \t]+\#.*$/;
        my $big5 = oct($1);
        my $utf32 = oct($2);
        isValidBig5($big5)
            or die "Bad Big5 char " . printBig5($big5);
        isValidUtf32($utf32)
            or die "Bad UTF32 char " . printUtf32($utf32);
        next if $ignore_replacement_char && $utf32 == 0xFFFD;

        if (defined($underlying->[$utf32]))
        {
            print "WARNING!  In ", $filename, ", both ",
                  printBig5($underlying->[$utf32]), " and ",
                  printBig5($big5), " map to ", printUtf32($utf32), "\n";
        }
        else
        {
            $underlying->[$utf32] = $big5;
        }
    }
    close $in;
    return;
}

# Build mappings to track Unicode characters that are mapped from both Big5/
# CP950 and HKSCS.  Entries mapping to U+FFFD are skipped for BIG5.TXT only.
readUnderlyingMapping("BIG5.TXT", \@underlying_big5, 1);
readUnderlyingMapping("CP950.TXT", \@underlying_cp950, 0);
196*cdf0e10cSrcweir
# The following are mapped by the underlying RTL_TEXTENCODING_BIG5 to some
# nonstandard Unicode points, so they are explicitly mentioned here to map
# to the standard Unicode PUA points.  (In the other direction, the unofficial
# mappings from Unicode to RTL_TEXTENCODING_BIG5 C6A1--C7FE are harmless,
# since all Unicode characters involved are already covered by the official
# Big5-HKSCS mappings.)
#
# Each entry is [ Big5 code, Unicode PUA code point ]; both directions of the
# mapping are registered.
foreach my $override ([0xC6CF, 0xF6E0],
                      [0xC6D3, 0xF6E4],
                      [0xC6D5, 0xF6E6],
                      [0xC6D7, 0xF6E8],
                      [0xC6DE, 0xF6EF],
                      [0xC6DF, 0xF6F0])
{
    my ($code, $pua_point) = @$override;
    $big5_map[$code >> 8][$code & 0xFF] = $pua_point;
    addMapping($pua_point, $code, -1);
}
209*cdf0e10cSrcweir
# The following implements the mapping of Big5-HKSCS compatibility points
# (GCCS characters unified with other HKSCS characters) to Unicode.  In the
# other direction, characters from Unicode's PUA will map to these Big5-HKSCS
# compatibility points.  (See the first list in <http://www.info.gov.hk/
# digital21/eng/hkscs/download/big5cmp.txt>.)
#
# The list holds "Big5 code => Unicode code point" pairs; only the Big5 ->
# Unicode direction is registered here.
{
    my @compatibility_points = (
        0x8E69 => 0x7BB8, 0x8E6F => 0x7C06, 0x8E7E => 0x7CCE,
        0x8EAB => 0x7DD2, 0x8EB4 => 0x7E1D, 0x8ECD => 0x8005,
        0x8ED0 => 0x8028,
        0x8F57 => 0x83C1, 0x8F69 => 0x84A8, 0x8F6E => 0x840F,
        0x8FCB => 0x89A6, 0x8FCC => 0x89A9, 0x8FFE => 0x8D77,
        0x906D => 0x90FD, 0x907A => 0x92B9, 0x90DC => 0x975C,
        0x90F1 => 0x97FF,
        0x91BF => 0x9F16,
        0x9244 => 0x8503, 0x92AF => 0x5159, 0x92B0 => 0x515B,
        0x92B1 => 0x515D, 0x92B2 => 0x515E, 0x92C8 => 0x936E,
        0x92D1 => 0x7479,
        0x9447 => 0x6D67, 0x94CA => 0x799B,
        0x95D9 => 0x9097,
        0x9644 => 0x975D, 0x96ED => 0x701E, 0x96FC => 0x5B28,
        0x9B76 => 0x7201, 0x9B78 => 0x77D7, 0x9B7B => 0x7E87,
        0x9BC6 => 0x99D6, 0x9BDE => 0x91D4, 0x9BEC => 0x60DE,
        0x9BF6 => 0x6FB6,
        0x9C42 => 0x8F36, 0x9C53 => 0x4FBB, 0x9C62 => 0x71DF,
        0x9C68 => 0x9104, 0x9C6B => 0x9DF0, 0x9C77 => 0x83CF,
        0x9CBC => 0x5C10, 0x9CBD => 0x79E3, 0x9CD0 => 0x5A67,
        0x9D57 => 0x8F0B, 0x9D5A => 0x7B51, 0x9DC4 => 0x62D0,
        0x9EA9 => 0x6062, 0x9EEF => 0x75F9, 0x9EFD => 0x6C4A,
        0x9F60 => 0x9B2E, 0x9F66 => 0x9F17, 0x9FCB => 0x50ED,
        0x9FD8 => 0x5F0C,
        0xA063 => 0x880F, 0xA077 => 0x62CE, 0xA0D5 => 0x7468,
        0xA0DF => 0x7162, 0xA0E4 => 0x7250,
        0xFA5F => 0x5029, 0xFA66 => 0x507D, 0xFABD => 0x5305,
        0xFAC5 => 0x5344, 0xFAD5 => 0x537F,
        0xFB48 => 0x5605, 0xFBB8 => 0x5A77, 0xFBF3 => 0x5E75,
        0xFBF9 => 0x5ED0,
        0xFC4F => 0x5F58, 0xFC6C => 0x60A4, 0xFCB9 => 0x6490,
        0xFCE2 => 0x6674, 0xFCF1 => 0x675E,
        0xFDB7 => 0x6C9C, 0xFDB8 => 0x6E1D, 0xFDBB => 0x6E2F,
        0xFDF1 => 0x716E,
        0xFE52 => 0x732A, 0xFE6F => 0x745C, 0xFEAA => 0x74E9,
        0xFEDD => 0x7809,
    );

    while (@compatibility_points)
    {
        my ($code, $unified) = splice(@compatibility_points, 0, 2);
        $big5_map[$code >> 8][$code & 0xFF] = $unified;
    }
}
299*cdf0e10cSrcweir
# Counters reported after the main pass: the number of primary mappings that
# land in the BMP Private Use Area, and the number of compatibility mappings
# registered per HKSCS edition (indexed like the $comp argument of
# addMapping).
$pua = 0;
$compat[0] = 0; # 1993
$compat[1] = 0; # 2000
$compat[2] = 0; # 2001

# Main pass over big5-iso.txt.  Every data line carries four hex fields: the
# Big5 code followed by its Unicode mapping in the 1993, 2000 and 2001
# columns.  For each line the primary mapping (2001 column if $surrogates is
# set, else 2000 column) is stored in @big5_map and registered in the reverse
# tables; the other columns are registered as compatibility mappings when
# they differ.
$filename = "big5-iso.txt";
{
    open my $hkscs_in, '<', "input/" . $filename
        or die "Cannot read " . $filename . ": $!";
    while (my $line = <$hkscs_in>)
    {
        next unless $line
            =~ /^([0-9A-F]+) +([0-9A-F]+) +([0-9A-F]+) +([0-9A-F]+)$/;
        my $big5 = hex($1);
        my $utf32_1993 = hex($2);
        my $utf32_2000 = hex($3);
        my $utf32_2001 = hex($4);

        isValidBig5($big5)
            or die "Bad Big5 char " . printBig5($big5);
        foreach my $candidate ($utf32_1993, $utf32_2000, $utf32_2001)
        {
            isValidUtf32($candidate)
                or die "Bad UTF32 char " . printUtf32($candidate);
        }

        my $utf32 = $surrogates ? $utf32_2001 : $utf32_2000;

        # Report code points that an underlying table maps from a Big5 code
        # other than this HKSCS code.  When every underlying table that knows
        # the code point agrees with $big5, nothing is reported.
        #
        # Depending on real underlying mapping (cf.
        # ../convertbig5hkscs.tab), it would be possible to save some
        # table space by dropping those HKSCS code points that are
        # already covered by the underlying mapping.
        my $from_big5 = $underlying_big5[$utf32];
        my $from_cp950 = $underlying_cp950[$utf32];
        if (defined($from_big5) && $from_big5 != $big5
            || defined($from_cp950) && $from_cp950 != $big5)
        {
            print "XXX mapping underlying";
            if (defined($from_big5) && defined($from_cp950)
                && $from_big5 == $from_cp950)
            {
                print " Big5/CP950 ", printBig5($from_big5);
            }
            else
            {
                print " Big5 ", printBig5($from_big5)
                    if defined($from_big5);
                print " CP950 ", printBig5($from_cp950)
                    if defined($from_cp950);
            }
            print " and HKSCS ", printBig5($big5), " to ",
                  printUtf32($utf32), "\n";
        }

        # Count primary mappings into the BMP Private Use Area.
        ++$pua if $utf32 >= 0xE000 && $utf32 <= 0xF8FF;

        # A Big5 code may be defined only once, including by the hard-coded
        # tables filled in before this pass.
        my $row = $big5 >> 8;
        my $column = $big5 & 0xFF;
        if (defined($big5_map[$row][$column]))
        {
            die "Bad Big5 mapping " . printBig5($big5);
        }
        $big5_map[$row][$column] = $utf32;

        addMapping($utf32, $big5, -1);

        # Register the remaining distinct columns as compatibility mappings,
        # newest edition first.
        if ($utf32_2001 != $utf32)
        {
            addMapping($utf32_2001, $big5, 2);
        }
        if ($utf32_2000 != $utf32 && $utf32_2000 != $utf32_2001)
        {
            addMapping($utf32_2000, $big5, 1);
        }
        if ($utf32_1993 != $utf32 && $utf32_1993 != $utf32_2000
            && $utf32_1993 != $utf32_2001)
        {
            addMapping($utf32_1993, $big5, 0);
        }
    }
    close $hkscs_in;
}
405*cdf0e10cSrcweir
# Report the PUA count, then one line per HKSCS edition that contributed at
# least one compatibility mapping.
print $pua, " mappings to PUA\n";
foreach my $edition ([0, 1993], [1, 2000], [2, 2001])
{
    my ($slot, $year) = @$edition;
    next if $compat[$slot] == 0;
    print $compat[$slot], " ", $year, " compatibility mappings\n";
}
410*cdf0e10cSrcweir
# Sanity check: no code point in U+0000..U+007F may have a reverse mapping.
# Each candidate is looked up by the loop variable itself, so every one of
# the 128 code points is inspected.
if (defined($uni_plane_used[0]) && defined($uni_page_used[0][0]))
{
    for my $ascii (0 .. 0x7F)
    {
        next unless defined($uni_map[0][0][$ascii]);
        die "Mapping " . printUtf32($ascii) . " to "
            . printBig5($uni_map[0][0][$ascii]);
    }
}
422*cdf0e10cSrcweir
# Open the generated table file (lower-cased $id plus ".tab") for writing.
# The three-argument form keeps the file name from being interpreted as part
# of the open mode; the OUT handle stays open for the rest of the script.
$filename = lc($id) . ".tab";
open OUT, '>', $filename or die "Cannot write " . $filename . ": $!";
425*cdf0e10cSrcweir
# Copy the leading '#' comment block of the generator script (lower-cased
# $id plus ".pl") into the output as a C comment:
#   - a "#!" line is skipped;
#   - the first "#*..." line opens the comment ("/*...");
#   - each later "#*..." line closes a comment: its last character is
#     replaced by "/";
#   - every other "#" line becomes a " *" line;
#   - the first line that does not start with "#" ends the copy.
{
    $filename = lc($id) . ".pl";
    open my $script, '<', $filename
        or die "Cannot read " . $filename . ": $!";
    my $first = 1;
    while (my $line = <$script>)
    {
        next if $line =~ /^\#!.*$/;

        if ($line =~ /^\#(\*.*)$/)
        {
            if ($first == 1)
            {
                print OUT "/", $1, "\n";
                $first = 0;
            }
            else
            {
                print OUT " ", substr($1, 0, length($1) - 1), "/\n";
            }
        }
        elsif ($line =~ /^\# (.*)$/)
        {
            # NOTE(review): the blank after '#' is consumed by the pattern
            # and not re-emitted after '*' -- confirm this is intended.
            print OUT " *", $1, "\n";
        }
        elsif ($line =~ /^\#(.*)$/)
        {
            print OUT " *", $1, "\n";
        }
        else
        {
            last;
        }
    }
    close $script;
}
462*cdf0e10cSrcweir
# Emit the guarded include of sal/types.h, surrounded by blank lines.
print OUT <<'END_OF_PROLOGUE';

#ifndef _SAL_TYPES_H_
#include "sal/types.h"
#endif

END_OF_PROLOGUE
468*cdf0e10cSrcweir
# Emit the Big5 -> Unicode data array.  For every Big5 row (lead byte) that
# has at least one mapping, the array receives:
#   1. one entry "first | (last << 8)" giving the first and last mapped
#      column of the row;
#   2. one entry per column first..last: the code point itself if it is at
#      most 0xFFFF, the high surrogate if it is above 0xFFFF, or 0xffff for
#      an unmapped column;
#   3. only if the row holds code points above 0xFFFF: one entry with the
#      first such column, followed by one entry per column from the first to
#      the last such column holding the low surrogate (0 for other columns).
# $big5_data_index counts the entries written so far; the index at which each
# row starts is remembered in @big5_data_offsets (-1 for rows without
# mappings).
469*cdf0e10cSrcweirprint OUT "static sal_uInt16 const aImpl", $id, "ToUnicodeData[] = {\n";
470*cdf0e10cSrcweir$big5_data_index = 0;
471*cdf0e10cSrcweir$big5_rows = 0;
472*cdf0e10cSrcweir$big5_chars = 0;
473*cdf0e10cSrcweirfor ($big5_row = 0; $big5_row <= 255; ++$big5_row)
474*cdf0e10cSrcweir{
    # Find the first and last mapped column of this row.  $big5_row_last is
    # not reset per row; it is only read when $big5_row_first was set, and
    # in that case it has been set in the same pass.
475*cdf0e10cSrcweir    $big5_row_first = -1;
476*cdf0e10cSrcweir    for ($big5_column = 0; $big5_column <= 255; ++$big5_column)
477*cdf0e10cSrcweir    {
478*cdf0e10cSrcweir        if (defined($big5_map[$big5_row][$big5_column]))
479*cdf0e10cSrcweir        {
480*cdf0e10cSrcweir            if ($big5_row_first == -1)
481*cdf0e10cSrcweir            {
482*cdf0e10cSrcweir                $big5_row_first = $big5_column;
483*cdf0e10cSrcweir            }
484*cdf0e10cSrcweir            $big5_row_last = $big5_column;
485*cdf0e10cSrcweir        }
486*cdf0e10cSrcweir    }
487*cdf0e10cSrcweir    if ($big5_row_first != -1)
488*cdf0e10cSrcweir    {
489*cdf0e10cSrcweir        $big5_data_offsets[$big5_row] = $big5_data_index;
490*cdf0e10cSrcweir        ++$big5_rows;
491*cdf0e10cSrcweir        print OUT " /* row ", $big5_row, " */\n";
492*cdf0e10cSrcweir
493*cdf0e10cSrcweir        $big5_row_surrogates_first = -1;
494*cdf0e10cSrcweir        $big5_row_chars = 0;
495*cdf0e10cSrcweir        $big5_row_surrogates = 0;
496*cdf0e10cSrcweir
        # Header entry of the row: first and last mapped column.
497*cdf0e10cSrcweir        print OUT "  ", $big5_row_first, " | (", $big5_row_last,
498*cdf0e10cSrcweir                  " << 8), /* first, last */\n";
499*cdf0e10cSrcweir        ++$big5_data_index;
500*cdf0e10cSrcweir
        # Pad the first output line so that each entry is printed in the
        # slot (column modulo 10) matching its column number; a line break
        # follows every column whose number ends in 9.  $bol is 1 right
        # after a line break, when the next line still needs its indent.
501*cdf0e10cSrcweir        print OUT "  ", printSpaces(7, 10, $big5_row_first);
502*cdf0e10cSrcweir        $bol = 0;
503*cdf0e10cSrcweir        for ($big5_column = $big5_row_first;
504*cdf0e10cSrcweir             $big5_column <= $big5_row_last;
505*cdf0e10cSrcweir             ++$big5_column)
506*cdf0e10cSrcweir        {
507*cdf0e10cSrcweir            if ($bol == 1)
508*cdf0e10cSrcweir            {
509*cdf0e10cSrcweir                print OUT "  ";
510*cdf0e10cSrcweir                $bol = 0;
511*cdf0e10cSrcweir            }
512*cdf0e10cSrcweir            if (defined($big5_map[$big5_row][$big5_column]))
513*cdf0e10cSrcweir            {
514*cdf0e10cSrcweir                $utf32 = $big5_map[$big5_row][$big5_column];
515*cdf0e10cSrcweir                ++$big5_row_chars;
516*cdf0e10cSrcweir                if ($utf32 <= 0xFFFF)
517*cdf0e10cSrcweir                {
518*cdf0e10cSrcweir                    printf OUT "0x%04X,", $utf32;
519*cdf0e10cSrcweir                }
520*cdf0e10cSrcweir                else
521*cdf0e10cSrcweir                {
                    # Code point above 0xFFFF: write the high surrogate here
                    # and remember the column range needing low surrogates.
522*cdf0e10cSrcweir                    ++$big5_row_surrogates;
523*cdf0e10cSrcweir                    printf OUT "0x%04X,",
524*cdf0e10cSrcweir                               (0xD800 | (($utf32 - 0x10000) >> 10));
525*cdf0e10cSrcweir                    if ($big5_row_surrogates_first == -1)
526*cdf0e10cSrcweir                    {
527*cdf0e10cSrcweir                        $big5_row_surrogates_first = $big5_column;
528*cdf0e10cSrcweir                    }
529*cdf0e10cSrcweir                    $big5_row_surrogates_last = $big5_column;
530*cdf0e10cSrcweir                }
531*cdf0e10cSrcweir            }
532*cdf0e10cSrcweir            else
533*cdf0e10cSrcweir            {
                # Unmapped column inside the first..last range.
534*cdf0e10cSrcweir                printf OUT "0xffff,";
535*cdf0e10cSrcweir            }
536*cdf0e10cSrcweir            ++$big5_data_index;
537*cdf0e10cSrcweir            if ($big5_column % 10 == 9)
538*cdf0e10cSrcweir            {
539*cdf0e10cSrcweir                print OUT "\n";
540*cdf0e10cSrcweir                $bol = 1;
541*cdf0e10cSrcweir            }
542*cdf0e10cSrcweir        }
        # Terminate a partially filled last line.
543*cdf0e10cSrcweir        if ($bol == 0)
544*cdf0e10cSrcweir        {
545*cdf0e10cSrcweir            print OUT "\n";
546*cdf0e10cSrcweir        }
547*cdf0e10cSrcweir
        # Second part of the row, written only if it contains code points
        # above 0xFFFF: the first such column, then the low surrogates for
        # the columns from the first to the last such column.
548*cdf0e10cSrcweir        if ($big5_row_surrogates_first != -1)
549*cdf0e10cSrcweir        {
550*cdf0e10cSrcweir            print OUT "  ", $big5_row_surrogates_first,
551*cdf0e10cSrcweir                      ", /* first low-surrogate */\n";
552*cdf0e10cSrcweir            ++$big5_data_index;
553*cdf0e10cSrcweir
554*cdf0e10cSrcweir            print OUT "  ", printSpaces(7, 10, $big5_row_surrogates_first);
555*cdf0e10cSrcweir            $bol = 0;
556*cdf0e10cSrcweir            for ($big5_column = $big5_row_surrogates_first;
557*cdf0e10cSrcweir                 $big5_column <= $big5_row_surrogates_last;
558*cdf0e10cSrcweir                 ++$big5_column)
559*cdf0e10cSrcweir            {
560*cdf0e10cSrcweir                if ($bol == 1)
561*cdf0e10cSrcweir                {
562*cdf0e10cSrcweir                    print OUT "  ";
563*cdf0e10cSrcweir                    $bol = 0;
564*cdf0e10cSrcweir                }
                # Unmapped columns are treated like code point 0, so they
                # get a 0 entry just like columns at or below 0xFFFF.
565*cdf0e10cSrcweir                $utf32 = 0;
566*cdf0e10cSrcweir                if (defined($big5_map[$big5_row][$big5_column]))
567*cdf0e10cSrcweir                {
568*cdf0e10cSrcweir                    $utf32 = $big5_map[$big5_row][$big5_column];
569*cdf0e10cSrcweir                }
570*cdf0e10cSrcweir                if ($utf32 <= 0xFFFF)
571*cdf0e10cSrcweir                {
572*cdf0e10cSrcweir                    printf OUT "     0,";
573*cdf0e10cSrcweir                }
574*cdf0e10cSrcweir                else
575*cdf0e10cSrcweir                {
576*cdf0e10cSrcweir                    printf OUT "0x%04X,",
577*cdf0e10cSrcweir                               (0xDC00 | (($utf32 - 0x10000) & 0x3FF));
578*cdf0e10cSrcweir                }
579*cdf0e10cSrcweir                ++$big5_data_index;
580*cdf0e10cSrcweir                if ($big5_column % 10 == 9)
581*cdf0e10cSrcweir                {
582*cdf0e10cSrcweir                    print OUT "\n";
583*cdf0e10cSrcweir                    $bol = 1;
584*cdf0e10cSrcweir                }
585*cdf0e10cSrcweir            }
586*cdf0e10cSrcweir            if ($bol == 0)
587*cdf0e10cSrcweir            {
588*cdf0e10cSrcweir                print OUT "\n";
589*cdf0e10cSrcweir            }
590*cdf0e10cSrcweir        }
591*cdf0e10cSrcweir
        # Per-row statistics in bytes (two bytes per 16-bit entry): "space"
        # is everything written for the row; "used" counts the header entry,
        # the mapped columns and, if present, the low-surrogate header plus
        # one entry per code point above 0xFFFF.
592*cdf0e10cSrcweir        $big5_chars += $big5_row_chars;
593*cdf0e10cSrcweir        $big5_data_space[$big5_row]
594*cdf0e10cSrcweir            = ($big5_data_index - $big5_data_offsets[$big5_row]) * 2;
595*cdf0e10cSrcweir        $big5_data_used[$big5_row]
596*cdf0e10cSrcweir            = (1 + $big5_row_chars + ($big5_row_surrogates == 0 ?
597*cdf0e10cSrcweir                                          0 : 1 + $big5_row_surrogates))
598*cdf0e10cSrcweir                  * 2;
599*cdf0e10cSrcweir    }
600*cdf0e10cSrcweir    else
601*cdf0e10cSrcweir    {
        # Row without any mapping: no data, offset -1.
602*cdf0e10cSrcweir        print OUT " /* row ", $big5_row, ": --- */\n";
603*cdf0e10cSrcweir        $big5_data_offsets[$big5_row] = -1;
604*cdf0e10cSrcweir    }
605*cdf0e10cSrcweir}
606*cdf0e10cSrcweirprint OUT "};\n\n";
607*cdf0e10cSrcweirprint "big5 rows = ", $big5_rows, ", chars = ", $big5_chars, "\n";
608*cdf0e10cSrcweir
# Emit the per-row offset table: for each of the 256 Big5 rows, the index of
# the row's first entry in the ToUnicodeData array, or -1 for a row without
# mappings.  Rows with data carry their usage statistics in a comment, and
# each of them adds 4 bytes to $big5_rowoffsets_used.
print OUT "static sal_Int32 const aImpl", $id, "ToUnicodeRowOffsets[] = {\n";
$big5_rowoffsets_used = 0;
$big5_row = 0;
while ($big5_row <= 255)
{
    my $row_offset = $big5_data_offsets[$big5_row];
    if ($row_offset != -1)
    {
        my $row_stats = printStats($big5_data_used[$big5_row],
                                   $big5_data_space[$big5_row]);
        print OUT "  ", $row_offset, ", /* row ", $big5_row, "; ",
                  $row_stats, " */\n";
        $big5_rowoffsets_used += 4;
    }
    else
    {
        print OUT "  -1, /* row ", $big5_row, " */\n";
    }
    ++$big5_row;
}
print OUT "};\n\n";
631*cdf0e10cSrcweir
# Emit the C array aImplUnicodeTo<id>Data.
#
# Planes 0..16 are visited in order, and within each used plane pages
# 0..255.  For every used page the block
#   - stores the page's starting index into the array in
#     $uni_data_offsets[plane][page] (-1 for an unused page),
#   - emits one header word "first | (last << 8)" holding the lowest and
#     highest mapped index within the page,
#   - emits one word per index in first..last: the mapped value as
#     0x%04X, or 0 where nothing is mapped.
# Unused planes and pages only produce a "---" comment in the output.
#
# Statistics kept for the later tables: $uni_data_used[plane][page] grows
# by 2 for the header word and for every mapped entry, while
# $uni_data_space[plane][page] is 2 times the number of words emitted for
# the page (presumably byte counts, the element type being sal_uInt16).
print OUT "static sal_uInt16 const aImplUnicodeTo", $id, "Data[] = {\n";
$uni_data_index = 0;
for my $plane (0 .. 16)
{
    if (!defined($uni_plane_used[$plane]))
    {
        print OUT " /* plane ", $plane, ": --- */\n";
        next;
    }

    for my $page (0 .. 255)
    {
        if (!defined($uni_page_used[$plane][$page]))
        {
            $uni_data_offsets[$plane][$page] = -1;
            print OUT " /* plane ", $plane, ", page ", $page,
                      ": --- */\n";
            next;
        }

        $uni_data_offsets[$plane][$page] = $uni_data_index;
        print OUT " /* plane ", $plane, ", page ", $page,
                  " */\n";

        # Lowest and highest mapped index of this page.  The "last" value
        # is deliberately left untouched when nothing is mapped, exactly
        # as a plain scan over 0..255 would leave it.
        my @mapped_indices
            = grep { defined($uni_map[$plane][$page][$_]) } 0 .. 255;
        $uni_page_first = @mapped_indices ? $mapped_indices[0] : -1;
        $uni_page_last = $mapped_indices[-1] if @mapped_indices;

        # Header word: counts as one array element and two used units.
        print OUT "  ", $uni_page_first, " | (", $uni_page_last,
                  " << 8), /* first, last */\n";
        ++$uni_data_index;
        $uni_data_used[$plane][$page] = 2;

        # Entries are laid out ten per line, aligned on index % 10;
        # printSpaces supplies the padding in front of the first entry.
        print OUT "  ", printSpaces(7, 10, $uni_page_first);
        my $at_line_start = 0;
        for my $index ($uni_page_first .. $uni_page_last)
        {
            if ($at_line_start)
            {
                print OUT "  ";
                $at_line_start = 0;
            }

            my $code = $uni_map[$plane][$page][$index];
            if (defined($code))
            {
                printf OUT "0x%04X,", $code;
                $uni_data_used[$plane][$page] += 2;
            }
            else
            {
                print OUT "     0,";
            }
            ++$uni_data_index;

            if ($index % 10 == 9)
            {
                print OUT "\n";
                $at_line_start = 1;
            }
        }
        # Terminate a partially filled last line.
        print OUT "\n" unless $at_line_start;

        $uni_data_space[$plane][$page]
            = 2 * ($uni_data_index - $uni_data_offsets[$plane][$page]);
    }
}
print OUT "};\n\n";
717*cdf0e10cSrcweir
# Emit the C array aImplUnicodeTo<id>PageOffsets.
#
# Every used plane contributes 256 entries, one per page: the value of
# $uni_data_offsets[plane][page], which is -1 for a page without data.
# Unused planes contribute no entries, only a "---" comment.
#
# Per-plane statistics gathered on the way:
#   $uni_pageoffsets_used[plane]  grows by 4 for each page that has data,
#   $uni_data_used_sum[plane]     sum of $uni_data_used over those pages,
#   $uni_data_space_sum[plane]    sum of $uni_data_space over those pages.
print OUT "static sal_Int32 const aImplUnicodeTo", $id, "PageOffsets[] = {\n";
for my $plane (0 .. 16)
{
    if (!defined($uni_plane_used[$plane]))
    {
        print OUT "  /* plane ", $plane, ": --- */\n";
        next;
    }

    $uni_pageoffsets_used[$plane] = 0;
    $uni_data_used_sum[$plane] = 0;
    $uni_data_space_sum[$plane] = 0;

    for my $page (0 .. 255)
    {
        my $data_offset = $uni_data_offsets[$plane][$page];

        # Page without data: emit the -1 marker and move on.
        if ($data_offset == -1)
        {
            print OUT "  -1, /* plane ",
                      $plane,
                      ", page ",
                      $page,
                      " */\n";
            next;
        }

        print OUT "  ",
                  $data_offset,
                  ", /* plane ",
                  $plane,
                  ", page ",
                  $page,
                  "; ",
                  printStats($uni_data_used[$plane][$page],
                             $uni_data_space[$plane][$page]),
                  " */\n";

        $uni_pageoffsets_used[$plane] += 4;
        $uni_data_used_sum[$plane] += $uni_data_used[$plane][$page];
        $uni_data_space_sum[$plane] += $uni_data_space[$plane][$page];
    }
}
print OUT "};\n\n";
763*cdf0e10cSrcweir
# Emit the C array aImplUnicodeTo<id>PlaneOffsets and finish the output.
#
# One entry is written for each of the planes 0..16:
#   - a used plane gets "<k> * 256", where k counts the used planes seen
#     so far (0 for the first used plane, 1 for the second, ...);
#   - an unused plane gets -1.
# Each used plane's entry carries a comment with the page-offset
# statistics (used vs. 256 * 4) and the data statistics of that plane.
# The array ends with a summary comment: plane offsets (used vs. 17 * 4),
# page offsets and data, each formatted by printStats.
print OUT "static sal_Int32 const aImplUnicodeTo",
          $id,
          "PlaneOffsets[] = {\n";
$uni_page_offset = 0;
$uni_planeoffsets_used = 0;
$uni_pageoffsets_used_sum = 0;
$uni_pageoffsets_space_sum = 0;
$uni_data_used_sum2 = 0;
$uni_data_space_sum2 = 0;
for my $plane (0 .. 16)
{
    if (!defined($uni_plane_used[$plane]))
    {
        print OUT "  -1, /* plane ", $plane, " */\n";
        next;
    }

    # Ordinal of this plane among the used planes.
    my $plane_slot = $uni_page_offset++;
    print OUT "  ",
              $plane_slot,
              " * 256, /* plane ",
              $plane,
              "; ",
              printStats($uni_pageoffsets_used[$plane], 256 * 4),
              ", ",
              printStats($uni_data_used_sum[$plane],
                         $uni_data_space_sum[$plane]),
              " */\n";

    # Accumulate the grand totals for the summary comment below.
    $uni_planeoffsets_used += 4;
    $uni_pageoffsets_used_sum += $uni_pageoffsets_used[$plane];
    $uni_pageoffsets_space_sum += 256 * 4;
    $uni_data_used_sum2 += $uni_data_used_sum[$plane];
    $uni_data_space_sum2 += $uni_data_space_sum[$plane];
}
print OUT " /* ",
          printStats($uni_planeoffsets_used, 17 * 4),
          ", ",
          printStats($uni_pageoffsets_used_sum, $uni_pageoffsets_space_sum),
          ", ",
          printStats($uni_data_used_sum2, $uni_data_space_sum2),
          " */\n};\n";

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed; fail loudly instead of leaving a silently truncated table file.
close(OUT) or die "Error closing generated output file: $!";
807