1*b1cdbd2cSJim Jagielski#!/usr/bin/perl
2*b1cdbd2cSJim Jagielski#**************************************************************
3*b1cdbd2cSJim Jagielski#
4*b1cdbd2cSJim Jagielski#  Licensed to the Apache Software Foundation (ASF) under one
5*b1cdbd2cSJim Jagielski#  or more contributor license agreements.  See the NOTICE file
6*b1cdbd2cSJim Jagielski#  distributed with this work for additional information
7*b1cdbd2cSJim Jagielski#  regarding copyright ownership.  The ASF licenses this file
8*b1cdbd2cSJim Jagielski#  to you under the Apache License, Version 2.0 (the
9*b1cdbd2cSJim Jagielski#  "License"); you may not use this file except in compliance
10*b1cdbd2cSJim Jagielski#  with the License.  You may obtain a copy of the License at
11*b1cdbd2cSJim Jagielski#
12*b1cdbd2cSJim Jagielski#    http://www.apache.org/licenses/LICENSE-2.0
13*b1cdbd2cSJim Jagielski#
14*b1cdbd2cSJim Jagielski#  Unless required by applicable law or agreed to in writing,
15*b1cdbd2cSJim Jagielski#  software distributed under the License is distributed on an
16*b1cdbd2cSJim Jagielski#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17*b1cdbd2cSJim Jagielski#  KIND, either express or implied.  See the License for the
18*b1cdbd2cSJim Jagielski#  specific language governing permissions and limitations
19*b1cdbd2cSJim Jagielski#  under the License.
20*b1cdbd2cSJim Jagielski#
21*b1cdbd2cSJim Jagielski#**************************************************************
22*b1cdbd2cSJim Jagielski
23*b1cdbd2cSJim Jagielski
24*b1cdbd2cSJim Jagielski
# The following files must be available in a ./input subdir:

# <http://www.unicode.org/Public/UNIDATA/Unihan.txt>:
#  "Unicode version: 3.1.1    Table version: 1.1    Date: 28 June 2001"
#  contains descriptions for:
#   U+3400..4DFF CJK Unified Ideographs Extension A
#   U+4E00..9FFF CJK Unified Ideographs
#   U+F900..FAFF CJK Compatibility Ideographs
#   U+20000..2F7FF CJK Unified Ideographs Extension B
#   U+2F800..2FFFF CJK Compatibility Ideographs Supplement

# <http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/CNS11643.TXT>:
#  "Unicode version: 1.1    Table version: 0.0d1    Date: 21 October 1994"
#  contains mappings for CNS 11643-1986

# <http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/ftp/CJKtable/Uni2CNS.Z>:
#  "Unicode version: 1.1    Table version: 0.49    Date: 26 March 1998"
#  contains mappings for CNS 11643-1992 that are incompatible with
#   CNS11643.TXT
44*b1cdbd2cSJim Jagielski
# Base identifier of the generated table: its lowercased form names the
# .tab/.pl files and it is embedded in the emitted C array names.
$id = 'Cns116431992';
46*b1cdbd2cSJim Jagielski
# Tell whether a code point is acceptable as a mapped Unicode character.
#
# Argument: the code point as a number.
# Returns true when the value lies in 0..0x10FFFF, is not a surrogate
# (D800..DFFF), is not in FDD0..FDEF, and its low 16 bits are below
# 0xFFFE; returns false otherwise.
sub isValidUtf32
{
    my ($code_point) = @_;

    my $in_unicode_range = $code_point >= 0 && $code_point <= 0x10FFFF;
    my $is_surrogate     = $code_point >= 0xD800 && $code_point <= 0xDFFF;
    my $in_fdd0_block    = $code_point >= 0xFDD0 && $code_point <= 0xFDEF;
    my $low_word         = $code_point & 0xFFFF;

    return $in_unicode_range
           && !$is_surrogate
           && !$in_fdd0_block
           && $low_word < 0xFFFE;
}
55*b1cdbd2cSJim Jagielski
# Format a code point for messages as "U+" followed by its value in
# upper-case hexadecimal, zero-padded to at least four digits.
sub printUtf32
{
    my ($code_point) = @_;
    my $hex_digits = sprintf("%04X", $code_point);
    return "U+" . $hex_digits;
}
61*b1cdbd2cSJim Jagielski
# Tell whether a (plane, row, column) triple is a legal CNS 11643-1992
# position as this script understands it.
#
# Arguments: plane, row, column (numbers).
# Returns true when plane is in 1..16 and both row and column are in
# 1..94; returns false otherwise.
sub isValidCns116431992
{
    my ($plane, $row, $column) = @_;

    my $plane_ok  = $plane >= 1 && $plane <= 16;
    my $row_ok    = $row >= 1 && $row <= 94;
    my $column_ok = $column >= 1 && $column <= 94;

    return $plane_ok && $row_ok && $column_ok;
}
71*b1cdbd2cSJim Jagielski
# Format a CNS 11643-1992 position for messages as "P-RR/CC": the plane
# in decimal, then row and column in decimal padded to two digits.
#
# Arguments: plane, row, column (numbers).
sub printCns116431992
{
    my ($plane, $row, $column) = @_;
    my $position = sprintf("%02d/%02d", $row, $column);
    return sprintf("%d-", $plane) . $position;
}
79*b1cdbd2cSJim Jagielski
# Format a usage figure as "USED/SPACE bytes (PCT%)", where PCT is
# USED as a percentage of SPACE with one decimal place.
#
# Arguments: used, space (numbers; space must be non-zero because it
# is used as a divisor).
sub printStats
{
    my ($used, $space) = @_;
    my $percentage = $used * 100 / $space;
    return sprintf("%d/%d bytes (%.1f%%)", $used, $space, $percentage);
}
89*b1cdbd2cSJim Jagielski
# Build the blank padding that stands in for the table cells preceding
# position $end on its output line, so that a row starting part-way
# through a line stays aligned with full lines.
#
# Arguments (whole numbers):
#   column_width      characters occupied by one cell
#   columns_per_line  number of cells per output line (must be non-zero)
#   end               position of the first cell that will be printed
# Returns a string of column_width * (end modulo columns_per_line)
# spaces; the empty string when that count is not positive.
#
# All working variables are lexical, so the call leaves package globals
# such as $i, $j and $output untouched.
sub printSpaces
{
    my ($column_width, $columns_per_line, $end) = @_;

    # First position on the line that contains $end.
    my $line_start = int($end / $columns_per_line) * $columns_per_line;
    my $skipped_cells = $end - $line_start;

    return "" if $skipped_cells <= 0 || $column_width <= 0;
    return " " x ($column_width * $skipped_cells);
}
107*b1cdbd2cSJim Jagielski
# Per-input-file counters; each is incremented when a new mapping from
# the corresponding file is stored.
($count_Unihan_txt, $count_CNS11643_TXT, $count_Uni2CNS) = (0, 0, 0);
111*b1cdbd2cSJim Jagielski
# Read the CNS 11643-1992 mappings listed in input/Unihan.txt into @cns_map.
#
# Two Unihan fields are read, both with values of the form "P-RRCC" (one
# hex plane digit, then the hex row and column bytes, each offset by 0x20):
#   kCNS1992      a mapping that contradicts an earlier one is fatal;
#   kIRG_TSource  a contradicting mapping is reported as a warning on
#                 standard output and the earlier mapping is kept.
# A kCNS1992 line whose value has any other form aborts the script.
# Each newly stored mapping marks its plane in @cns_plane_used and is
# counted in $count_Unihan_txt.
if (1)
{
    $filename = "Unihan.txt";
    open(my $unihan, "<", "input/" . $filename)
        or die "Cannot read " . $filename . ": " . $!;
    while (<$unihan>)
    {
        if (/^U\+([0-9A-F]+)\t(kCNS1992|kIRG_TSource)\t([0-9A-F])-([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])$/)
        {
            my $field = $2;
            $utf32 = oct("0x" . $1);
            $cns_plane = oct("0x" . $3);
            $cns_row = oct("0x" . $4) - 0x20;
            $cns_column = oct("0x" . $5) - 0x20;

            # printUtf32 already supplies the "U+" prefix.
            isValidUtf32($utf32)
                or die "Bad UTF32 char " . printUtf32($utf32);
            isValidCns116431992($cns_plane, $cns_row, $cns_column)
                or die "Bad CNS11643-1992 char "
                           . printCns116431992($cns_plane,
                                               $cns_row,
                                               $cns_column);

            my $known = $cns_map[$cns_plane][$cns_row][$cns_column];
            if (!defined($known))
            {
                $cns_map[$cns_plane][$cns_row][$cns_column] = $utf32;
                $cns_plane_used[$cns_plane] = 1;
                ++$count_Unihan_txt;
            }
            elsif ($known != $utf32)
            {
                my $conflict = "Mapping "
                               . printCns116431992($cns_plane,
                                                   $cns_row,
                                                   $cns_column)
                               . " to "
                               . printUtf32($known)
                               . ", NOT "
                               . printUtf32($utf32);
                # Only kCNS1992 conflicts are fatal; kIRG_TSource ones
                # are merely reported.
                die $conflict if $field eq "kCNS1992";
                print "WARNING!  ", $conflict, "\n";
            }
        }
        elsif (/^U\+([0-9A-F]+)\tkCNS1992\t.*$/)
        {
            die "Bad format";
        }
    }
    close($unihan);
}
194*b1cdbd2cSJim Jagielski
# Read mappings from input/CNS11643.TXT into @cns_map.
#
# Each data line has the form "0xPRRCC<TAB>0xUUUU<TAB>#...", where P is
# the hex plane digit and RR/CC are the hex row and column bytes, each
# offset by 0x20.  Every matching line is validated, but only planes 1
# and 2 are stored.  A mapping that contradicts one already present in
# @cns_map is fatal.  Each newly stored mapping marks its plane in
# @cns_plane_used and is counted in $count_CNS11643_TXT.
if (1)
{
    $filename = "CNS11643.TXT";
    open(my $cns_txt, "<", "input/" . $filename)
        or die "Cannot read " . $filename . ": " . $!;
    while (<$cns_txt>)
    {
        next unless /0x([0-9A-F])([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])\t0x([0-9A-F]+)\t\#.*$/;

        $utf32 = oct("0x" . $4);
        $cns_plane = oct("0x" . $1);
        $cns_row = oct("0x" . $2) - 0x20;
        $cns_column = oct("0x" . $3) - 0x20;

        # printUtf32 already supplies the "U+" prefix.
        isValidUtf32($utf32)
            or die "Bad UTF32 char " . printUtf32($utf32);
        isValidCns116431992($cns_plane, $cns_row, $cns_column)
            or die "Bad CNS11643-1992 char "
                       . printCns116431992($cns_plane,
                                           $cns_row,
                                           $cns_column);

        # Planes above 2 are validated but not taken from this file.
        next unless $cns_plane <= 2;

        my $known = $cns_map[$cns_plane][$cns_row][$cns_column];
        if (!defined($known))
        {
            $cns_map[$cns_plane][$cns_row][$cns_column] = $utf32;
            $cns_plane_used[$cns_plane] = 1;
            ++$count_CNS11643_TXT;
        }
        elsif ($known != $utf32)
        {
            die "Mapping "
                    . printCns116431992($cns_plane, $cns_row, $cns_column)
                    . " to "
                    . printUtf32($known)
                    . ", NOT "
                    . printUtf32($utf32);
        }
    }
    close($cns_txt);
}
241*b1cdbd2cSJim Jagielski
# Disabled (the condition is a constant 0): read mappings from
# input/Uni2CNS into @cns_map.
#
# Each data line has the form "UUUU<TAB>P-RRCC<TAB>...", with the same
# plane/row/column encoding as the other inputs.  A position that is
# still unmapped is filled in and counted in $count_Uni2CNS.  A position
# that already has a mapping is left alone without any consistency
# check; the original check ("Mapping ... to ..., NOT ...") was
# commented out.  Every plane 1 position seen is printed to standard
# output.
if (0)
{
    $filename = "Uni2CNS";
    open(my $uni2cns, "<", "input/" . $filename)
        or die "Cannot read " . $filename . ": " . $!;
    while (<$uni2cns>)
    {
        next unless /([0-9A-F]+)\t([0-9A-F])-([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])\t.*$/;

        $utf32 = oct("0x" . $1);
        $cns_plane = oct("0x" . $2);
        $cns_row = oct("0x" . $3) - 0x20;
        $cns_column = oct("0x" . $4) - 0x20;

        # printUtf32 already supplies the "U+" prefix.
        isValidUtf32($utf32)
            or die "Bad UTF32 char " . printUtf32($utf32);
        isValidCns116431992($cns_plane, $cns_row, $cns_column)
            or die "Bad CNS11643-1992 char "
                       . printCns116431992($cns_plane,
                                           $cns_row,
                                           $cns_column);

        if (!defined($cns_map[$cns_plane][$cns_row][$cns_column]))
        {
            $cns_map[$cns_plane][$cns_row][$cns_column] = $utf32;
            $cns_plane_used[$cns_plane] = 1;
            ++$count_Uni2CNS;
        }

        if ($cns_plane == 1)
        {
            print printCns116431992($cns_plane, $cns_row, $cns_column),
                  "\n";
        }
    }
    close($uni2cns);
}
290*b1cdbd2cSJim Jagielski
# Build the reverse table @uni_map from @cns_map.
#
# @uni_map is indexed by Unicode plane, page (bits 8..15) and index
# (low 8 bits); each entry holds the CNS position packed as
# (plane << 16) | (row << 8) | column.  @uni_plane_used and
# @uni_page_used record which planes and pages received an entry.
#
# When two CNS positions map to the same code point, the one visited
# first is kept, unless it lies in the Fictitious Character Set
# Extensions range (Lunde, p. 131) and the later one does not; then the
# later one replaces it.  Every such collision is reported on standard
# output.

# True for plane 3 positions beyond row 66, column 38.
my $in_fictitious_extension = sub
{
    my ($plane, $row, $column) = @_;
    return $plane == 3
           && ($row == 66 && $column > 38
               || $row > 66);
};

for ($cns_plane = 1; $cns_plane <= 16; ++$cns_plane)
{
    next unless defined($cns_plane_used[$cns_plane]);

    for ($cns_row = 1; $cns_row <= 94; ++$cns_row)
    {
        for ($cns_column = 1; $cns_column <= 94; ++$cns_column)
        {
            next
                unless defined($cns_map[$cns_plane][$cns_row][$cns_column]);

            $utf32 = $cns_map[$cns_plane][$cns_row][$cns_column];
            ($uni_plane, $uni_page, $uni_index)
                = ($utf32 >> 16, ($utf32 >> 8) & 0xFF, $utf32 & 0xFF);
            my $packed_cns
                = ($cns_plane << 16) | ($cns_row << 8) | $cns_column;

            # No entry for this code point yet: record it and move on.
            unless (defined($uni_plane_used[$uni_plane])
                    && defined($uni_page_used[$uni_plane][$uni_page])
                    && defined($uni_map[$uni_plane][$uni_page][$uni_index]))
            {
                $uni_map[$uni_plane][$uni_page][$uni_index] = $packed_cns;
                $uni_plane_used[$uni_plane] = 1;
                $uni_page_used[$uni_plane][$uni_page] = 1;
                next;
            }

            # Collision: unpack the position recorded earlier.
            $cns1 = $uni_map[$uni_plane][$uni_page][$uni_index];
            ($cns1_plane, $cns1_row, $cns1_column)
                = ($cns1 >> 16, ($cns1 >> 8) & 0xFF, $cns1 & 0xFF);

            my $current_text
                = printCns116431992($cns_plane, $cns_row, $cns_column);
            my $earlier_text
                = printCns116431992($cns1_plane, $cns1_row, $cns1_column);

            if ($in_fictitious_extension->($cns_plane,
                                           $cns_row,
                                           $cns_column))
            {
                # The current position is fictitious: keep the earlier one.
                print " (", printUtf32($utf32), " to fictious ",
                      $current_text, " ignored, favouring ",
                      $earlier_text, ")\n";
            }
            elsif ($in_fictitious_extension->($cns1_plane,
                                              $cns1_row,
                                              $cns1_column))
            {
                # The earlier position is fictitious: replace it.
                $uni_map[$uni_plane][$uni_page][$uni_index] = $packed_cns;
                print " (", printUtf32($utf32), " to fictious ",
                      $earlier_text, " ignored, favouring ",
                      $current_text, ")\n";
            }
            else
            {
                # Neither is fictitious: keep the earlier one and warn.
                print "WARNING!  Mapping ", printUtf32($utf32), " to ",
                      $earlier_text, ", NOT ", $current_text, "\n";
            }
        }
    }
}
# Sanity check: abort if any code point in U+0000..U+007F received a
# CNS 11643-1992 mapping in @uni_map.  The outer test skips the scan
# when Unicode plane 0, page 0 has no entries at all.
if (defined($uni_plane_used[0]) && defined($uni_page_used[0][0]))
{
    for ($utf32 = 0; $utf32 <= 0x7F; ++$utf32)
    {
        # Index with the code point under test.  The original indexed
        # with $uni_index, a value left over from earlier code, so the
        # same single slot was examined on every iteration.
        next unless defined($uni_map[0][0][$utf32]);

        $cns = $uni_map[0][0][$utf32];
        die "Mapping "
                . printUtf32($utf32)
                . " to "
                . printCns116431992($cns >> 16,
                                    ($cns >> 8) & 0xFF,
                                    $cns & 0xFF);
    }
}
399*b1cdbd2cSJim Jagielski
# Create (or truncate) the generated table file, named after the
# lowercased $id with a ".tab" suffix, and attach it to the OUT handle.
# The three-argument form of open keeps the mode separate from the
# file name, and the failure message includes the system error.
$filename = lc($id) . ".tab";
open(OUT, ">", $filename)
    or die "Cannot write " . $filename . ": " . $!;
402*b1cdbd2cSJim Jagielski
# Copy the leading comment block of the file lc($id) . ".pl"
# (presumably this script's own source -- confirm) to OUT, rewritten as
# a C comment:
#   "#!..."   lines are dropped;
#   the first "#*..." line becomes "/*...", opening the comment;
#   later "#*..." lines become " *...*/", closing the comment;
#   other "#" lines become " *" plus their text, minus one leading space.
# Copying stops at the first line that does not start with "#".
{
    $filename = lc($id) . ".pl";
    open(my $script, "<", $filename)
        or die "Cannot read " . $filename . ": " . $!;
    $first = 1;
    while (<$script>)
    {
        if (/^\#!.*$/)
        {
            # Interpreter line: not part of the header text.
        }
        elsif (/^\#(\*.*)$/)
        {
            if ($first == 1)
            {
                print OUT "/", $1, "\n";
                $first = 0;
            }
            else
            {
                # Replace the final "*" with "*/" by dropping the last
                # character and appending "/".
                print OUT " ", substr($1, 0, length($1) - 1), "/\n";
            }
        }
        elsif (/^\# ?(.*)$/)
        {
            print OUT " *", $1, "\n";
        }
        else
        {
            # End of the leading comment block.
            last;
        }
    }
    close($script);
}
439*b1cdbd2cSJim Jagielski
# Emit the include guard for sal/types.h, then open the definition of
# the aImpl<id>ToUnicodeData array.
print OUT "\n"
          . "#ifndef _SAL_TYPES_H_\n"
          . "#include \"sal/types.h\"\n"
          . "#endif\n"
          . "\n";

print OUT "static sal_uInt16 const aImpl" . $id . "ToUnicodeData[] = {\n";

# Running count of array elements written so far.
$cns_data_index = 0;
448*b1cdbd2cSJim Jagielskifor ($cns_plane = 1; $cns_plane <= 16; ++$cns_plane)
449*b1cdbd2cSJim Jagielski{
450*b1cdbd2cSJim Jagielski    if (defined($cns_plane_used[$cns_plane]))
451*b1cdbd2cSJim Jagielski    {
452*b1cdbd2cSJim Jagielski        $cns_rows = 0;
453*b1cdbd2cSJim Jagielski        $cns_chars = 0;
454*b1cdbd2cSJim Jagielski        for ($cns_row = 1; $cns_row <= 94; ++$cns_row)
455*b1cdbd2cSJim Jagielski        {
456*b1cdbd2cSJim Jagielski            $cns_row_first = -1;
457*b1cdbd2cSJim Jagielski            for ($cns_column = 1; $cns_column <= 94; ++$cns_column)
458*b1cdbd2cSJim Jagielski            {
459*b1cdbd2cSJim Jagielski                if (defined($cns_map[$cns_plane][$cns_row][$cns_column]))
460*b1cdbd2cSJim Jagielski                {
461*b1cdbd2cSJim Jagielski                    if ($cns_row_first == -1)
462*b1cdbd2cSJim Jagielski                    {
463*b1cdbd2cSJim Jagielski                        $cns_row_first = $cns_column;
464*b1cdbd2cSJim Jagielski                    }
465*b1cdbd2cSJim Jagielski                    $cns_row_last = $cns_column;
466*b1cdbd2cSJim Jagielski                }
467*b1cdbd2cSJim Jagielski            }
468*b1cdbd2cSJim Jagielski            if ($cns_row_first != -1)
469*b1cdbd2cSJim Jagielski            {
470*b1cdbd2cSJim Jagielski                $cns_data_offsets[$cns_plane][$cns_row] = $cns_data_index;
471*b1cdbd2cSJim Jagielski                ++$cns_rows;
472*b1cdbd2cSJim Jagielski                print OUT " /* plane ", $cns_plane, ", row ", $cns_row,
473*b1cdbd2cSJim Jagielski                          " */\n";
474*b1cdbd2cSJim Jagielski
475*b1cdbd2cSJim Jagielski                $cns_row_surrogates_first = -1;
476*b1cdbd2cSJim Jagielski                $cns_row_chars = 0;
477*b1cdbd2cSJim Jagielski                $cns_row_surrogates = 0;
478*b1cdbd2cSJim Jagielski
479*b1cdbd2cSJim Jagielski                print OUT "  ", $cns_row_first, " | (", $cns_row_last,
480*b1cdbd2cSJim Jagielski                          " << 8), /* first, last */\n";
481*b1cdbd2cSJim Jagielski                ++$cns_data_index;
482*b1cdbd2cSJim Jagielski
483*b1cdbd2cSJim Jagielski                print OUT "  ", printSpaces(7, 10, $cns_row_first);
484*b1cdbd2cSJim Jagielski                $bol = 0;
485*b1cdbd2cSJim Jagielski                for ($cns_column = $cns_row_first;
486*b1cdbd2cSJim Jagielski                     $cns_column <= $cns_row_last;
487*b1cdbd2cSJim Jagielski                     ++$cns_column)
488*b1cdbd2cSJim Jagielski                {
489*b1cdbd2cSJim Jagielski                    if ($bol == 1)
490*b1cdbd2cSJim Jagielski                    {
491*b1cdbd2cSJim Jagielski                        print OUT "  ";
492*b1cdbd2cSJim Jagielski                        $bol = 0;
493*b1cdbd2cSJim Jagielski                    }
494*b1cdbd2cSJim Jagielski                    if (defined($cns_map[$cns_plane][$cns_row][$cns_column]))
495*b1cdbd2cSJim Jagielski                    {
496*b1cdbd2cSJim Jagielski                        $utf32 = $cns_map[$cns_plane][$cns_row][$cns_column];
497*b1cdbd2cSJim Jagielski                        ++$cns_row_chars;
498*b1cdbd2cSJim Jagielski                        if ($utf32 <= 0xFFFF)
499*b1cdbd2cSJim Jagielski                        {
500*b1cdbd2cSJim Jagielski                            printf OUT "0x%04X,", $utf32;
501*b1cdbd2cSJim Jagielski                        }
502*b1cdbd2cSJim Jagielski                        else
503*b1cdbd2cSJim Jagielski                        {
504*b1cdbd2cSJim Jagielski                            ++$cns_row_surrogates;
505*b1cdbd2cSJim Jagielski                            printf OUT "0x%04X,",
506*b1cdbd2cSJim Jagielski                                       (0xD800 | (($utf32 - 0x10000) >> 10));
507*b1cdbd2cSJim Jagielski                            if ($cns_row_surrogates_first == -1)
508*b1cdbd2cSJim Jagielski                            {
509*b1cdbd2cSJim Jagielski                                $cns_row_surrogates_first = $cns_column;
510*b1cdbd2cSJim Jagielski                            }
511*b1cdbd2cSJim Jagielski                            $cns_row_surrogates_last = $cns_column;
512*b1cdbd2cSJim Jagielski                        }
513*b1cdbd2cSJim Jagielski                    }
514*b1cdbd2cSJim Jagielski                    else
515*b1cdbd2cSJim Jagielski                    {
516*b1cdbd2cSJim Jagielski                        printf OUT "0xffff,";
517*b1cdbd2cSJim Jagielski                    }
518*b1cdbd2cSJim Jagielski                    ++$cns_data_index;
519*b1cdbd2cSJim Jagielski                    if ($cns_column % 10 == 9)
520*b1cdbd2cSJim Jagielski                    {
521*b1cdbd2cSJim Jagielski                        print OUT "\n";
522*b1cdbd2cSJim Jagielski                        $bol = 1;
523*b1cdbd2cSJim Jagielski                    }
524*b1cdbd2cSJim Jagielski                }
525*b1cdbd2cSJim Jagielski                if ($bol == 0)
526*b1cdbd2cSJim Jagielski                {
527*b1cdbd2cSJim Jagielski                    print OUT "\n";
528*b1cdbd2cSJim Jagielski                }
529*b1cdbd2cSJim Jagielski
530*b1cdbd2cSJim Jagielski                if ($cns_row_surrogates_first != -1)
531*b1cdbd2cSJim Jagielski                {
532*b1cdbd2cSJim Jagielski                    print OUT "  ", $cns_row_surrogates_first,
533*b1cdbd2cSJim Jagielski                              ", /* first low-surrogate */\n";
534*b1cdbd2cSJim Jagielski                    ++$cns_data_index;
535*b1cdbd2cSJim Jagielski
536*b1cdbd2cSJim Jagielski                    print OUT "  ",
537*b1cdbd2cSJim Jagielski                              printSpaces(7, 10, $cns_row_surrogates_first);
538*b1cdbd2cSJim Jagielski                    $bol = 0;
539*b1cdbd2cSJim Jagielski                    for ($cns_column = $cns_row_surrogates_first;
540*b1cdbd2cSJim Jagielski                         $cns_column <= $cns_row_surrogates_last;
541*b1cdbd2cSJim Jagielski                         ++$cns_column)
542*b1cdbd2cSJim Jagielski                    {
543*b1cdbd2cSJim Jagielski                        if ($bol == 1)
544*b1cdbd2cSJim Jagielski                        {
545*b1cdbd2cSJim Jagielski                            print OUT "  ";
546*b1cdbd2cSJim Jagielski                            $bol = 0;
547*b1cdbd2cSJim Jagielski                        }
548*b1cdbd2cSJim Jagielski                        $utf32 = 0;
549*b1cdbd2cSJim Jagielski                        if (defined($cns_map[$cns_plane]
550*b1cdbd2cSJim Jagielski                                            [$cns_row]
551*b1cdbd2cSJim Jagielski                                            [$cns_column]))
552*b1cdbd2cSJim Jagielski                        {
553*b1cdbd2cSJim Jagielski                            $utf32
554*b1cdbd2cSJim Jagielski                                = $cns_map[$cns_plane][$cns_row][$cns_column];
555*b1cdbd2cSJim Jagielski                        }
556*b1cdbd2cSJim Jagielski                        if ($utf32 <= 0xFFFF)
557*b1cdbd2cSJim Jagielski                        {
558*b1cdbd2cSJim Jagielski                            printf OUT "     0,";
559*b1cdbd2cSJim Jagielski                        }
560*b1cdbd2cSJim Jagielski                        else
561*b1cdbd2cSJim Jagielski                        {
562*b1cdbd2cSJim Jagielski                            printf OUT "0x%04X,",
563*b1cdbd2cSJim Jagielski                                       (0xDC00
564*b1cdbd2cSJim Jagielski                                            | (($utf32 - 0x10000) & 0x3FF));
565*b1cdbd2cSJim Jagielski                        }
566*b1cdbd2cSJim Jagielski                        ++$cns_data_index;
567*b1cdbd2cSJim Jagielski                        if ($cns_column % 10 == 9)
568*b1cdbd2cSJim Jagielski                        {
569*b1cdbd2cSJim Jagielski                            print OUT "\n";
570*b1cdbd2cSJim Jagielski                            $bol = 1;
571*b1cdbd2cSJim Jagielski                        }
572*b1cdbd2cSJim Jagielski                    }
573*b1cdbd2cSJim Jagielski                    if ($bol == 0)
574*b1cdbd2cSJim Jagielski                    {
575*b1cdbd2cSJim Jagielski                        print OUT "\n";
576*b1cdbd2cSJim Jagielski                    }
577*b1cdbd2cSJim Jagielski                }
578*b1cdbd2cSJim Jagielski
579*b1cdbd2cSJim Jagielski                $cns_chars += $cns_row_chars;
580*b1cdbd2cSJim Jagielski                $cns_data_space[$cns_plane][$cns_row]
581*b1cdbd2cSJim Jagielski                    = ($cns_data_index
582*b1cdbd2cSJim Jagielski                           - $cns_data_offsets[$cns_plane][$cns_row]) * 2;
583*b1cdbd2cSJim Jagielski                $cns_data_used[$cns_plane][$cns_row]
584*b1cdbd2cSJim Jagielski                    = (1 + $cns_row_chars
585*b1cdbd2cSJim Jagielski                           + ($cns_row_surrogates == 0 ?
586*b1cdbd2cSJim Jagielski                                  0 : 1 + $cns_row_surrogates)) * 2;
587*b1cdbd2cSJim Jagielski            }
588*b1cdbd2cSJim Jagielski            else
589*b1cdbd2cSJim Jagielski            {
590*b1cdbd2cSJim Jagielski                print OUT " /* plane ", $cns_plane, ", row ", $cns_row,
591*b1cdbd2cSJim Jagielski                          ": --- */\n";
592*b1cdbd2cSJim Jagielski                $cns_data_offsets[$cns_plane][$cns_row] = -1;
593*b1cdbd2cSJim Jagielski            }
594*b1cdbd2cSJim Jagielski        }
595*b1cdbd2cSJim Jagielski        print "cns plane ",
596*b1cdbd2cSJim Jagielski              $cns_plane,
597*b1cdbd2cSJim Jagielski              ": rows = ",
598*b1cdbd2cSJim Jagielski              $cns_rows,
599*b1cdbd2cSJim Jagielski              ", chars = ",
600*b1cdbd2cSJim Jagielski              $cns_chars,
601*b1cdbd2cSJim Jagielski              "\n";
602*b1cdbd2cSJim Jagielski    }
603*b1cdbd2cSJim Jagielski}
604*b1cdbd2cSJim Jagielskiprint OUT "};\n\n";
605*b1cdbd2cSJim Jagielski
# Emit the aImpl<id>ToUnicodeRowOffsets table.  For every used CNS plane
# (1..16) one entry per row (1..94) is written: the value recorded in
# @cns_data_offsets for that row, or -1 when that value is -1.  Planes
# that are not marked in @cns_plane_used contribute only a comment line.
print OUT "static sal_Int32 const aImpl${id}ToUnicodeRowOffsets[] = {\n";
$cns_plane = 1;
while ($cns_plane <= 16)
{
    if (!defined ($cns_plane_used[$cns_plane]))
    {
        # Unused plane: no table entries, just a marker comment.
        print OUT "  /* plane ${cns_plane}: --- */\n";
        next;
    }

    # Running count for this plane; 4 is added per row that has data
    # (presumably the byte size of one sal_Int32 entry -- TODO confirm).
    $cns_rowoffsets_used[$cns_plane] = 0;
    $cns_row = 1;
    while ($cns_row <= 94)
    {
        my $row_start = $cns_data_offsets[$cns_plane][$cns_row];
        if ($row_start != -1)
        {
            # printStats (defined elsewhere) is kept as its own list
            # element so that it is called exactly as before.
            print OUT "  $row_start, /* plane $cns_plane, row $cns_row; ",
                      printStats($cns_data_used[$cns_plane][$cns_row],
                                 $cns_data_space[$cns_plane][$cns_row]),
                      " */\n";
            $cns_rowoffsets_used[$cns_plane] += 4;
        }
        else
        {
            print OUT "  -1, /* plane $cns_plane, row $cns_row */\n";
        }
    }
    continue
    {
        ++$cns_row;
    }
}
continue
{
    ++$cns_plane;
}
print OUT "};\n\n";
644*b1cdbd2cSJim Jagielski
# Emit the aImpl<id>ToUnicodePlaneOffsets table: one entry per CNS plane
# (1..16).  Used planes are numbered consecutively from 0 and written as
# "<n> * 94"; planes not marked in @cns_plane_used are written as -1.
print OUT "static sal_Int32 const aImpl${id}ToUnicodePlaneOffsets[] = {\n";
$cns_row_offset = 0;
$cns_plane = 1;
while ($cns_plane <= 16)
{
    if (!defined ($cns_plane_used[$cns_plane]))
    {
        print OUT "  -1, /* plane $cns_plane */\n";
        next;
    }

    # Take the next consecutive slot number for this used plane.
    my $plane_slot = $cns_row_offset++;
    print OUT "  $plane_slot * 94, /* plane $cns_plane; ",
              printStats($cns_rowoffsets_used[$cns_plane], 94 * 4),
              " */\n";
}
continue
{
    ++$cns_plane;
}
print OUT "};\n\n";
667*b1cdbd2cSJim Jagielski
# Emit the aImplUnicodeTo<id>Data byte table.  For every used page of
# every used Unicode plane the output contains the first and last index
# that has a mapping, followed by three bytes per index in that range
# (zeros where no mapping exists).  While writing, the start position of
# each page is recorded in @uni_data_offsets (-1 for unused pages), and
# the used/occupied byte counts in @uni_data_used / @uni_data_space.
print OUT "static sal_uInt8 const aImplUnicodeTo${id}Data[] = {\n";
$uni_data_index = 0;
$uni_plane = 0;
while ($uni_plane <= 16)
{
    if (!defined($uni_plane_used[$uni_plane]))
    {
        print OUT " /* plane ${uni_plane}: --- */\n";
        next;
    }

    $uni_page = 0;
    while ($uni_page <= 255)
    {
        if (!defined($uni_page_used[$uni_plane][$uni_page]))
        {
            # Unused page: mark it with -1 and leave a comment only.
            $uni_data_offsets[$uni_plane][$uni_page] = -1;
            print OUT " /* plane $uni_plane, page ${uni_page}: --- */\n";
            next;
        }

        $uni_data_offsets[$uni_plane][$uni_page] = $uni_data_index;
        print OUT " /* plane $uni_plane, page $uni_page */\n";

        # Scan the whole page for the lowest and highest mapped index.
        # Only $uni_page_first is reset here; $uni_page_last keeps its
        # previous value if nothing on the page is mapped.
        $uni_page_first = -1;
        for ($uni_index = 0; $uni_index <= 255; ++$uni_index)
        {
            next unless defined($uni_map[$uni_plane][$uni_page][$uni_index]);
            $uni_page_first = $uni_index if $uni_page_first == -1;
            $uni_page_last = $uni_index;
        }

        # Two header bytes: first and last mapped index.
        print OUT "  $uni_page_first, $uni_page_last, /* first, last */\n";
        $uni_data_index += 2;
        $uni_data_used[$uni_plane][$uni_page] = 2;

        # Indent the first data line via printSpaces (defined elsewhere),
        # then write eight entries per output line.
        print OUT "  ", printSpaces(9, 8, $uni_page_first);
        $bol = 0;
        $uni_index = $uni_page_first;
        while ($uni_index <= $uni_page_last)
        {
            if ($bol)
            {
                print OUT "  ";
                $bol = 0;
            }

            if (defined($uni_map[$uni_plane][$uni_page][$uni_index]))
            {
                # The stored value is split into three bytes, most
                # significant first (presumably plane, row and column
                # -- TODO confirm against the code that fills @uni_map).
                $cns = $uni_map[$uni_plane][$uni_page][$uni_index];
                printf OUT "%2d,%2d,%2d,",
                           $cns >> 16,
                           ($cns >> 8) & 0xFF,
                           $cns & 0xFF;
                $uni_data_used[$uni_plane][$uni_page] += 3;
            }
            else
            {
                print OUT " 0, 0, 0,";
            }
            $uni_data_index += 3;

            # Break the output line after every eighth index.
            if ($uni_index % 8 == 7)
            {
                print OUT "\n";
                $bol = 1;
            }
        }
        continue
        {
            ++$uni_index;
        }
        # Terminate a partially filled last line.
        print OUT "\n" unless $bol;

        $uni_data_space[$uni_plane][$uni_page]
            = $uni_data_index - $uni_data_offsets[$uni_plane][$uni_page];
    }
    continue
    {
        ++$uni_page;
    }
}
continue
{
    ++$uni_plane;
}
print OUT "};\n\n";
756*b1cdbd2cSJim Jagielski
# Emit the aImplUnicodeTo<id>PageOffsets table.  For every used Unicode
# plane (0..16) one entry per page (0..255) is written: the value stored
# in @uni_data_offsets, or -1 when that value is -1.  Per-plane totals of
# the page statistics are accumulated in @uni_pageoffsets_used,
# @uni_data_used_sum and @uni_data_space_sum.
print OUT "static sal_Int32 const aImplUnicodeTo${id}PageOffsets[] = {\n";
$uni_plane = 0;
while ($uni_plane <= 16)
{
    if (!defined($uni_plane_used[$uni_plane]))
    {
        # Unused plane: no entries, only a marker comment.
        print OUT "  /* plane ${uni_plane}: --- */\n";
        next;
    }

    $uni_pageoffsets_used[$uni_plane] = 0;
    $uni_data_used_sum[$uni_plane] = 0;
    $uni_data_space_sum[$uni_plane] = 0;
    $uni_page = 0;
    while ($uni_page <= 255)
    {
        $offset = $uni_data_offsets[$uni_plane][$uni_page];
        if ($offset != -1)
        {
            print OUT "  $offset, /* plane $uni_plane, page $uni_page; ",
                      printStats($uni_data_used[$uni_plane][$uni_page],
                                 $uni_data_space[$uni_plane][$uni_page]),
                      " */\n";
            # 4 is added per page with data (presumably the byte size
            # of one sal_Int32 entry -- TODO confirm).
            $uni_pageoffsets_used[$uni_plane] += 4;
            $uni_data_used_sum[$uni_plane]
                += $uni_data_used[$uni_plane][$uni_page];
            $uni_data_space_sum[$uni_plane]
                += $uni_data_space[$uni_plane][$uni_page];
        }
        else
        {
            print OUT "  -1, /* plane $uni_plane, page $uni_page */\n";
        }
    }
    continue
    {
        ++$uni_page;
    }
}
continue
{
    ++$uni_plane;
}
print OUT "};\n\n";
802*b1cdbd2cSJim Jagielski
# Emit the aImplUnicodeTo<id>PlaneOffsets table: one entry per Unicode
# plane (0..16).  Used planes are numbered consecutively from 0 and
# written as "<n> * 256"; planes not marked in @uni_plane_used are
# written as -1.  The table ends with a comment holding the overall
# statistics for the plane table, the page tables and the data table.
print OUT "static sal_Int32 const aImplUnicodeTo${id}PlaneOffsets[] = {\n";
$uni_page_offset = $uni_planeoffsets_used = 0;
$uni_pageoffsets_used_sum = $uni_pageoffsets_space_sum = 0;
$uni_data_used_sum2 = $uni_data_space_sum2 = 0;
$uni_plane = 0;
while ($uni_plane <= 16)
{
    if (!defined ($uni_plane_used[$uni_plane]))
    {
        print OUT "  -1, /* plane $uni_plane */\n";
        next;
    }

    # Take the next consecutive slot number for this used plane.
    my $plane_slot = $uni_page_offset++;
    print OUT "  $plane_slot * 256, /* plane $uni_plane; ",
              printStats($uni_pageoffsets_used[$uni_plane], 256 * 4),
              ", ",
              printStats($uni_data_used_sum[$uni_plane],
                         $uni_data_space_sum[$uni_plane]),
              " */\n";

    # Fold this plane's figures into the grand totals.
    $uni_planeoffsets_used += 4;
    $uni_pageoffsets_used_sum += $uni_pageoffsets_used[$uni_plane];
    $uni_pageoffsets_space_sum += 256 * 4;
    $uni_data_used_sum2 += $uni_data_used_sum[$uni_plane];
    $uni_data_space_sum2 += $uni_data_space_sum[$uni_plane];
}
continue
{
    ++$uni_plane;
}
print OUT " /* ",
          printStats($uni_planeoffsets_used, 17 * 4),
          ", ",
          printStats($uni_pageoffsets_used_sum, $uni_pageoffsets_space_sum),
          ", ",
          printStats($uni_data_used_sum2, $uni_data_space_sum2),
          " */\n};\n";
844*b1cdbd2cSJim Jagielski
# Finish the generated file.  Buffered write errors (full disk, I/O
# failure) are only reported when the handle is closed, so a failing
# close must abort the run instead of silently leaving a truncated table.
close(OUT) or die "cannot close generated output file: $!";

# Report the three counters and their total on standard output.
print "Unihan.txt = ", $count_Unihan_txt,
      ", CNS11643.TXT = ", $count_CNS11643_TXT,
      ", Uni2CNS = ", $count_Uni2CNS,
      ", total = ",
          ($count_Unihan_txt + $count_CNS11643_TXT + $count_Uni2CNS),
      "\n";
853