1*c667dd47SPedro Giffuni#!/usr/bin/env perl
27e90fac2SAndrew Rist#**************************************************************
37e90fac2SAndrew Rist#
47e90fac2SAndrew Rist#  Licensed to the Apache Software Foundation (ASF) under one
57e90fac2SAndrew Rist#  or more contributor license agreements.  See the NOTICE file
67e90fac2SAndrew Rist#  distributed with this work for additional information
77e90fac2SAndrew Rist#  regarding copyright ownership.  The ASF licenses this file
87e90fac2SAndrew Rist#  to you under the Apache License, Version 2.0 (the
97e90fac2SAndrew Rist#  "License"); you may not use this file except in compliance
107e90fac2SAndrew Rist#  with the License.  You may obtain a copy of the License at
117e90fac2SAndrew Rist#
127e90fac2SAndrew Rist#    http://www.apache.org/licenses/LICENSE-2.0
137e90fac2SAndrew Rist#
147e90fac2SAndrew Rist#  Unless required by applicable law or agreed to in writing,
157e90fac2SAndrew Rist#  software distributed under the License is distributed on an
167e90fac2SAndrew Rist#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
177e90fac2SAndrew Rist#  KIND, either express or implied.  See the License for the
187e90fac2SAndrew Rist#  specific language governing permissions and limitations
197e90fac2SAndrew Rist#  under the License.
207e90fac2SAndrew Rist#
217e90fac2SAndrew Rist#**************************************************************
227e90fac2SAndrew Rist
237e90fac2SAndrew Rist
24cdf0e10cSrcweir
25cdf0e10cSrcweir# The following files must be available in a ./input subdir:
26cdf0e10cSrcweir
27cdf0e10cSrcweir# <http://www.unicode.org/Public/UNIDATA/Unihan.txt>:
28cdf0e10cSrcweir#  "Unicode version: 3.1.1    Table version: 1.1    Date: 28 June 2001"
29cdf0e10cSrcweir#  contains descriptions for:
30cdf0e10cSrcweir#   U+3400..4DFF CJK Unified Ideographs Extension A
31cdf0e10cSrcweir#   U+4E00..9FFF CJK Unified Ideographs
32cdf0e10cSrcweir#   U+F900..FAFF CJK Compatibility Ideographs
33cdf0e10cSrcweir#   U+20000..2F7FF CJK Unified Ideographs Extension B
34cdf0e10cSrcweir#   U+2F800..2FFFF CJK Compatibility Ideographs Supplement
35cdf0e10cSrcweir
36cdf0e10cSrcweir# <http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/CNS11643.TXT>:
37cdf0e10cSrcweir#  "Unicode version: 1.1    Table version: 0.0d1    Date: 21 October 1994"
38cdf0e10cSrcweir#  contains mappings for CNS 11643-1986
39cdf0e10cSrcweir
40cdf0e10cSrcweir# <http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/ftp/CJKtable/Uni2CNS.Z>:
41cdf0e10cSrcweir#  "Unicode version: 1.1    Table version: 0.49    Date: 26 March 1998"
42cdf0e10cSrcweir#  contains mappings for CNS 11643-1992 that are incompatible with
43cdf0e10cSrcweir#   CNS11643.TXT
44cdf0e10cSrcweir
# Base name of this conversion table: lower-cased it names the generated
# ".tab" file, and verbatim it is embedded in the names of the emitted C
# arrays.
$id = 'Cns116431992';
46cdf0e10cSrcweir
# Report whether a number is a Unicode code point that may appear in a
# mapping.
#
#   $_[0]  the candidate code point (a number)
#
# Returns a true value when the code point lies in U+0000..U+10FFFF, is not a
# surrogate (U+D800..U+DFFF), is not in U+FDD0..U+FDEF, and its low 16 bits
# are below 0xFFFE; returns a false value otherwise.
sub isValidUtf32
{
    my ($code_point) = @_;

    # Outside the Unicode code space.
    return !1 if $code_point < 0 || $code_point > 0x10FFFF;

    # Surrogate code points.
    return !1 if $code_point >= 0xD800 && $code_point <= 0xDFFF;

    # The excluded U+FDD0..U+FDEF block.
    return !1 if $code_point >= 0xFDD0 && $code_point <= 0xFDEF;

    # The last two code points of every plane (xxFFFE and xxFFFF).
    return ($code_point & 0xFFFF) < 0xFFFE;
}
55cdf0e10cSrcweir
# Format a code point for messages as "U+" followed by its upper-case
# hexadecimal value, zero-padded to at least four digits.
#
#   $_[0]  the code point (a number)
#
# Returns the formatted string, e.g. "U+4E00" or "U+20000".
sub printUtf32
{
    my ($code_point) = @_;
    my $hex_digits = sprintf('%04X', $code_point);
    return 'U+' . $hex_digits;
}
61cdf0e10cSrcweir
# Report whether a (plane, row, column) triple is inside the ranges this
# script accepts for CNS 11643-1992: plane 1..16, row 1..94, column 1..94.
#
#   $_[0]  plane number
#   $_[1]  row number
#   $_[2]  column number
#
# Returns a true value when all three are in range, a false value otherwise.
sub isValidCns116431992
{
    my ($plane, $row, $column) = @_;

    # Each coordinate is checked against its own inclusive 1..max range.
    for my $bounded ([$plane, 16], [$row, 94], [$column, 94])
    {
        my ($value, $max) = @$bounded;
        return !1 if $value < 1 || $value > $max;
    }
    return !!1;
}
71cdf0e10cSrcweir
# Format a CNS 11643-1992 position for messages as "<plane>-<row>/<column>",
# with row and column zero-padded to two decimal digits.
#
#   $_[0]  plane number
#   $_[1]  row number
#   $_[2]  column number
#
# Returns the formatted string, e.g. "3-66/39".
sub printCns116431992
{
    return sprintf('%d-%02d/%02d', @_[0 .. 2]);
}
79cdf0e10cSrcweir
# Format a usage statistic as "<used>/<space> bytes (<percent>%)", where the
# percentage is used relative to space with one decimal place.
#
#   $_[0]  number of bytes actually used
#   $_[1]  number of bytes available
#
# Returns the formatted string.  When the available space is zero the
# percentage is reported as 0.0 instead of dying on a division by zero.
sub printStats
{
    my ($used, $space) = @_;

    # An empty table has no meaningful fill ratio; report 0 rather than
    # dividing by zero.
    my $percent = $space ? $used * 100 / $space : 0;

    return sprintf("%d/%d bytes (%.1f%%)", $used, $space, $percent);
}
89cdf0e10cSrcweir
# Build the blank padding needed to start a table line part-way through:
# one blank column of $column_width spaces for every column position from
# the start of the line containing $end up to (not including) $end.
#
#   $_[0]  width of one column in characters
#   $_[1]  number of columns per output line
#   $_[2]  index of the first column that will actually be printed
#
# Arguments are expected to be non-negative integers.  Returns the padding
# string (empty when $end falls on the start of a line).
#
# Only lexical variables are used, so the caller's package variables are
# left untouched.
sub printSpaces
{
    my ($column_width, $columns_per_line, $end) = @_;

    # Number of column slots between the start of the line and $end.
    my $line_start = int($end / $columns_per_line) * $columns_per_line;
    my $skipped_columns = $end - $line_start;

    return "" if $skipped_columns <= 0 || $column_width <= 0;
    return " " x ($column_width * $skipped_columns);
}
107cdf0e10cSrcweir
# Per input file, the number of CNS positions that file was the first to
# supply a mapping for.
($count_Unihan_txt, $count_CNS11643_TXT, $count_Uni2CNS) = (0, 0, 0);
111cdf0e10cSrcweir
# Read the kCNS1992 and kIRG_TSource entries of input/Unihan.txt into
# @cns_map, which is indexed [plane][row][column] and holds the Unicode code
# point mapped to that CNS position.
#
# A relevant line has the form
#   U+<hex code point> TAB <tag> TAB <plane>-<RRCC>
# where <plane>, RR and CC are hexadecimal; subtracting 0x20 from RR and CC
# yields the row and column numbers.
#
# The first mapping seen for a CNS position is kept.  A later line that maps
# the same position to a different code point is fatal when it is a kCNS1992
# line, but only produces a warning on standard output when it is a
# kIRG_TSource line.  A kCNS1992 line whose value does not have the expected
# form is fatal.
if (1)
{
    my $filename = "Unihan.txt";
    open my $in, '<', "input/" . $filename
        or die "Cannot read " . $filename . ": $!";
    while (my $line = <$in>)
    {
        if ($line =~ /^U\+([0-9A-F]+)\t(kCNS1992|kIRG_TSource)\t([0-9A-F])-([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])$/)
        {
            my $tag = $2;
            my $utf32 = hex($1);
            my $cns_plane = hex($3);
            my $cns_row = hex($4) - 0x20;
            my $cns_column = hex($5) - 0x20;

            # printUtf32 already supplies the "U+" prefix.
            isValidUtf32($utf32)
                or die "Bad UTF32 char " . printUtf32($utf32);
            isValidCns116431992($cns_plane, $cns_row, $cns_column)
                or die "Bad CNS11643-1992 char "
                           . printCns116431992($cns_plane,
                                               $cns_row,
                                               $cns_column);

            my $known = $cns_map[$cns_plane][$cns_row][$cns_column];
            if (!defined($known))
            {
                $cns_map[$cns_plane][$cns_row][$cns_column] = $utf32;
                $cns_plane_used[$cns_plane] = 1;
                ++$count_Unihan_txt;
            }
            elsif ($known != $utf32)
            {
                my $conflict = "Mapping "
                                   . printCns116431992($cns_plane,
                                                       $cns_row,
                                                       $cns_column)
                                   . " to "
                                   . printUtf32($known)
                                   . ", NOT "
                                   . printUtf32($utf32);

                # kCNS1992 conflicts abort the run; kIRG_TSource conflicts
                # are reported and the earlier mapping is kept.
                die $conflict if $tag eq "kCNS1992";
                print "WARNING!  ", $conflict, "\n";
            }
        }
        elsif ($line =~ /^U\+([0-9A-F]+)\tkCNS1992\t.*$/)
        {
            die "Bad format";
        }
    }
    close $in;
}
194cdf0e10cSrcweir
# Read input/CNS11643.TXT and add its mappings for planes 1 and 2 to
# @cns_map (indexed [plane][row][column], holding the Unicode code point).
#
# A relevant line contains
#   0x<plane><RR><CC> TAB 0x<hex code point> TAB #<comment>
# where <plane>, RR and CC are hexadecimal; subtracting 0x20 from RR and CC
# yields the row and column numbers.
#
# Every matching line is validated, but only entries of planes 1 and 2 are
# used.  A mapping that contradicts one already present in @cns_map is
# fatal.
if (1)
{
    my $filename = "CNS11643.TXT";
    open my $in, '<', "input/" . $filename
        or die "Cannot read " . $filename . ": $!";
    while (my $line = <$in>)
    {
        next unless $line =~ /0x([0-9A-F])([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])\t0x([0-9A-F]+)\t\#.*$/;

        my $cns_plane = hex($1);
        my $cns_row = hex($2) - 0x20;
        my $cns_column = hex($3) - 0x20;
        my $utf32 = hex($4);

        # printUtf32 already supplies the "U+" prefix.
        isValidUtf32($utf32)
            or die "Bad UTF32 char " . printUtf32($utf32);
        isValidCns116431992($cns_plane, $cns_row, $cns_column)
            or die "Bad CNS11643-1992 char "
                       . printCns116431992($cns_plane,
                                           $cns_row,
                                           $cns_column);

        # Entries of planes above 2 are validated but not used.
        next if $cns_plane > 2;

        my $known = $cns_map[$cns_plane][$cns_row][$cns_column];
        if (!defined($known))
        {
            $cns_map[$cns_plane][$cns_row][$cns_column] = $utf32;
            $cns_plane_used[$cns_plane] = 1;
            ++$count_CNS11643_TXT;
        }
        elsif ($known != $utf32)
        {
            die "Mapping "
                    . printCns116431992($cns_plane, $cns_row, $cns_column)
                    . " to "
                    . printUtf32($known)
                    . ", NOT "
                    . printUtf32($utf32);
        }
    }
    close $in;
}
241cdf0e10cSrcweir
# Disabled (the condition is 0): read input/Uni2CNS and add its mappings to
# @cns_map (indexed [plane][row][column], holding the Unicode code point).
#
# A relevant line contains
#   <hex code point> TAB <plane>-<RRCC> TAB <rest>
# where <plane>, RR and CC are hexadecimal; subtracting 0x20 from RR and CC
# yields the row and column numbers.
#
# Only CNS positions that have no mapping yet are filled in.  A differing
# mapping that is already present is left in place without complaint; the
# consistency check used for the other input files is not applied to this
# file.  Every plane 1 position encountered is listed on standard output.
if (0)
{
    my $filename = "Uni2CNS";
    open my $in, '<', "input/" . $filename
        or die "Cannot read " . $filename . ": $!";
    while (my $line = <$in>)
    {
        next unless $line =~ /([0-9A-F]+)\t([0-9A-F])-([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])\t.*$/;

        my $utf32 = hex($1);
        my $cns_plane = hex($2);
        my $cns_row = hex($3) - 0x20;
        my $cns_column = hex($4) - 0x20;

        # printUtf32 already supplies the "U+" prefix.
        isValidUtf32($utf32)
            or die "Bad UTF32 char " . printUtf32($utf32);
        isValidCns116431992($cns_plane, $cns_row, $cns_column)
            or die "Bad CNS11643-1992 char "
                       . printCns116431992($cns_plane,
                                           $cns_row,
                                           $cns_column);

        if (!defined($cns_map[$cns_plane][$cns_row][$cns_column]))
        {
            $cns_map[$cns_plane][$cns_row][$cns_column] = $utf32;
            $cns_plane_used[$cns_plane] = 1;
            ++$count_Uni2CNS;
        }

        if ($cns_plane == 1)
        {
            print printCns116431992($cns_plane, $cns_row, $cns_column),
                  "\n";
        }
    }
    close $in;
}
290cdf0e10cSrcweir
# Invert @cns_map into @uni_map.
#
# @uni_map is indexed [Unicode plane][page][index], i.e. by bits 16 and up,
# bits 8..15 and bits 0..7 of the code point, and holds the CNS position
# packed as (plane << 16) | (row << 8) | column.  @uni_plane_used and
# @uni_page_used record which planes and pages received at least one entry.
#
# When two CNS positions map to the same code point, a position inside the
# Fictitious Character Set Extensions (Lunde, p. 131) loses against one
# outside of them and the decision is noted on standard output.  If neither
# or both are fictitious, the position seen first is kept and a warning is
# printed.
{
    # Plane 3 from row 66, column 39 onwards.
    my $is_fictitious = sub
    {
        my ($plane, $row, $column) = @_;
        return $plane == 3 && ($row == 66 && $column > 38 || $row > 66);
    };

    for ($cns_plane = 1; $cns_plane <= 16; ++$cns_plane)
    {
        next unless defined($cns_plane_used[$cns_plane]);

        for ($cns_row = 1; $cns_row <= 94; ++$cns_row)
        {
            for ($cns_column = 1; $cns_column <= 94; ++$cns_column)
            {
                next unless defined($cns_map[$cns_plane]
                                            [$cns_row]
                                            [$cns_column]);

                $utf32 = $cns_map[$cns_plane][$cns_row][$cns_column];
                $uni_plane = $utf32 >> 16;
                $uni_page = ($utf32 >> 8) & 0xFF;
                $uni_index = $utf32 & 0xFF;

                my $packed = ($cns_plane << 16) | ($cns_row << 8) | $cns_column;

                # First CNS position seen for this code point: record it.
                if (!defined($uni_plane_used[$uni_plane])
                    || !defined($uni_page_used[$uni_plane][$uni_page])
                    || !defined($uni_map[$uni_plane][$uni_page][$uni_index]))
                {
                    $uni_map[$uni_plane][$uni_page][$uni_index] = $packed;
                    $uni_plane_used[$uni_plane] = 1;
                    $uni_page_used[$uni_plane][$uni_page] = 1;
                    next;
                }

                # The code point already has a CNS position; unpack it.
                $cns1 = $uni_map[$uni_plane][$uni_page][$uni_index];
                $cns1_plane = $cns1 >> 16;
                $cns1_row = ($cns1 >> 8) & 0xFF;
                $cns1_column = $cns1 & 0xFF;

                my $current = printCns116431992($cns_plane,
                                                $cns_row,
                                                $cns_column);
                my $earlier = printCns116431992($cns1_plane,
                                                $cns1_row,
                                                $cns1_column);

                # Do not map from Unicode to Fictitious Character Set
                # Extensions (Lunde, p. 131), if possible:
                if ($is_fictitious->($cns_plane, $cns_row, $cns_column))
                {
                    # The current position is fictitious: keep the earlier one.
                    print " (", printUtf32($utf32), " to fictitious ",
                          $current, " ignored, favouring ", $earlier, ")\n";
                }
                elsif ($is_fictitious->($cns1_plane, $cns1_row, $cns1_column))
                {
                    # The earlier position is fictitious: replace it.
                    $uni_map[$uni_plane][$uni_page][$uni_index] = $packed;
                    print " (", printUtf32($utf32), " to fictitious ",
                          $earlier, " ignored, favouring ", $current, ")\n";
                }
                else
                {
                    print "WARNING!  Mapping ", printUtf32($utf32), " to ",
                          $earlier, ", NOT ", $current, "\n";
                }
            }
        }
    }
}
# Sanity check: abort if any code point in U+0000..U+007F has received a
# CNS 11643-1992 mapping in @uni_map.  The die message names the code point
# and the CNS position it is mapped to.
if (defined($uni_plane_used[0]) && defined($uni_page_used[0][0]))
{
    for ($utf32 = 0; $utf32 <= 0x7F; ++$utf32)
    {
        # Index with the loop variable so that each of the 128 slots is
        # examined individually, and so that the slot tested is the slot
        # reported below.
        if (defined($uni_map[0][0][$utf32]))
        {
            $cns = $uni_map[0][0][$utf32];
            die "Mapping "
                    . printUtf32($utf32)
                    . " to "
                    . printCns116431992($cns >> 16,
                                        ($cns >> 8) & 0xFF,
                                        $cns & 0xFF);
        }
    }
}
399cdf0e10cSrcweir
# Create (or truncate) the generated table file, named after the lower-cased
# table identifier, and keep it open as OUT for the code that follows.  The
# three-argument form of open keeps the file name from being interpreted as
# part of the open mode, and the failure reason is included in the message.
$filename = lc($id) . ".tab";
open OUT, '>', $filename or die "Cannot write " . $filename . ": $!";
402cdf0e10cSrcweir
# Copy the leading "#" comment block of the file "<lower-cased id>.pl" (this
# script's expected file name, looked up in the current directory) to OUT,
# rewritten as a C comment:
#
#   "#!..."            skipped
#   first "#*..." line written as "/*..."
#   later "#*..." line written as " *..." with its last character replaced
#                      by "/", which closes the C comment
#   any other "#" line written as " *" followed by the text after the "#"
#                      (and after one following blank, if present)
#
# Copying stops at the first line that does not start with "#".  The input
# handle is closed before the block is left.
{
    $filename = lc($id) . ".pl";
    open my $in, '<', $filename
        or die "Cannot read " . $filename . ": $!";
    my $first = 1;
    while (my $line = <$in>)
    {
        if ($line =~ /^\#!.*$/)
        {
            # The interpreter line is not part of the header comment.
            next;
        }
        elsif ($line =~ /^\#(\*.*)$/)
        {
            if ($first == 1)
            {
                print OUT "/", $1, "\n";
                $first = 0;
            }
            else
            {
                print OUT " ", substr($1, 0, length($1) - 1), "/\n";
            }
        }
        elsif ($line =~ /^\# (.*)$/)
        {
            print OUT " *", $1, "\n";
        }
        elsif ($line =~ /^\#(.*)$/)
        {
            print OUT " *", $1, "\n";
        }
        else
        {
            # First non-comment line: the header is complete.
            last;
        }
    }
    close $in;
}
439cdf0e10cSrcweir
# Emit a blank line, the guarded include of sal/types.h, and another blank
# line.
print OUT join("\n",
               "",
               "#ifndef _SAL_TYPES_H_",
               "#include \"sal/types.h\"",
               "#endif",
               "",
               "");
445cdf0e10cSrcweir
# Emit the C array aImpl<id>ToUnicodeData and record where each row starts.
#
# For every used CNS plane, each row that has at least one mapping
# contributes, in this order:
#   - one element "first | (last << 8)" holding the first and last mapped
#     column of the row;
#   - one element per column from first to last: the code point itself when
#     it is at most 0xFFFF, the high surrogate when it is larger, or 0xffff
#     when the column has no mapping;
#   - only when the row contains code points above 0xFFFF: one element with
#     the first such column, followed by one element per column from that
#     column to the last such column, holding the low surrogate, or 0 for
#     columns without a code point above 0xFFFF.
# Rows without any mapping produce just a comment.
#
# Elements are written ten per line; leading padding places column c in
# slot c % 10 of its line.
#
# Results left for the code that follows:
#   $cns_data_index                  number of array elements emitted
#   $cns_data_offsets[plane][row]    index of the row's first element, or -1
#   $cns_data_space[plane][row]      bytes the row occupies (2 per element)
#   $cns_data_used[plane][row]       bytes of the row that carry information
# A summary line per plane is printed to standard output.
print OUT "static sal_uInt16 const aImpl${id}ToUnicodeData[] = {\n";
$cns_data_index = 0;
for ($cns_plane = 1; $cns_plane <= 16; ++$cns_plane)
{
    next unless defined($cns_plane_used[$cns_plane]);

    $cns_rows = 0;
    $cns_chars = 0;
    for ($cns_row = 1; $cns_row <= 94; ++$cns_row)
    {
        # Determine the first and last mapped column of this row.
        $cns_row_first = -1;
        for ($cns_column = 1; $cns_column <= 94; ++$cns_column)
        {
            next unless defined($cns_map[$cns_plane][$cns_row][$cns_column]);
            $cns_row_first = $cns_column if $cns_row_first == -1;
            $cns_row_last = $cns_column;
        }

        # A row without mappings gets a comment and the offset -1.
        if ($cns_row_first == -1)
        {
            print OUT " /* plane ${cns_plane}, row ${cns_row}: --- */\n";
            $cns_data_offsets[$cns_plane][$cns_row] = -1;
            next;
        }

        $cns_data_offsets[$cns_plane][$cns_row] = $cns_data_index;
        ++$cns_rows;
        print OUT " /* plane ${cns_plane}, row ${cns_row} */\n";

        $cns_row_surrogates_first = -1;
        $cns_row_chars = 0;
        $cns_row_surrogates = 0;

        print OUT "  ${cns_row_first} | (${cns_row_last} << 8), /* first, last */\n";
        ++$cns_data_index;

        # Main run: one element per column from first to last.
        print OUT "  ", printSpaces(7, 10, $cns_row_first);
        $bol = 0;
        for ($cns_column = $cns_row_first;
             $cns_column <= $cns_row_last;
             ++$cns_column)
        {
            if ($bol == 1)
            {
                print OUT "  ";
                $bol = 0;
            }

            my $element;
            if (!defined($cns_map[$cns_plane][$cns_row][$cns_column]))
            {
                $element = "0xffff,";
            }
            else
            {
                $utf32 = $cns_map[$cns_plane][$cns_row][$cns_column];
                ++$cns_row_chars;
                if ($utf32 > 0xFFFF)
                {
                    # Above 0xFFFF: store the high surrogate here and
                    # remember the column for the low-surrogate run.
                    ++$cns_row_surrogates;
                    $element = sprintf("0x%04X,",
                                       (0xD800 | (($utf32 - 0x10000) >> 10)));
                    if ($cns_row_surrogates_first == -1)
                    {
                        $cns_row_surrogates_first = $cns_column;
                    }
                    $cns_row_surrogates_last = $cns_column;
                }
                else
                {
                    $element = sprintf("0x%04X,", $utf32);
                }
            }
            print OUT $element;

            ++$cns_data_index;
            if ($cns_column % 10 == 9)
            {
                print OUT "\n";
                $bol = 1;
            }
        }
        print OUT "\n" if $bol == 0;

        # Low-surrogate run, only for rows with code points above 0xFFFF.
        if ($cns_row_surrogates_first != -1)
        {
            print OUT "  ${cns_row_surrogates_first}, /* first low-surrogate */\n";
            ++$cns_data_index;

            print OUT "  ", printSpaces(7, 10, $cns_row_surrogates_first);
            $bol = 0;
            for ($cns_column = $cns_row_surrogates_first;
                 $cns_column <= $cns_row_surrogates_last;
                 ++$cns_column)
            {
                if ($bol == 1)
                {
                    print OUT "  ";
                    $bol = 0;
                }

                # Unmapped columns count as 0 and so get the filler element.
                $utf32 = 0;
                if (defined($cns_map[$cns_plane][$cns_row][$cns_column]))
                {
                    $utf32 = $cns_map[$cns_plane][$cns_row][$cns_column];
                }

                if ($utf32 > 0xFFFF)
                {
                    printf OUT "0x%04X,",
                               (0xDC00 | (($utf32 - 0x10000) & 0x3FF));
                }
                else
                {
                    print OUT "     0,";
                }

                ++$cns_data_index;
                if ($cns_column % 10 == 9)
                {
                    print OUT "\n";
                    $bol = 1;
                }
            }
            print OUT "\n" if $bol == 0;
        }

        # Per-row statistics, in bytes.
        $cns_chars += $cns_row_chars;
        $cns_data_space[$cns_plane][$cns_row]
            = ($cns_data_index - $cns_data_offsets[$cns_plane][$cns_row]) * 2;
        my $surrogate_elements
            = $cns_row_surrogates == 0 ? 0 : 1 + $cns_row_surrogates;
        $cns_data_used[$cns_plane][$cns_row]
            = (1 + $cns_row_chars + $surrogate_elements) * 2;
    }

    print "cns plane ${cns_plane}: rows = ${cns_rows}, chars = ${cns_chars}\n";
}
print OUT "};\n\n";
605cdf0e10cSrcweir
# Emit the aImpl<id>ToUnicodeRowOffsets table.
#
# For every plane 1..16 that is marked in @cns_plane_used, one sal_Int32
# entry is written per row 1..94: the value recorded for that row in
# @cns_data_offsets, or -1 when that recorded value is -1 (row without
# data).  Planes that are not marked contribute only a comment line and no
# entries.  As a side effect, $cns_rowoffsets_used[plane] is increased by 4
# for each row that has data (presumably the byte size of one sal_Int32
# entry -- confirm).
print OUT "static sal_Int32 const aImpl", $id, "ToUnicodeRowOffsets[] = {\n";
for my $plane (1 .. 16)
{
    unless (defined($cns_plane_used[$plane]))
    {
        print OUT "  /* plane ", $plane, ": --- */\n";
        next;
    }

    $cns_rowoffsets_used[$plane] = 0;
    for my $row (1 .. 94)
    {
        my $row_offset = $cns_data_offsets[$plane][$row];
        if ($row_offset == -1)
        {
            print OUT "  -1, /* plane $plane, row $row */\n";
            next;
        }

        # The statistics call stays inside the print list so that it is
        # evaluated in list context as part of this one statement.
        print OUT "  $row_offset, /* plane $plane, row $row; ",
                  printStats($cns_data_used[$plane][$row],
                             $cns_data_space[$plane][$row]),
                  " */\n";
        $cns_rowoffsets_used[$plane] += 4;
    }
}
print OUT "};\n\n";
644cdf0e10cSrcweir
# Emit the aImpl<id>ToUnicodePlaneOffsets table, one entry per plane 1..16.
#
# Planes marked in @cns_plane_used are numbered consecutively starting at 0
# (in $cns_row_offset) and written as the C expression "<n> * 94"; all other
# planes get -1.  The comment attached to each used plane reports
# $cns_rowoffsets_used[plane] against 94 * 4 via printStats.
print OUT "static sal_Int32 const aImpl",
          $id,
          "ToUnicodePlaneOffsets[] = {\n";
$cns_row_offset = 0;
for my $plane (1 .. 16)
{
    if (!defined($cns_plane_used[$plane]))
    {
        print OUT "  -1, /* plane $plane */\n";
        next;
    }

    print OUT "  ", $cns_row_offset++, " * 94, /* plane $plane; ",
              printStats($cns_rowoffsets_used[$plane], 94 * 4),
              " */\n";
}
print OUT "};\n\n";
667cdf0e10cSrcweir
# Emit the aImplUnicodeTo<id>Data byte table.
#
# For every page (0..255) of every plane (0..16) that is marked as used, the
# table receives:
#   - two bytes "first, last": the lowest and highest index within the page
#     that has an entry in @uni_map;
#   - three bytes for every index from first to last inclusive: the mapped
#     value split as (value >> 16, (value >> 8) & 0xFF, value & 0xFF), or
#     0, 0, 0 where no mapping is defined.
# While doing so the loop records, per page:
#   $uni_data_offsets[plane][page]  start index of the page's data in the
#                                   table, or -1 for an unused page of a
#                                   used plane;
#   $uni_data_used[plane][page]     2 + 3 * (number of defined mappings);
#   $uni_data_space[plane][page]    number of bytes emitted for the page.
# $uni_data_index is the running byte count over the whole table.
print OUT "static sal_uInt8 const aImplUnicodeTo", $id, "Data[] = {\n";
$uni_data_index = 0;
for my $plane (0 .. 16)
{
    unless (defined($uni_plane_used[$plane]))
    {
        print OUT " /* plane ", $plane, ": --- */\n";
        next;
    }

    for my $page (0 .. 255)
    {
        unless (defined($uni_page_used[$plane][$page]))
        {
            $uni_data_offsets[$plane][$page] = -1;
            print OUT " /* plane $plane, page ${page}: --- */\n";
            next;
        }

        $uni_data_offsets[$plane][$page] = $uni_data_index;
        print OUT " /* plane $plane, page $page */\n";

        # Determine the populated index range of this page.
        # NOTE(review): $uni_page_last is a global that is not reset per
        # page; this presumably relies on every used page holding at least
        # one mapping -- confirm.
        my $first = -1;
        for my $index (0 .. 255)
        {
            next unless defined($uni_map[$plane][$page][$index]);
            $first = $index if $first == -1;
            $uni_page_last = $index;
        }

        # Header bytes of the page.
        print OUT "  $first, $uni_page_last, /* first, last */\n";
        $uni_data_index += 2;
        $uni_data_used[$plane][$page] = 2;

        # Each entry below is 9 characters wide and 8 entries go on a line;
        # printSpaces presumably pads the first line so that entry $first
        # lands in its own column -- confirm against its definition.
        print OUT "  ", printSpaces(9, 8, $first);
        my $at_line_start = 0;
        for my $index ($first .. $uni_page_last)
        {
            if ($at_line_start)
            {
                print OUT "  ";
                $at_line_start = 0;
            }

            my $cns_code = $uni_map[$plane][$page][$index];
            if (defined($cns_code))
            {
                printf OUT "%2d,%2d,%2d,",
                           $cns_code >> 16,
                           ($cns_code >> 8) & 0xFF,
                           $cns_code & 0xFF;
                $uni_data_used[$plane][$page] += 3;
            }
            else
            {
                print OUT " 0, 0, 0,";
            }
            $uni_data_index += 3;

            # Break the line after every eighth column.
            if ($index % 8 == 7)
            {
                print OUT "\n";
                $at_line_start = 1;
            }
        }
        print OUT "\n" unless $at_line_start;

        $uni_data_space[$plane][$page]
            = $uni_data_index - $uni_data_offsets[$plane][$page];
    }
}
print OUT "};\n\n";
756cdf0e10cSrcweir
# Emit the aImplUnicodeTo<id>PageOffsets table.
#
# For every plane 0..16 marked in @uni_plane_used, one sal_Int32 entry is
# written per page 0..255: the value stored for that page in
# @uni_data_offsets, or -1 when that stored value is -1.  Planes that are
# not marked contribute only a comment line.  Per used plane the loop also
# accumulates:
#   $uni_pageoffsets_used[plane]  4 for every page with data;
#   $uni_data_used_sum[plane]     sum of $uni_data_used over those pages;
#   $uni_data_space_sum[plane]    sum of $uni_data_space over those pages.
print OUT "static sal_Int32 const aImplUnicodeTo", $id, "PageOffsets[] = {\n";
for my $plane (0 .. 16)
{
    unless (defined($uni_plane_used[$plane]))
    {
        print OUT "  /* plane ", $plane, ": --- */\n";
        next;
    }

    $uni_pageoffsets_used[$plane] = 0;
    $uni_data_used_sum[$plane] = 0;
    $uni_data_space_sum[$plane] = 0;
    for my $page (0 .. 255)
    {
        my $page_offset = $uni_data_offsets[$plane][$page];
        if ($page_offset == -1)
        {
            print OUT "  -1, /* plane $plane, page $page */\n";
            next;
        }

        my $used = $uni_data_used[$plane][$page];
        my $space = $uni_data_space[$plane][$page];
        print OUT "  $page_offset, /* plane $plane, page $page; ",
                  printStats($used, $space),
                  " */\n";
        $uni_pageoffsets_used[$plane] += 4;
        $uni_data_used_sum[$plane] += $used;
        $uni_data_space_sum[$plane] += $space;
    }
}
print OUT "};\n\n";
802cdf0e10cSrcweir
# Emit the aImplUnicodeTo<id>PlaneOffsets table, one entry per plane 0..16.
#
# Planes marked in @uni_plane_used are numbered consecutively starting at 0
# (in $uni_page_offset) and written as the C expression "<n> * 256"; all
# other planes get -1.  Each used plane's comment carries two printStats
# results: page-offset usage against 256 * 4, and the plane's summed data
# usage against its summed data space.  The grand totals over all used
# planes are written as a final comment just before the closing brace.
print OUT "static sal_Int32 const aImplUnicodeTo",
          $id,
          "PlaneOffsets[] = {\n";
$uni_page_offset
    = $uni_planeoffsets_used
    = $uni_pageoffsets_used_sum
    = $uni_pageoffsets_space_sum
    = $uni_data_used_sum2
    = $uni_data_space_sum2
    = 0;
for my $plane (0 .. 16)
{
    if (!defined($uni_plane_used[$plane]))
    {
        print OUT "  -1, /* plane $plane */\n";
        next;
    }

    print OUT "  ", $uni_page_offset++, " * 256, /* plane $plane; ",
              printStats($uni_pageoffsets_used[$plane], 256 * 4),
              ", ",
              printStats($uni_data_used_sum[$plane],
                         $uni_data_space_sum[$plane]),
              " */\n";

    # Totals for the summary comment at the end of the table.
    $uni_planeoffsets_used += 4;
    $uni_pageoffsets_used_sum += $uni_pageoffsets_used[$plane];
    $uni_pageoffsets_space_sum += 256 * 4;
    $uni_data_used_sum2 += $uni_data_used_sum[$plane];
    $uni_data_space_sum2 += $uni_data_space_sum[$plane];
}
print OUT " /* ",
          printStats($uni_planeoffsets_used, 17 * 4),
          ", ",
          printStats($uni_pageoffsets_used_sum, $uni_pageoffsets_space_sum),
          ", ",
          printStats($uni_data_used_sum2, $uni_data_space_sum2),
          " */\n};\n";
844cdf0e10cSrcweir
# Close the generated file.  Output through OUT is buffered, so a write
# failure (for example a full disk) may only be reported by close; abort in
# that case instead of silently leaving a truncated table file behind.
close(OUT) or die "cannot close generated output file: $!";

# Print the three per-source counters and their total to standard output.
# NOTE(review): the counters are maintained earlier in the script;
# presumably each counts the mappings taken from the named input -- confirm.
print "Unihan.txt = ", $count_Unihan_txt,
      ", CNS11643.TXT = ", $count_CNS11643_TXT,
      ", Uni2CNS = ", $count_Uni2CNS,
      ", total = ",
          ($count_Unihan_txt + $count_CNS11643_TXT + $count_Uni2CNS),
      "\n";
853