1#!/usr/bin/perl
2#**************************************************************
3#
4#  Licensed to the Apache Software Foundation (ASF) under one
5#  or more contributor license agreements.  See the NOTICE file
6#  distributed with this work for additional information
7#  regarding copyright ownership.  The ASF licenses this file
8#  to you under the Apache License, Version 2.0 (the
9#  "License"); you may not use this file except in compliance
10#  with the License.  You may obtain a copy of the License at
11#
12#    http://www.apache.org/licenses/LICENSE-2.0
13#
14#  Unless required by applicable law or agreed to in writing,
15#  software distributed under the License is distributed on an
16#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17#  KIND, either express or implied.  See the License for the
18#  specific language governing permissions and limitations
19#  under the License.
20#
21#**************************************************************
22
23
24
25# The following files must be available in a ./input subdir:
26
27# <http://www.unicode.org/Public/UNIDATA/Unihan.txt>:
28#  "Unicode version: 3.1.1    Table version: 1.1    Date: 28 June 2001"
29#  contains descriptions for:
30#   U+3400..4DFF CJK Unified Ideographs Extension A
31#   U+4E00..9FFF CJK Unified Ideographs
32#   U+F900..FAFF CJK Compatibility Ideographs
33#   U+20000..2F7FF CJK Unified Ideographs Extension B
34#   U+2F800..2FFFF CJK Compatibility Ideographs Supplement
35
36# <http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/CNS11643.TXT>:
37#  "Unicode version: 1.1    Table version: 0.0d1    Date: 21 October 1994"
38#  contains mappings for CNS 11643-1986
39
40# <http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/ftp/CJKtable/Uni2CNS.Z>:
41#  "Unicode version: 1.1    Table version: 0.49    Date: 26 March 1998"
42#  contains mappings for CNS 11643-1992 that are incompatible with
43#   CNS11643.TXT
44
# Base name of the generated tables: lower-cased it names the files this
# script reads and writes, and verbatim it is embedded in the names of the
# emitted C arrays.
$id = 'Cns116431992';
46
# Return true if the argument is acceptable as a UTF-32 value: it must lie
# in 0..0x10FFFF, must not be a surrogate (0xD800..0xDFFF), must not lie in
# 0xFDD0..0xFDEF, and its low 16 bits must be below 0xFFFE.
sub isValidUtf32
{
    my ($code_point) = @_;

    my $out_of_range = $code_point < 0 || $code_point > 0x10FFFF;
    my $surrogate = $code_point >= 0xD800 && $code_point <= 0xDFFF;
    my $excluded = ($code_point >= 0xFDD0 && $code_point <= 0xFDEF)
                   || ($code_point & 0xFFFF) >= 0xFFFE;

    return !($out_of_range || $surrogate || $excluded);
}
55
# Format a code point for diagnostics: "U+" followed by the value in
# upper-case hexadecimal, zero-padded to at least four digits.
sub printUtf32
{
    my ($code_point) = @_;
    return 'U+' . sprintf('%04X', $code_point);
}
61
# Return true if (plane, row, column) is inside the range this script
# accepts: plane 1..16, row 1..94 and column 1..94.
sub isValidCns116431992
{
    my ($plane, $row, $column) = @_;
    return !($plane < 1 || $plane > 16
             || $row < 1 || $row > 94
             || $column < 1 || $column > 94);
}
71
# Format a (plane, row, column) triple for diagnostics as "P-RR/CC": the
# plane in decimal, row and column in decimal zero-padded to two digits.
sub printCns116431992
{
    my ($plane, $row, $column) = @_;
    return sprintf('%d', $plane)
           . '-' . sprintf('%02d', $row)
           . '/' . sprintf('%02d', $column);
}
79
# Format a usage statistic as "USED/SPACE bytes (P%)", where P is USED as a
# percentage of SPACE with one decimal place.
sub printStats
{
    my ($used, $space) = @_;
    my $percent = $used * 100 / $space;
    return sprintf('%d/%d bytes (%.1f%%)', $used, $space, $percent);
}
89
# Return the blank padding that lines up the first entry of a table row.
#
# The tables are written $columns_per_line entries to a line, each entry
# $column_width characters wide.  When the first entry written has index
# $end, the slots for the indices that precede it on the same line are
# filled with spaces so that the entry lands in its usual column.
#
#   $column_width      width of one entry in characters
#   $columns_per_line  number of entries per output line
#   $end               index of the first entry that will be printed
#
# Returns a string of $column_width spaces for every index from the start
# of $end's line up to (but not including) $end; the empty string if there
# is nothing to skip.  Only lexical variables are used, so the caller's
# package variables (in particular $i, $j and $output) are left untouched.
sub printSpaces
{
    my ($column_width, $columns_per_line, $end) = @_;

    # Index of the first slot on the line that holds $end.
    my $line_start = int($end / $columns_per_line) * $columns_per_line;

    my $skipped = $end - $line_start;
    return '' if $skipped <= 0 || $column_width <= 0;
    return ' ' x ($column_width * $skipped);
}
107
# Per-input-file counters of the mappings each table contributed; they are
# printed in the summary at the end of the run.
$count_Unihan_txt = $count_CNS11643_TXT = $count_Uni2CNS = 0;
111
# Collect mappings from input/Unihan.txt into @cns_map.
#
# Lines of the form "U+XXXX<TAB>FIELD<TAB>P-RRCC" are read for the fields
# kCNS1992 and kIRG_TSource.  P is the plane as one hexadecimal digit; RR
# and CC are hexadecimal bytes from which 0x20 is subtracted to obtain the
# row and column.  Each accepted mapping is stored as
# $cns_map[plane][row][column] = code point, the plane is flagged in
# @cns_plane_used and $count_Unihan_txt is incremented.
#
# When a cell is already mapped to a different code point, a kCNS1992 line
# aborts the run, whereas a kIRG_TSource line only prints a warning and the
# earlier mapping is kept.  A kCNS1992 line whose value does not have the
# expected shape aborts with "Bad format"; other non-matching lines are
# skipped.
if (1)
{
    $filename = "Unihan.txt";
    open my $in, '<', "input/" . $filename
        or die "Cannot read " . $filename . ": " . $!;
    while (<$in>)
    {
        if (/^U\+([0-9A-F]+)\t(kCNS1992|kIRG_TSource)\t([0-9A-F])-([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])$/)
        {
            my $field = $2;
            $utf32 = oct("0x" . $1);
            $cns_plane = oct("0x" . $3);
            $cns_row = oct("0x" . $4) - 0x20;
            $cns_column = oct("0x" . $5) - 0x20;

            # printUtf32 already supplies the "U+" prefix.
            isValidUtf32($utf32)
                or die "Bad UTF32 char " . printUtf32($utf32);
            isValidCns116431992($cns_plane, $cns_row, $cns_column)
                or die "Bad CNS11643-1992 char "
                           . printCns116431992($cns_plane,
                                               $cns_row,
                                               $cns_column);

            if (!defined($cns_map[$cns_plane][$cns_row][$cns_column]))
            {
                $cns_map[$cns_plane][$cns_row][$cns_column] = $utf32;
                $cns_plane_used[$cns_plane] = 1;
                ++$count_Unihan_txt;
            }
            elsif ($cns_map[$cns_plane][$cns_row][$cns_column] != $utf32)
            {
                my $conflict
                    = "Mapping "
                          . printCns116431992($cns_plane,
                                              $cns_row,
                                              $cns_column)
                          . " to "
                          . printUtf32($cns_map[$cns_plane]
                                               [$cns_row]
                                               [$cns_column])
                          . ", NOT "
                          . printUtf32($utf32);

                # Only kCNS1992 conflicts are fatal.
                die $conflict if $field eq "kCNS1992";
                print "WARNING!  ", $conflict, "\n";
            }
        }
        elsif (/^U\+([0-9A-F]+)\tkCNS1992\t.*$/)
        {
            die "Bad format";
        }
    }
    close $in;
}
194
# Collect mappings from input/CNS11643.TXT into @cns_map.
#
# Lines containing "0xPRRCC<TAB>0xUUUU<TAB>#..." are read: P is the plane
# as one hexadecimal digit, RR and CC are hexadecimal bytes from which 0x20
# is subtracted to obtain the row and column, and UUUU is the code point.
# Every matching line is validated, but only mappings for planes 1 and 2
# are stored; each stored mapping flags the plane in @cns_plane_used and
# increments $count_CNS11643_TXT.  A cell that is already mapped to a
# different code point aborts the run.
if (1)
{
    $filename = "CNS11643.TXT";
    open my $in, '<', "input/" . $filename
        or die "Cannot read " . $filename . ": " . $!;
    while (<$in>)
    {
        next unless /0x([0-9A-F])([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])\t0x([0-9A-F]+)\t\#.*$/;

        $utf32 = oct("0x" . $4);
        $cns_plane = oct("0x" . $1);
        $cns_row = oct("0x" . $2) - 0x20;
        $cns_column = oct("0x" . $3) - 0x20;

        # printUtf32 already supplies the "U+" prefix.
        isValidUtf32($utf32)
            or die "Bad UTF32 char " . printUtf32($utf32);
        isValidCns116431992($cns_plane, $cns_row, $cns_column)
            or die "Bad CNS11643-1992 char "
                       . printCns116431992($cns_plane,
                                           $cns_row,
                                           $cns_column);

        # Mappings for planes above 2 are not taken from this table.
        next if $cns_plane > 2;

        if (!defined($cns_map[$cns_plane][$cns_row][$cns_column]))
        {
            $cns_map[$cns_plane][$cns_row][$cns_column] = $utf32;
            $cns_plane_used[$cns_plane] = 1;
            ++$count_CNS11643_TXT;
        }
        else
        {
            ($cns_map[$cns_plane][$cns_row][$cns_column] == $utf32)
                or die "Mapping "
                           . printCns116431992($cns_plane,
                                               $cns_row,
                                               $cns_column)
                           . " to "
                           . printUtf32($cns_map[$cns_plane]
                                                [$cns_row]
                                                [$cns_column])
                           . ", NOT "
                           . printUtf32($utf32);
        }
    }
    close $in;
}
241
# Collect mappings from input/Uni2CNS into @cns_map.  This section is
# switched off by its "if (0)" guard.
#
# Lines containing "UUUU<TAB>P-RRCC<TAB>..." are read: UUUU is the code
# point, P the plane as one hexadecimal digit, RR and CC hexadecimal bytes
# from which 0x20 is subtracted to obtain the row and column.  A mapping is
# stored (and counted in $count_Uni2CNS) only when the cell is still empty;
# a cell that is already mapped is left alone and no consistency check is
# made against the earlier value.  Every plane 1 entry read is also
# printed to standard output.
if (0)
{
    $filename = "Uni2CNS";
    open my $in, '<', "input/" . $filename
        or die "Cannot read " . $filename . ": " . $!;
    while (<$in>)
    {
        next unless /([0-9A-F]+)\t([0-9A-F])-([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])\t.*$/;

        $utf32 = oct("0x" . $1);
        $cns_plane = oct("0x" . $2);
        $cns_row = oct("0x" . $3) - 0x20;
        $cns_column = oct("0x" . $4) - 0x20;

        # printUtf32 already supplies the "U+" prefix.
        isValidUtf32($utf32)
            or die "Bad UTF32 char " . printUtf32($utf32);
        isValidCns116431992($cns_plane, $cns_row, $cns_column)
            or die "Bad CNS11643-1992 char "
                       . printCns116431992($cns_plane,
                                           $cns_row,
                                           $cns_column);

        if (!defined($cns_map[$cns_plane][$cns_row][$cns_column]))
        {
            $cns_map[$cns_plane][$cns_row][$cns_column] = $utf32;
            $cns_plane_used[$cns_plane] = 1;
            ++$count_Uni2CNS;
        }

        if ($cns_plane == 1)
        {
            print printCns116431992($cns_plane, $cns_row, $cns_column),
                  "\n";
        }
    }
    close $in;
}
290
# Build the reverse table @uni_map from @cns_map.
#
# @uni_map is indexed [code point >> 16][(code point >> 8) & 0xFF]
# [code point & 0xFF]; each entry packs the CNS position as
# (plane << 16) | (row << 8) | column.  @uni_plane_used and @uni_page_used
# flag the first two index levels that contain at least one entry.
#
# When a code point is reached from a second CNS position, positions in
# plane 3 from row 66, column 39 onwards (the "Fictitious Character Set
# Extensions", Lunde, p. 131) lose against any other position and a note is
# printed; any other clash keeps the first position found and prints a
# warning.

# True for a position inside the plane 3 extension area described above.
my $in_extension_area = sub
{
    my ($plane, $row, $column) = @_;
    return $plane == 3 && ($row > 66 || ($row == 66 && $column > 38));
};

for my $plane (1 .. 16)
{
    next unless defined($cns_plane_used[$plane]);

    for my $row (1 .. 94)
    {
        for my $column (1 .. 94)
        {
            next unless defined($cns_map[$plane][$row][$column]);

            # These four stay package variables, as in the rest of the
            # script.
            $utf32 = $cns_map[$plane][$row][$column];
            $uni_plane = $utf32 >> 16;
            $uni_page = ($utf32 >> 8) & 0xFF;
            $uni_index = $utf32 & 0xFF;

            my $packed = ($plane << 16) | ($row << 8) | $column;

            my $already_mapped
                = defined($uni_plane_used[$uni_plane])
                  && defined($uni_page_used[$uni_plane][$uni_page])
                  && defined($uni_map[$uni_plane][$uni_page][$uni_index]);
            if (!$already_mapped)
            {
                $uni_map[$uni_plane][$uni_page][$uni_index] = $packed;
                $uni_plane_used[$uni_plane] = 1;
                $uni_page_used[$uni_plane][$uni_page] = 1;
                next;
            }

            # The code point already has a CNS position; unpack it.
            my $earlier = $uni_map[$uni_plane][$uni_page][$uni_index];
            my $earlier_plane = $earlier >> 16;
            my $earlier_row = ($earlier >> 8) & 0xFF;
            my $earlier_column = $earlier & 0xFF;

            my $current_text = printCns116431992($plane, $row, $column);
            my $earlier_text = printCns116431992($earlier_plane,
                                                 $earlier_row,
                                                 $earlier_column);

            if ($in_extension_area->($plane, $row, $column))
            {
                # The new position is in the extension area: keep the
                # earlier one.
                print " (" . printUtf32($utf32)
                      . " to fictious " . $current_text
                      . " ignored, favouring " . $earlier_text
                      . ")\n";
            }
            elsif ($in_extension_area->($earlier_plane,
                                        $earlier_row,
                                        $earlier_column))
            {
                # The earlier position is in the extension area: replace
                # it with the new one.
                $uni_map[$uni_plane][$uni_page][$uni_index] = $packed;
                print " (" . printUtf32($utf32)
                      . " to fictious " . $earlier_text
                      . " ignored, favouring " . $current_text
                      . ")\n";
            }
            else
            {
                print "WARNING!  Mapping " . printUtf32($utf32)
                      . " to " . $earlier_text
                      . ", NOT " . $current_text
                      . "\n";
            }
        }
    }
}
# Sanity check: abort if any of U+0000..U+007F has been given a CNS
# position in @uni_map.  The entry tested is the one for the code point
# being iterated over ($utf32).
if (defined($uni_plane_used[0]) && defined($uni_page_used[0][0]))
{
    for ($utf32 = 0; $utf32 <= 0x7F; ++$utf32)
    {
        if (defined($uni_map[0][0][$utf32]))
        {
            $cns = $uni_map[0][0][$utf32];
            die "Mapping "
                    . printUtf32($utf32)
                    . " to "
                    . printCns116431992($cns >> 16,
                                        ($cns >> 8) & 0xFF,
                                        $cns & 0xFF);
        }
    }
}
399
# Open the output file (lower-cased $id plus ".tab") on the OUT handle,
# which the rest of the script prints to.
$filename = lc($id) . ".tab";
open OUT, '>', $filename or die "Cannot write " . $filename . ": " . $!;

# Copy the leading comment block of the file named lower-cased $id plus
# ".pl" to the output as a C comment:
#  - a "#!" line is dropped;
#  - the first "#*..." line becomes "/*...";
#  - every later "#*..." line loses its last character and becomes
#    " *.../";
#  - any other "#" line becomes " *" followed by its text (a single space
#    directly after the "#" is dropped);
#  - the first line that does not start with "#" ends the copy.
{
    $filename = lc($id) . ".pl";
    open my $in, '<', $filename
        or die "Cannot read " . $filename . ": " . $!;
    my $first = 1;
    while (<$in>)
    {
        if (/^\#!.*$/)
        {
            # Interpreter line: nothing to copy.
        }
        elsif (/^\#(\*.*)$/)
        {
            if ($first == 1)
            {
                print OUT "/", $1, "\n";
                $first = 0;
            }
            else
            {
                print OUT " ", substr($1, 0, length($1) - 1), "/\n";
            }
        }
        elsif (/^\# (.*)$/)
        {
            print OUT " *", $1, "\n";
        }
        elsif (/^\#(.*)$/)
        {
            print OUT " *", $1, "\n";
        }
        else
        {
            last;
        }
    }
    close $in;
}
439
# Emit the guarded include of sal/types.h, with a blank line before and
# after it.
print OUT <<'END_OF_PROLOGUE';

#ifndef _SAL_TYPES_H_
#include "sal/types.h"
#endif

END_OF_PROLOGUE
445
# Emit the CNS-to-Unicode data array (16-bit entries).
#
# For every row of a used plane that has at least one mapping:
#  - one entry "first | (last << 8)" giving the first and last mapped
#    column;
#  - one entry per column first..last: the code point, the high surrogate
#    for code points above 0xFFFF, or 0xffff for an unmapped column;
#  - if the row has code points above 0xFFFF: one entry with the first
#    such column, then one entry per column up to the last such column
#    holding the low surrogate, or 0 where the column has none.
# Rows without mappings only produce a comment.
#
# Side results used by later sections: $cns_data_offsets[plane][row] (the
# index of the row's first entry, or -1), $cns_data_space[plane][row] and
# $cns_data_used[plane][row] (bytes emitted / bytes carrying information).
# A per-plane summary is printed to standard output.
print OUT "static sal_uInt16 const aImpl" . $id . "ToUnicodeData[] = {\n";
$cns_data_index = 0;
for my $plane (1 .. 16)
{
    next unless defined($cns_plane_used[$plane]);

    my $plane_rows = 0;
    my $plane_chars = 0;
    for my $row (1 .. 94)
    {
        my @mapped_columns
            = grep { defined($cns_map[$plane][$row][$_]) } 1 .. 94;
        if (!@mapped_columns)
        {
            print OUT " /* plane ", $plane, ", row ", $row, ": --- */\n";
            $cns_data_offsets[$plane][$row] = -1;
            next;
        }
        my $first_column = $mapped_columns[0];
        my $last_column = $mapped_columns[-1];

        $cns_data_offsets[$plane][$row] = $cns_data_index;
        ++$plane_rows;
        print OUT " /* plane ", $plane, ", row ", $row, " */\n";

        print OUT "  ", $first_column, " | (", $last_column,
                  " << 8), /* first, last */\n";
        ++$cns_data_index;

        # Main entries; remember which columns need a low surrogate.
        my $row_chars = 0;
        my @surrogate_columns = ();
        my $at_line_start = 0;
        print OUT "  ", printSpaces(7, 10, $first_column);
        for my $column ($first_column .. $last_column)
        {
            if ($at_line_start)
            {
                print OUT "  ";
                $at_line_start = 0;
            }

            my $code_point = $cns_map[$plane][$row][$column];
            if (!defined($code_point))
            {
                print OUT "0xffff,";
            }
            elsif ($code_point <= 0xFFFF)
            {
                ++$row_chars;
                printf OUT "0x%04X,", $code_point;
            }
            else
            {
                ++$row_chars;
                push @surrogate_columns, $column;
                printf OUT "0x%04X,",
                           (0xD800 | (($code_point - 0x10000) >> 10));
            }
            ++$cns_data_index;

            # Ten entries per line, aligned on the column number.
            if ($column % 10 == 9)
            {
                print OUT "\n";
                $at_line_start = 1;
            }
        }
        print OUT "\n" unless $at_line_start;

        # Low surrogates, only for rows that need them.
        if (@surrogate_columns)
        {
            my $first_surrogate = $surrogate_columns[0];
            my $last_surrogate = $surrogate_columns[-1];

            print OUT "  ", $first_surrogate,
                      ", /* first low-surrogate */\n";
            ++$cns_data_index;

            print OUT "  ", printSpaces(7, 10, $first_surrogate);
            $at_line_start = 0;
            for my $column ($first_surrogate .. $last_surrogate)
            {
                if ($at_line_start)
                {
                    print OUT "  ";
                    $at_line_start = 0;
                }

                my $code_point = $cns_map[$plane][$row][$column];
                if (defined($code_point) && $code_point > 0xFFFF)
                {
                    printf OUT "0x%04X,",
                               (0xDC00 | (($code_point - 0x10000) & 0x3FF));
                }
                else
                {
                    print OUT "     0,";
                }
                ++$cns_data_index;

                if ($column % 10 == 9)
                {
                    print OUT "\n";
                    $at_line_start = 1;
                }
            }
            print OUT "\n" unless $at_line_start;
        }

        # Statistics in bytes: every entry is two bytes wide.
        $plane_chars += $row_chars;
        $cns_data_space[$plane][$row]
            = ($cns_data_index - $cns_data_offsets[$plane][$row]) * 2;
        my $surrogate_entries
            = @surrogate_columns ? 1 + scalar(@surrogate_columns) : 0;
        $cns_data_used[$plane][$row]
            = (1 + $row_chars + $surrogate_entries) * 2;
    }

    print "cns plane " . $plane
          . ": rows = " . $plane_rows
          . ", chars = " . $plane_chars
          . "\n";
}
print OUT "};\n\n";
605
# Emit the row offset array: for every used plane, one entry per row 1..94
# holding the row's $cns_data_offsets value (-1 for a row without data),
# annotated with the row's usage statistics.  Unused planes only produce a
# comment.  $cns_rowoffsets_used[plane] accumulates four bytes for every
# row that has data.
print OUT "static sal_Int32 const aImpl" . $id . "ToUnicodeRowOffsets[] = {\n";
for my $plane (1 .. 16)
{
    if (!defined($cns_plane_used[$plane]))
    {
        print OUT "  /* plane ", $plane, ": --- */\n";
        next;
    }

    $cns_rowoffsets_used[$plane] = 0;
    for my $row (1 .. 94)
    {
        my $row_offset = $cns_data_offsets[$plane][$row];
        if ($row_offset == -1)
        {
            print OUT "  -1, /* plane ", $plane, ", row ", $row, " */\n";
            next;
        }

        my $row_stats = printStats($cns_data_used[$plane][$row],
                                   $cns_data_space[$plane][$row]);
        print OUT "  ", $row_offset,
                  ", /* plane ", $plane,
                  ", row ", $row,
                  "; ", $row_stats,
                  " */\n";
        $cns_rowoffsets_used[$plane] += 4;
    }
}
print OUT "};\n\n";
644
# Emit the plane offset array: one entry per plane 1..16.  A used plane
# gets "N * 94", where N counts the used planes seen so far starting at 0,
# annotated with the statistics of its row offsets; an unused plane gets
# -1.
print OUT "static sal_Int32 const aImpl" . $id . "ToUnicodePlaneOffsets[] = {\n";
$cns_row_offset = 0;
for my $plane (1 .. 16)
{
    if (!defined($cns_plane_used[$plane]))
    {
        print OUT "  -1, /* plane ", $plane, " */\n";
        next;
    }

    my $plane_stats = printStats($cns_rowoffsets_used[$plane], 94 * 4);
    print OUT "  ", $cns_row_offset,
              " * 94, /* plane ", $plane,
              "; ", $plane_stats,
              " */\n";
    ++$cns_row_offset;
}
print OUT "};\n\n";
667
# Emit the Unicode-to-CNS data array (8-bit entries).
#
# For every used page of a used Unicode plane:
#  - two bytes giving the first and last mapped index within the page;
#  - three bytes (plane, row, column) per index first..last, or "0, 0, 0"
#    for an index without a mapping.
# Unused pages and planes only produce a comment.
#
# Side results used by later sections: $uni_data_offsets[plane][page] (the
# index of the page's first byte, or -1), $uni_data_used[plane][page] and
# $uni_data_space[plane][page] (bytes carrying information / bytes
# emitted).
print OUT "static sal_uInt8 const aImplUnicodeTo" . $id . "Data[] = {\n";
$uni_data_index = 0;
for my $plane (0 .. 16)
{
    if (!defined($uni_plane_used[$plane]))
    {
        print OUT " /* plane ", $plane, ": --- */\n";
        next;
    }

    for my $page (0 .. 255)
    {
        if (!defined($uni_page_used[$plane][$page]))
        {
            $uni_data_offsets[$plane][$page] = -1;
            print OUT " /* plane ", $plane, ", page ", $page, ": --- */\n";
            next;
        }

        $uni_data_offsets[$plane][$page] = $uni_data_index;
        print OUT " /* plane ", $plane, ", page ", $page, " */\n";

        my @mapped_indices
            = grep { defined($uni_map[$plane][$page][$_]) } 0 .. 255;
        my $first_index = $mapped_indices[0];
        my $last_index = $mapped_indices[-1];

        print OUT "  ", $first_index, ", ", $last_index,
                  ", /* first, last */\n";
        $uni_data_index += 2;
        my $bytes_used = 2;

        my $at_line_start = 0;
        print OUT "  ", printSpaces(9, 8, $first_index);
        for my $index ($first_index .. $last_index)
        {
            if ($at_line_start)
            {
                print OUT "  ";
                $at_line_start = 0;
            }

            my $packed = $uni_map[$plane][$page][$index];
            if (defined($packed))
            {
                printf OUT "%2d,%2d,%2d,",
                           $packed >> 16,
                           ($packed >> 8) & 0xFF,
                           $packed & 0xFF;
                $bytes_used += 3;
            }
            else
            {
                print OUT " 0, 0, 0,";
            }
            $uni_data_index += 3;

            # Eight entries per line, aligned on the index.
            if ($index % 8 == 7)
            {
                print OUT "\n";
                $at_line_start = 1;
            }
        }
        print OUT "\n" unless $at_line_start;

        $uni_data_used[$plane][$page] = $bytes_used;
        $uni_data_space[$plane][$page]
            = $uni_data_index - $uni_data_offsets[$plane][$page];
    }
}
print OUT "};\n\n";
756
# Emit the page offset array: for every used Unicode plane, one entry per
# page 0..255 holding the page's $uni_data_offsets value (-1 for a page
# without data), annotated with the page's usage statistics.  Unused planes
# only produce a comment.  Per plane, $uni_pageoffsets_used accumulates
# four bytes for every page that has data, and $uni_data_used_sum /
# $uni_data_space_sum total the page statistics.
print OUT "static sal_Int32 const aImplUnicodeTo" . $id . "PageOffsets[] = {\n";
for my $plane (0 .. 16)
{
    if (!defined($uni_plane_used[$plane]))
    {
        print OUT "  /* plane ", $plane, ": --- */\n";
        next;
    }

    $uni_pageoffsets_used[$plane] = 0;
    $uni_data_used_sum[$plane] = 0;
    $uni_data_space_sum[$plane] = 0;
    for my $page (0 .. 255)
    {
        $offset = $uni_data_offsets[$plane][$page];
        if ($offset == -1)
        {
            print OUT "  -1, /* plane ", $plane, ", page ", $page, " */\n";
            next;
        }

        my $page_used = $uni_data_used[$plane][$page];
        my $page_space = $uni_data_space[$plane][$page];
        print OUT "  ", $offset,
                  ", /* plane ", $plane,
                  ", page ", $page,
                  "; ", printStats($page_used, $page_space),
                  " */\n";
        $uni_pageoffsets_used[$plane] += 4;
        $uni_data_used_sum[$plane] += $page_used;
        $uni_data_space_sum[$plane] += $page_space;
    }
}
print OUT "};\n\n";
802
# Emit the Unicode plane offset array: one entry per plane 0..16.  A used
# plane gets "N * 256", where N counts the used planes seen so far starting
# at 0, annotated with the statistics of its page offsets and of its data;
# an unused plane gets -1.  A closing comment gives the totals for the
# plane offsets, the page offsets and the data.
print OUT "static sal_Int32 const aImplUnicodeTo" . $id . "PlaneOffsets[] = {\n";
$uni_page_offset = 0;
$uni_planeoffsets_used = 0;
$uni_pageoffsets_used_sum = 0;
$uni_pageoffsets_space_sum = 0;
$uni_data_used_sum2 = 0;
$uni_data_space_sum2 = 0;
for my $plane (0 .. 16)
{
    if (!defined($uni_plane_used[$plane]))
    {
        print OUT "  -1, /* plane ", $plane, " */\n";
        next;
    }

    my $page_offset_stats = printStats($uni_pageoffsets_used[$plane],
                                       256 * 4);
    my $data_stats = printStats($uni_data_used_sum[$plane],
                                $uni_data_space_sum[$plane]);
    print OUT "  ", $uni_page_offset,
              " * 256, /* plane ", $plane,
              "; ", $page_offset_stats,
              ", ", $data_stats,
              " */\n";
    ++$uni_page_offset;

    $uni_planeoffsets_used += 4;
    $uni_pageoffsets_used_sum += $uni_pageoffsets_used[$plane];
    $uni_pageoffsets_space_sum += 256 * 4;
    $uni_data_used_sum2 += $uni_data_used_sum[$plane];
    $uni_data_space_sum2 += $uni_data_space_sum[$plane];
}

my $plane_totals = printStats($uni_planeoffsets_used, 17 * 4);
my $page_totals = printStats($uni_pageoffsets_used_sum,
                             $uni_pageoffsets_space_sum);
my $data_totals = printStats($uni_data_used_sum2, $uni_data_space_sum2);
print OUT " /* " . $plane_totals
          . ", " . $page_totals
          . ", " . $data_totals
          . " */\n};\n";
844
# Finish the output file.  Buffered write errors only surface when the
# handle is closed, so a failing close is treated as a failed write.
close OUT or die "Cannot write " . lc($id) . ".tab: " . $!;

# Report on standard output how many mappings each input table contributed,
# and their total.
print "Unihan.txt = ", $count_Unihan_txt,
      ", CNS11643.TXT = ", $count_CNS11643_TXT,
      ", Uni2CNS = ", $count_Uni2CNS,
      ", total = ",
          ($count_Unihan_txt + $count_CNS11643_TXT + $count_Uni2CNS),
      "\n";
853