1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // prevent internal compiler error with MSVC6SP3
29 #include <utility>
30 #include <i18nutil/widthfolding.hxx>
31 #include <i18nutil/x_rtl_ustring.h>
32 #include "widthfolding_data.h"
33 
34 using namespace com::sun::star::uno;
35 using namespace rtl;
36 
37 namespace com { namespace sun { namespace star { namespace i18n {
38 
39 sal_Unicode widthfolding::decompose_ja_voiced_sound_marksChar2Char (sal_Unicode inChar)
40 {
41     if (0x30a0 <= inChar && inChar <= 0x30ff) {
42       sal_Int16 i = inChar - 0x3040;
43       if (decomposition_table[i].decomposited_character_1)
44           return 0xFFFF;
45     }
46     return inChar;
47 }
48 
49 /**
50  * Decompose Japanese specific voiced and semi-voiced sound marks.
51  */
52 OUString widthfolding::decompose_ja_voiced_sound_marks (const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, sal_Bool useOffset )
53 {
54   // Create a string buffer which can hold nCount * 2 + 1 characters.
55   // Its size may become double of nCount.
56   rtl_uString * newStr;
57   x_rtl_uString_new_WithLength( &newStr, nCount * 2 ); // defined in x_rtl_ustring.h  The reference count is 0 now.
58 
59   sal_Int32 *p = NULL;
60   sal_Int32 position = 0;
61   if (useOffset) {
62       // Allocate double of nCount length to offset argument.
63       offset.realloc( nCount * 2 );
64       p = offset.getArray();
65       position = startPos;
66   }
67 
68   // Prepare pointers of unicode character arrays.
69   const sal_Unicode* src = inStr.getStr() + startPos;
70   sal_Unicode* dst = newStr->buffer;
71 
72   // Decomposition: GA --> KA + voice-mark
73   while (nCount -- > 0) {
74     sal_Unicode c = *src++;
75     // see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F)
76     // see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF)
77     // Hiragana is not applied to decomposition.
78     // Only Katakana is applied to decomposition
79     if (0x30a0 <= c && c <= 0x30ff) {
80       int i = int(c - 0x3040);
81       sal_Unicode first = decomposition_table[i].decomposited_character_1;
82       if (first != 0x0000) {
83 	*dst ++ = first;
84 	*dst ++ = decomposition_table[i].decomposited_character_2; // second
85         if (useOffset) {
86             *p ++ = position;
87             *p ++ = position ++;
88         }
89 	continue;
90       }
91     }
92     *dst ++ = c;
93     if (useOffset)
94         *p ++ = position ++;
95   }
96   *dst = (sal_Unicode) 0;
97 
98   newStr->length = sal_Int32(dst - newStr->buffer);
99   if (useOffset)
100       offset.realloc(newStr->length);
101   return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1.
102 }
103 
104 oneToOneMapping& widthfolding::getfull2halfTable(void)
105 {
106     static oneToOneMappingWithFlag table(full2half, sizeof(full2half), FULL2HALF_NORMAL);
107     table.makeIndex();
108     return table;
109 }
110 
111 /**
112  * Compose Japanese specific voiced and semi-voiced sound marks.
113  */
114 OUString widthfolding::compose_ja_voiced_sound_marks (const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, sal_Bool useOffset, sal_Int32 nFlags )
115 {
116   // Create a string buffer which can hold nCount + 1 characters.
117   // Its size may become equal to nCount or smaller.
118   // The reference count is 0 now.
119   rtl_uString * newStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h
120 
121   // Prepare pointers of unicode character arrays.
122   const sal_Unicode* src = inStr.getStr() + startPos;
123   sal_Unicode* dst = newStr->buffer;
124 
125   // This conversion algorithm requires at least one character.
126  if (nCount > 0) {
127 
128   // .. .. KA         VOICE .. ..
129   //       ^          ^
130   //       previousChar   currentChar
131   //       ^
132   //       position
133   //
134   // will be converted to
135   // .. .. GA       .. ..
136 
137   sal_Int32 *p = NULL;
138   sal_Int32 position = 0;
139   if (useOffset) {
140       // Allocate nCount length to offset argument.
141       offset.realloc( nCount );
142       p = offset.getArray();
143       position = startPos;
144   }
145 
146   //
147   sal_Unicode previousChar = *src ++;
148   sal_Unicode currentChar;
149 
150   // Composition: KA + voice-mark --> GA
151   while (-- nCount > 0) {
152     currentChar = *src ++;
153     // see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F)
154     // see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF)
155     // 0x3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
156     // 0x309a COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
157     // 0x309b KATAKANA-HIRAGANA VOICED SOUND MARK
158     // 0x309c KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
159     int j = currentChar - 0x3099; // 0x3099, 0x309a, 0x309b, 0x309c ?
160 
161     if (2 <= j && j <= 3) // 0x309b or 0x309c
162         j -= 2;
163 
164     if (0 <= j && j <= 1) {
165       // 0 addresses a code point regarding 0x3099 or 0x309b (voiced sound mark),
166       // 1 is 0x309a or 0x309c (semi-voiced sound mark)
167       int i = int(previousChar - 0x3040); // i acts as an index of array
168       sal_Bool bCompose = sal_False;
169 
170       if (0 <= i && i <= (0x30ff - 0x3040) && composition_table[i][j])
171         bCompose = sal_True;
172 
173       // not to use combined KATAKANA LETTER VU
174       if ( previousChar == 0x30a6 && (nFlags & WIDTHFOLDNIG_DONT_USE_COMBINED_VU) )
175         bCompose = sal_False;
176 
177       if( bCompose ){
178         if (useOffset) {
179             position ++;
180             *p ++ = position ++;
181         }
182 	*dst ++ =  composition_table[i][j];
183 	previousChar = *src ++;
184 	nCount --;
185 	continue;
186       }
187     }
188     if (useOffset)
189         *p ++ = position ++;
190     *dst ++ = previousChar;
191     previousChar = currentChar;
192   }
193 
194   if (nCount == 0) {
195     if (useOffset)
196         *p = position;
197     *dst ++ = previousChar;
198   }
199 
200   *dst = (sal_Unicode) 0;
201 
202   newStr->length = sal_Int32(dst - newStr->buffer);
203  }
204   if (useOffset)
205       offset.realloc(newStr->length);
206   return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1.
207 }
208 
209 oneToOneMapping& widthfolding::gethalf2fullTable(void)
210 {
211     static oneToOneMappingWithFlag table(half2full, sizeof(half2full), HALF2FULL_NORMAL);
212     table.makeIndex();
213     return table;
214 }
215 
216 sal_Unicode widthfolding::getCompositionChar(sal_Unicode c1, sal_Unicode c2)
217 {
218     return composition_table[c1 - 0x3040][c2 - 0x3099];
219 }
220 
221 
222 oneToOneMapping& widthfolding::getfull2halfTableForASC()
223 {
224     static oneToOneMappingWithFlag table(full2half, sizeof(full2half), FULL2HALF_ASC_FUNCTION);
225     table.makeIndex();
226 
227     // bluedwarf: dirty hack!
228     // There is an exception. Additional conversion is required following:
229     //  0xFFE5 (FULLWIDTH YEN SIGN)  --> 0x005C (REVERSE SOLIDUS)
230     //
231     //  See the following page for detail:
232     // http://wiki.services.openoffice.org/wiki/Calc/Features/JIS_and_ASC_functions
233     int i, j, high, low;
234     int n = sizeof(full2halfASCException) / sizeof(UnicodePairWithFlag);
235     for( i = 0; i < n; i++ )
236     {
237         high = (full2halfASCException[i].first >> 8) & 0xFF;
238         low  = (full2halfASCException[i].first)      & 0xFF;
239 
240         if( !table.mpIndex[high] )
241         {
242             table.mpIndex[high] = new UnicodePairWithFlag*[256];
243 
244             for( j = 0; j < 256; j++ )
245                 table.mpIndex[high][j] = NULL;
246         }
247         table.mpIndex[high][low] = &full2halfASCException[i];
248     }
249 
250     return table;
251 }
252 
253 oneToOneMapping& widthfolding::gethalf2fullTableForJIS()
254 {
255     static oneToOneMappingWithFlag table(half2full, sizeof(half2full), HALF2FULL_JIS_FUNCTION);
256     table.makeIndex();
257 
258     // bluedwarf: dirty hack!
259     //  There are some exceptions. Additional conversion are required following:
260     //  0x0022 (QUOTATION MARK)  --> 0x201D (RIGHT DOUBLE QUOTATION MARK)
261     //  0x0027 (APOSTROPHE)      --> 0x2019 (RIGHT SINGLE QUOTATION MARK)
262     //  0x005C (REVERSE SOLIDUS) --> 0xFFE5 (FULLWIDTH YEN SIGN)
263     //  0x0060 (GRAVE ACCENT)    --> 0x2018 (LEFT SINGLE QUOTATION MARK)
264     //
265     //  See the following page for detail:
266     // http://wiki.services.openoffice.org/wiki/Calc/Features/JIS_and_ASC_functions
267     int i, j, high, low;
268     int n = sizeof(half2fullJISException) / sizeof(UnicodePairWithFlag);
269     for( i = 0; i < n; i++ )
270     {
271         high = (half2fullJISException[i].first >> 8) & 0xFF;
272         low  = (half2fullJISException[i].first)      & 0xFF;
273 
274         if( !table.mpIndex[high] )
275         {
276             table.mpIndex[high] = new UnicodePairWithFlag*[256];
277 
278             for( j = 0; j < 256; j++ )
279                 table.mpIndex[high][j] = NULL;
280         }
281         table.mpIndex[high][low] = &half2fullJISException[i];
282     }
283 
284     return table;
285 }
286 
287 oneToOneMapping& widthfolding::getfullKana2halfKanaTable()
288 {
289     static oneToOneMappingWithFlag table(full2half, sizeof(full2half), FULL2HALF_KATAKANA_ONLY);
290     table.makeIndex();
291     return table;
292 }
293 
294 oneToOneMapping& widthfolding::gethalfKana2fullKanaTable()
295 {
296     static oneToOneMappingWithFlag table(half2full, sizeof(half2full), HALF2FULL_KATAKANA_ONLY);
297     table.makeIndex();
298     return table;
299 }
300 
301 } } } }
302