1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_i18npool.hxx"
26
27 // prevent internal compiler error with MSVC6SP3
28 #include <utility>
29
30 #define TRANSLITERATION_ProlongedSoundMark_ja_JP
31 #include <transliteration_Ignore.hxx>
32
33 using namespace com::sun::star::uno;
34 using namespace com::sun::star::lang;
35 using namespace rtl;
36
37 namespace com { namespace sun { namespace star { namespace i18n {
38
39 static sal_Unicode table_normalwidth[] = {
40 // 0x0000, // 0x3040
41 0x3041, // 0x3041 HIRAGANA LETTER SMALL A
42 0x3042, // 0x3042 HIRAGANA LETTER A
43 0x3043, // 0x3043 HIRAGANA LETTER SMALL I
44 0x3044, // 0x3044 HIRAGANA LETTER I
45 0x3045, // 0x3045 HIRAGANA LETTER SMALL U
46 0x3046, // 0x3046 HIRAGANA LETTER U
47 0x3047, // 0x3047 HIRAGANA LETTER SMALL E
48 0x3048, // 0x3048 HIRAGANA LETTER E
49 0x3049, // 0x3049 HIRAGANA LETTER SMALL O
50 0x304a, // 0x304a HIRAGANA LETTER O
51 0x3042, // 0x304b HIRAGANA LETTER KA
52 0x3042, // 0x304c HIRAGANA LETTER GA
53 0x3044, // 0x304d HIRAGANA LETTER KI
54 0x3044, // 0x304e HIRAGANA LETTER GI
55 0x3046, // 0x304f HIRAGANA LETTER KU
56 0x3046, // 0x3050 HIRAGANA LETTER GU
57 0x3048, // 0x3051 HIRAGANA LETTER KE
58 0x3048, // 0x3052 HIRAGANA LETTER GE
59 0x304a, // 0x3053 HIRAGANA LETTER KO
60 0x304a, // 0x3054 HIRAGANA LETTER GO
61 0x3042, // 0x3055 HIRAGANA LETTER SA
62 0x3042, // 0x3056 HIRAGANA LETTER ZA
63 0x3044, // 0x3057 HIRAGANA LETTER SI
64 0x3044, // 0x3058 HIRAGANA LETTER ZI
65 0x3046, // 0x3059 HIRAGANA LETTER SU
66 0x3046, // 0x305a HIRAGANA LETTER ZU
67 0x3048, // 0x305b HIRAGANA LETTER SE
68 0x3048, // 0x305c HIRAGANA LETTER ZE
69 0x304a, // 0x305d HIRAGANA LETTER SO
70 0x304a, // 0x305e HIRAGANA LETTER ZO
71 0x3042, // 0x305f HIRAGANA LETTER TA
72 0x3042, // 0x3060 HIRAGANA LETTER DA
73 0x3044, // 0x3061 HIRAGANA LETTER TI
74 0x3044, // 0x3062 HIRAGANA LETTER DI
75 0x3045, // 0x3063 HIRAGANA LETTER SMALL TU
76 0x3046, // 0x3064 HIRAGANA LETTER TU
77 0x3046, // 0x3065 HIRAGANA LETTER DU
78 0x3048, // 0x3066 HIRAGANA LETTER TE
79 0x3048, // 0x3067 HIRAGANA LETTER DE
80 0x304a, // 0x3068 HIRAGANA LETTER TO
81 0x304a, // 0x3069 HIRAGANA LETTER DO
82 0x3042, // 0x306a HIRAGANA LETTER NA
83 0x3044, // 0x306b HIRAGANA LETTER NI
84 0x3046, // 0x306c HIRAGANA LETTER NU
85 0x3048, // 0x306d HIRAGANA LETTER NE
86 0x304a, // 0x306e HIRAGANA LETTER NO
87 0x3042, // 0x306f HIRAGANA LETTER HA
88 0x3042, // 0x3070 HIRAGANA LETTER BA
89 0x3042, // 0x3071 HIRAGANA LETTER PA
90 0x3044, // 0x3072 HIRAGANA LETTER HI
91 0x3044, // 0x3073 HIRAGANA LETTER BI
92 0x3044, // 0x3074 HIRAGANA LETTER PI
93 0x3046, // 0x3075 HIRAGANA LETTER HU
94 0x3046, // 0x3076 HIRAGANA LETTER BU
95 0x3046, // 0x3077 HIRAGANA LETTER PU
96 0x3048, // 0x3078 HIRAGANA LETTER HE
97 0x3048, // 0x3079 HIRAGANA LETTER BE
98 0x3048, // 0x307a HIRAGANA LETTER PE
99 0x304a, // 0x307b HIRAGANA LETTER HO
100 0x304a, // 0x307c HIRAGANA LETTER BO
101 0x304a, // 0x307d HIRAGANA LETTER PO
102 0x3042, // 0x307e HIRAGANA LETTER MA
103 0x3044, // 0x307f HIRAGANA LETTER MI
104 0x3046, // 0x3080 HIRAGANA LETTER MU
105 0x3048, // 0x3081 HIRAGANA LETTER ME
106 0x304a, // 0x3082 HIRAGANA LETTER MO
107 0x3041, // 0x3083 HIRAGANA LETTER SMALL YA
108 0x3042, // 0x3084 HIRAGANA LETTER YA
109 0x3045, // 0x3085 HIRAGANA LETTER SMALL YU
110 0x3046, // 0x3086 HIRAGANA LETTER YU
111 0x3049, // 0x3087 HIRAGANA LETTER SMALL YO
112 0x304a, // 0x3088 HIRAGANA LETTER YO
113 0x3042, // 0x3089 HIRAGANA LETTER RA
114 0x3044, // 0x308a HIRAGANA LETTER RI
115 0x3046, // 0x308b HIRAGANA LETTER RU
116 0x3048, // 0x308c HIRAGANA LETTER RE
117 0x304a, // 0x308d HIRAGANA LETTER RO
118 0x3041, // 0x308e HIRAGANA LETTER SMALL WA
119 0x3042, // 0x308f HIRAGANA LETTER WA
120 0x3044, // 0x3090 HIRAGANA LETTER WI
121 0x3048, // 0x3091 HIRAGANA LETTER WE
122 0x304a, // 0x3092 HIRAGANA LETTER WO
123 0x0000, // 0x3093 HIRAGANA LETTER N
124 0x3046, // 0x3094 HIRAGANA LETTER VU
125 0x0000, // 0x3095
126 0x0000, // 0x3096
127 0x0000, // 0x3097
128 0x0000, // 0x3098
129 0x0000, // 0x3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
130 0x0000, // 0x309a COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
131 0x0000, // 0x309b KATAKANA-HIRAGANA VOICED SOUND MARK
132 0x0000, // 0x309c KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
133 0x0000, // 0x309d HIRAGANA ITERATION MARK
134 0x0000, // 0x309e HIRAGANA VOICED ITERATION MARK
135 0x0000, // 0x309f
136 0x0000, // 0x30a0
137 0x30a1, // 0x30a1 KATAKANA LETTER SMALL A
138 0x30a2, // 0x30a2 KATAKANA LETTER A
139 0x30a3, // 0x30a3 KATAKANA LETTER SMALL I
140 0x30a4, // 0x30a4 KATAKANA LETTER I
141 0x30a5, // 0x30a5 KATAKANA LETTER SMALL U
142 0x30a6, // 0x30a6 KATAKANA LETTER U
143 0x30a7, // 0x30a7 KATAKANA LETTER SMALL E
144 0x30a8, // 0x30a8 KATAKANA LETTER E
145 0x30a9, // 0x30a9 KATAKANA LETTER SMALL O
146 0x30aa, // 0x30aa KATAKANA LETTER O
147 0x30a2, // 0x30ab KATAKANA LETTER KA
148 0x30a2, // 0x30ac KATAKANA LETTER GA
149 0x30a4, // 0x30ad KATAKANA LETTER KI
150 0x30a4, // 0x30ae KATAKANA LETTER GI
151 0x30a6, // 0x30af KATAKANA LETTER KU
152 0x30a6, // 0x30b0 KATAKANA LETTER GU
153 0x30a8, // 0x30b1 KATAKANA LETTER KE
154 0x30a8, // 0x30b2 KATAKANA LETTER GE
155 0x30aa, // 0x30b3 KATAKANA LETTER KO
156 0x30aa, // 0x30b4 KATAKANA LETTER GO
157 0x30a2, // 0x30b5 KATAKANA LETTER SA
158 0x30a2, // 0x30b6 KATAKANA LETTER ZA
159 0x30a4, // 0x30b7 KATAKANA LETTER SI
160 0x30a4, // 0x30b8 KATAKANA LETTER ZI
161 0x30a6, // 0x30b9 KATAKANA LETTER SU
162 0x30a6, // 0x30ba KATAKANA LETTER ZU
163 0x30a8, // 0x30bb KATAKANA LETTER SE
164 0x30a8, // 0x30bc KATAKANA LETTER ZE
165 0x30aa, // 0x30bd KATAKANA LETTER SO
166 0x30aa, // 0x30be KATAKANA LETTER ZO
167 0x30a2, // 0x30bf KATAKANA LETTER TA
168 0x30a2, // 0x30c0 KATAKANA LETTER DA
169 0x30a4, // 0x30c1 KATAKANA LETTER TI
170 0x30a4, // 0x30c2 KATAKANA LETTER DI
171 0x30a5, // 0x30c3 KATAKANA LETTER SMALL TU
172 0x30a6, // 0x30c4 KATAKANA LETTER TU
173 0x30a6, // 0x30c5 KATAKANA LETTER DU
174 0x30a8, // 0x30c6 KATAKANA LETTER TE
175 0x30a8, // 0x30c7 KATAKANA LETTER DE
176 0x30aa, // 0x30c8 KATAKANA LETTER TO
177 0x30aa, // 0x30c9 KATAKANA LETTER DO
178 0x30a2, // 0x30ca KATAKANA LETTER NA
179 0x30a4, // 0x30cb KATAKANA LETTER NI
180 0x30a6, // 0x30cc KATAKANA LETTER NU
181 0x30a8, // 0x30cd KATAKANA LETTER NE
182 0x30aa, // 0x30ce KATAKANA LETTER NO
183 0x30a2, // 0x30cf KATAKANA LETTER HA
184 0x30a2, // 0x30d0 KATAKANA LETTER BA
185 0x30a2, // 0x30d1 KATAKANA LETTER PA
186 0x30a4, // 0x30d2 KATAKANA LETTER HI
187 0x30a4, // 0x30d3 KATAKANA LETTER BI
188 0x30a4, // 0x30d4 KATAKANA LETTER PI
189 0x30a6, // 0x30d5 KATAKANA LETTER HU
190 0x30a6, // 0x30d6 KATAKANA LETTER BU
191 0x30a6, // 0x30d7 KATAKANA LETTER PU
192 0x30a8, // 0x30d8 KATAKANA LETTER HE
193 0x30a8, // 0x30d9 KATAKANA LETTER BE
194 0x30a8, // 0x30da KATAKANA LETTER PE
195 0x30aa, // 0x30db KATAKANA LETTER HO
196 0x30aa, // 0x30dc KATAKANA LETTER BO
197 0x30aa, // 0x30dd KATAKANA LETTER PO
198 0x30a2, // 0x30de KATAKANA LETTER MA
199 0x30a4, // 0x30df KATAKANA LETTER MI
200 0x30a6, // 0x30e0 KATAKANA LETTER MU
201 0x30a8, // 0x30e1 KATAKANA LETTER ME
202 0x30aa, // 0x30e2 KATAKANA LETTER MO
203 0x30a1, // 0x30e3 KATAKANA LETTER SMALL YA
204 0x30a2, // 0x30e4 KATAKANA LETTER YA
205 0x30a5, // 0x30e5 KATAKANA LETTER SMALL YU
206 0x30a6, // 0x30e6 KATAKANA LETTER YU
207 0x30a9, // 0x30e7 KATAKANA LETTER SMALL YO
208 0x30aa, // 0x30e8 KATAKANA LETTER YO
209 0x30a2, // 0x30e9 KATAKANA LETTER RA
210 0x30a4, // 0x30ea KATAKANA LETTER RI
211 0x30a6, // 0x30eb KATAKANA LETTER RU
212 0x30a8, // 0x30ec KATAKANA LETTER RE
213 0x30aa, // 0x30ed KATAKANA LETTER RO
214 0x30a1, // 0x30ee KATAKANA LETTER SMALL WA
215 0x30a2, // 0x30ef KATAKANA LETTER WA
216 0x30a4, // 0x30f0 KATAKANA LETTER WI
217 0x30a8, // 0x30f1 KATAKANA LETTER WE
218 0x30aa, // 0x30f2 KATAKANA LETTER WO
219 0x0000, // 0x30f3 KATAKANA LETTER N
220 0x30a6, // 0x30f4 KATAKANA LETTER VU
221 0x30a1, // 0x30f5 KATAKANA LETTER SMALL KA
222 0x30a7, // 0x30f6 KATAKANA LETTER SMALL KE
223 0x30a2, // 0x30f7 KATAKANA LETTER VA
224 0x30a4, // 0x30f8 KATAKANA LETTER VI
225 0x30a8, // 0x30f9 KATAKANA LETTER VE
226 0x30aa // 0x30fa KATAKANA LETTER VO
227 // 0x0000, // 0x30fb KATAKANA MIDDLE DOT
228 // 0x0000, // 0x30fc KATAKANA-HIRAGANA PROLONGED SOUND MARK
229 // 0x0000, // 0x30fd KATAKANA ITERATION MARK
230 // 0x0000, // 0x30fe KATAKANA VOICED ITERATION MARK
231 // 0x0000 // 0x30ff
232 };
233
234 static sal_Unicode table_halfwidth[] = {
235 // 0x0000, // 0xff61 HALFWIDTH IDEOGRAPHIC FULL STOP
236 // 0x0000, // 0xff62 HALFWIDTH LEFT CORNER BRACKET
237 // 0x0000, // 0xff63 HALFWIDTH RIGHT CORNER BRACKET
238 // 0x0000, // 0xff64 HALFWIDTH IDEOGRAPHIC COMMA
239 // 0x0000, // 0xff65 HALFWIDTH KATAKANA MIDDLE DOT
240 0xff75, // 0xff66 HALFWIDTH KATAKANA LETTER WO
241 0xff67, // 0xff67 HALFWIDTH KATAKANA LETTER SMALL A
242 0xff68, // 0xff68 HALFWIDTH KATAKANA LETTER SMALL I
243 0xff69, // 0xff69 HALFWIDTH KATAKANA LETTER SMALL U
244 0xff6a, // 0xff6a HALFWIDTH KATAKANA LETTER SMALL E
245 0xff6b, // 0xff6b HALFWIDTH KATAKANA LETTER SMALL O
246 0xff67, // 0xff6c HALFWIDTH KATAKANA LETTER SMALL YA
247 0xff69, // 0xff6d HALFWIDTH KATAKANA LETTER SMALL YU
248 0xff6b, // 0xff6e HALFWIDTH KATAKANA LETTER SMALL YO
249 0xff69, // 0xff6f HALFWIDTH KATAKANA LETTER SMALL TU
250 0x0000, // 0xff70 HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
251 0xff71, // 0xff71 HALFWIDTH KATAKANA LETTER A
252 0xff72, // 0xff72 HALFWIDTH KATAKANA LETTER I
253 0xff73, // 0xff73 HALFWIDTH KATAKANA LETTER U
254 0xff74, // 0xff74 HALFWIDTH KATAKANA LETTER E
255 0xff75, // 0xff75 HALFWIDTH KATAKANA LETTER O
256 0xff71, // 0xff76 HALFWIDTH KATAKANA LETTER KA
257 0xff72, // 0xff77 HALFWIDTH KATAKANA LETTER KI
258 0xff73, // 0xff78 HALFWIDTH KATAKANA LETTER KU
259 0xff74, // 0xff79 HALFWIDTH KATAKANA LETTER KE
260 0xff75, // 0xff7a HALFWIDTH KATAKANA LETTER KO
261 0xff71, // 0xff7b HALFWIDTH KATAKANA LETTER SA
262 0xff72, // 0xff7c HALFWIDTH KATAKANA LETTER SI
263 0xff73, // 0xff7d HALFWIDTH KATAKANA LETTER SU
264 0xff74, // 0xff7e HALFWIDTH KATAKANA LETTER SE
265 0xff75, // 0xff7f HALFWIDTH KATAKANA LETTER SO
266 0xff71, // 0xff80 HALFWIDTH KATAKANA LETTER TA
267 0xff72, // 0xff81 HALFWIDTH KATAKANA LETTER TI
268 0xff73, // 0xff82 HALFWIDTH KATAKANA LETTER TU
269 0xff74, // 0xff83 HALFWIDTH KATAKANA LETTER TE
270 0xff75, // 0xff84 HALFWIDTH KATAKANA LETTER TO
271 0xff71, // 0xff85 HALFWIDTH KATAKANA LETTER NA
272 0xff72, // 0xff86 HALFWIDTH KATAKANA LETTER NI
273 0xff73, // 0xff87 HALFWIDTH KATAKANA LETTER NU
274 0xff74, // 0xff88 HALFWIDTH KATAKANA LETTER NE
275 0xff75, // 0xff89 HALFWIDTH KATAKANA LETTER NO
276 0xff71, // 0xff8a HALFWIDTH KATAKANA LETTER HA
277 0xff72, // 0xff8b HALFWIDTH KATAKANA LETTER HI
278 0xff73, // 0xff8c HALFWIDTH KATAKANA LETTER HU
279 0xff74, // 0xff8d HALFWIDTH KATAKANA LETTER HE
280 0xff75, // 0xff8e HALFWIDTH KATAKANA LETTER HO
281 0xff71, // 0xff8f HALFWIDTH KATAKANA LETTER MA
282 0xff72, // 0xff90 HALFWIDTH KATAKANA LETTER MI
283 0xff73, // 0xff91 HALFWIDTH KATAKANA LETTER MU
284 0xff74, // 0xff92 HALFWIDTH KATAKANA LETTER ME
285 0xff75, // 0xff93 HALFWIDTH KATAKANA LETTER MO
286 0xff71, // 0xff94 HALFWIDTH KATAKANA LETTER YA
287 0xff73, // 0xff95 HALFWIDTH KATAKANA LETTER YU
288 0xff75, // 0xff96 HALFWIDTH KATAKANA LETTER YO
289 0xff71, // 0xff97 HALFWIDTH KATAKANA LETTER RA
290 0xff72, // 0xff98 HALFWIDTH KATAKANA LETTER RI
291 0xff73, // 0xff99 HALFWIDTH KATAKANA LETTER RU
292 0xff74, // 0xff9a HALFWIDTH KATAKANA LETTER RE
293 0xff75, // 0xff9b HALFWIDTH KATAKANA LETTER RO
294 0xff71 // 0xff9c HALFWIDTH KATAKANA LETTER WA
295 // 0x0000, // 0xff9d HALFWIDTH KATAKANA LETTER N
296 // 0x0000, // 0xff9e HALFWIDTH KATAKANA VOICED SOUND MARK
297 // 0x0000 // 0xff9f HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
298 };
299
300
301 OUString SAL_CALL
folding(const OUString & inStr,sal_Int32 startPos,sal_Int32 nCount,Sequence<sal_Int32> & offset)302 ignoreProlongedSoundMark_ja_JP::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset )
303 throw(RuntimeException)
304 {
305 // Create a string buffer which can hold nCount + 1 characters.
306 // The reference count is 0 now.
307 rtl_uString * newStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h
308 sal_Unicode * dst = newStr->buffer;
309 const sal_Unicode * src = inStr.getStr() + startPos;
310
311 sal_Int32 *p = 0;
312 sal_Int32 position = 0;
313
314 if (useOffset) {
315 // Allocate nCount length to offset argument.
316 offset.realloc( nCount );
317 p = offset.getArray();
318 position = startPos;
319 }
320
321 //
322 sal_Unicode previousChar = *src ++;
323 sal_Unicode currentChar;
324
325 // Conversion
326 while (-- nCount > 0) {
327 currentChar = *src ++;
328
329 if (currentChar == 0x30fc || // KATAKANA-HIRAGANA PROLONGED SOUND MARK
330 currentChar == 0xff70) { // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
331
332 if (0x3041 <= previousChar && previousChar <= 0x30fa) {
333 currentChar = table_normalwidth[ previousChar - 0x3041 ];
334 }
335 else if (0xff66 <= previousChar && previousChar <= 0xff9c) {
336 currentChar = table_halfwidth[ previousChar - 0xff66 ];
337 }
338 }
339
340 if (useOffset)
341 *p ++ = position ++;
342 *dst ++ = previousChar;
343 previousChar = currentChar;
344 }
345
346 if (nCount == 0) {
347 if (useOffset)
348 *p = position;
349 *dst ++ = previousChar;
350 }
351
352 *dst = (sal_Unicode) 0;
353
354 newStr->length = sal_Int32(dst - newStr->buffer);
355 if (useOffset)
356 offset.realloc(newStr->length);
357 return OUString( newStr, SAL_NO_ACQUIRE ); // take over ownership of <newStr>
358
359 }
360
361 } } } }
362