1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 // MARKER(update_precomp.py): autogen include statement, do not remove 29 #include "precompiled_i18npool.hxx" 30 31 // prevent internal compiler error with MSVC6SP3 32 #include <utility> 33 34 #define TRANSLITERATION_ProlongedSoundMark_ja_JP 35 #include <transliteration_Ignore.hxx> 36 37 using namespace com::sun::star::uno; 38 using namespace com::sun::star::lang; 39 using namespace rtl; 40 41 namespace com { namespace sun { namespace star { namespace i18n { 42 43 static sal_Unicode table_normalwidth[] = { 44 // 0x0000, // 0x3040 45 0x3041, // 0x3041 HIRAGANA LETTER SMALL A 46 0x3042, // 0x3042 HIRAGANA LETTER A 47 0x3043, // 0x3043 HIRAGANA LETTER SMALL I 48 0x3044, // 0x3044 HIRAGANA LETTER I 49 0x3045, // 0x3045 HIRAGANA LETTER SMALL U 50 0x3046, // 0x3046 HIRAGANA LETTER U 51 0x3047, // 0x3047 HIRAGANA LETTER SMALL E 52 0x3048, // 0x3048 HIRAGANA LETTER E 53 0x3049, // 0x3049 HIRAGANA LETTER SMALL O 54 0x304a, // 0x304a HIRAGANA LETTER O 55 0x3042, // 0x304b HIRAGANA LETTER KA 56 0x3042, // 0x304c HIRAGANA LETTER GA 57 0x3044, // 0x304d HIRAGANA LETTER KI 58 0x3044, // 0x304e HIRAGANA LETTER GI 59 0x3046, // 0x304f HIRAGANA LETTER KU 60 0x3046, // 0x3050 HIRAGANA LETTER GU 61 0x3048, // 0x3051 HIRAGANA LETTER KE 62 0x3048, // 0x3052 HIRAGANA LETTER GE 63 0x304a, // 0x3053 HIRAGANA LETTER KO 64 0x304a, // 0x3054 HIRAGANA LETTER GO 65 0x3042, // 0x3055 HIRAGANA LETTER SA 66 0x3042, // 0x3056 HIRAGANA LETTER ZA 67 0x3044, // 0x3057 HIRAGANA LETTER SI 68 0x3044, // 0x3058 HIRAGANA LETTER ZI 69 0x3046, // 0x3059 HIRAGANA LETTER SU 70 0x3046, // 0x305a HIRAGANA LETTER ZU 71 0x3048, // 0x305b HIRAGANA LETTER SE 72 0x3048, // 0x305c HIRAGANA LETTER ZE 73 0x304a, // 0x305d HIRAGANA LETTER SO 74 0x304a, // 0x305e HIRAGANA LETTER ZO 75 0x3042, // 0x305f HIRAGANA LETTER TA 76 0x3042, // 0x3060 HIRAGANA LETTER DA 77 0x3044, // 0x3061 HIRAGANA LETTER TI 78 0x3044, // 0x3062 HIRAGANA LETTER DI 79 0x3045, // 0x3063 HIRAGANA LETTER SMALL TU 80 0x3046, // 0x3064 HIRAGANA LETTER TU 81 0x3046, // 0x3065 HIRAGANA LETTER DU 82 0x3048, // 0x3066 HIRAGANA LETTER TE 83 0x3048, // 0x3067 HIRAGANA LETTER DE 84 0x304a, // 0x3068 HIRAGANA LETTER TO 85 0x304a, // 0x3069 HIRAGANA LETTER DO 86 0x3042, // 0x306a HIRAGANA LETTER NA 87 0x3044, // 0x306b HIRAGANA LETTER NI 88 0x3046, // 0x306c HIRAGANA LETTER NU 89 0x3048, // 0x306d HIRAGANA LETTER NE 90 0x304a, // 0x306e HIRAGANA LETTER NO 91 0x3042, // 0x306f HIRAGANA LETTER HA 92 0x3042, // 0x3070 HIRAGANA LETTER BA 93 0x3042, // 0x3071 HIRAGANA LETTER PA 94 0x3044, // 0x3072 HIRAGANA LETTER HI 95 0x3044, // 0x3073 HIRAGANA LETTER BI 96 0x3044, // 0x3074 HIRAGANA LETTER PI 97 0x3046, // 0x3075 HIRAGANA LETTER HU 98 0x3046, // 0x3076 HIRAGANA LETTER BU 99 0x3046, // 0x3077 HIRAGANA LETTER PU 100 0x3048, // 0x3078 HIRAGANA LETTER HE 101 0x3048, // 0x3079 HIRAGANA LETTER BE 102 0x3048, // 0x307a HIRAGANA LETTER PE 103 0x304a, // 0x307b HIRAGANA LETTER HO 104 0x304a, // 0x307c HIRAGANA LETTER BO 105 0x304a, // 0x307d HIRAGANA LETTER PO 106 0x3042, // 0x307e HIRAGANA LETTER MA 107 0x3044, // 0x307f HIRAGANA LETTER MI 108 0x3046, // 0x3080 HIRAGANA LETTER MU 109 0x3048, // 0x3081 HIRAGANA LETTER ME 110 0x304a, // 0x3082 HIRAGANA LETTER MO 111 0x3041, // 0x3083 HIRAGANA LETTER SMALL YA 112 0x3042, // 0x3084 HIRAGANA LETTER YA 113 0x3045, // 0x3085 HIRAGANA LETTER SMALL YU 114 0x3046, // 0x3086 HIRAGANA LETTER YU 115 0x3049, // 0x3087 HIRAGANA LETTER SMALL YO 116 0x304a, // 0x3088 HIRAGANA LETTER YO 117 0x3042, // 0x3089 HIRAGANA LETTER RA 118 0x3044, // 0x308a HIRAGANA LETTER RI 119 0x3046, // 0x308b HIRAGANA LETTER RU 120 0x3048, // 0x308c HIRAGANA LETTER RE 121 0x304a, // 0x308d HIRAGANA LETTER RO 122 0x3041, // 0x308e HIRAGANA LETTER SMALL WA 123 0x3042, // 0x308f HIRAGANA LETTER WA 124 0x3044, // 0x3090 HIRAGANA LETTER WI 125 0x3048, // 0x3091 HIRAGANA LETTER WE 126 0x304a, // 0x3092 HIRAGANA LETTER WO 127 0x0000, // 0x3093 HIRAGANA LETTER N 128 0x3046, // 0x3094 HIRAGANA LETTER VU 129 0x0000, // 0x3095 130 0x0000, // 0x3096 131 0x0000, // 0x3097 132 0x0000, // 0x3098 133 0x0000, // 0x3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK 134 0x0000, // 0x309a COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 135 0x0000, // 0x309b KATAKANA-HIRAGANA VOICED SOUND MARK 136 0x0000, // 0x309c KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 137 0x0000, // 0x309d HIRAGANA ITERATION MARK 138 0x0000, // 0x309e HIRAGANA VOICED ITERATION MARK 139 0x0000, // 0x309f 140 0x0000, // 0x30a0 141 0x30a1, // 0x30a1 KATAKANA LETTER SMALL A 142 0x30a2, // 0x30a2 KATAKANA LETTER A 143 0x30a3, // 0x30a3 KATAKANA LETTER SMALL I 144 0x30a4, // 0x30a4 KATAKANA LETTER I 145 0x30a5, // 0x30a5 KATAKANA LETTER SMALL U 146 0x30a6, // 0x30a6 KATAKANA LETTER U 147 0x30a7, // 0x30a7 KATAKANA LETTER SMALL E 148 0x30a8, // 0x30a8 KATAKANA LETTER E 149 0x30a9, // 0x30a9 KATAKANA LETTER SMALL O 150 0x30aa, // 0x30aa KATAKANA LETTER O 151 0x30a2, // 0x30ab KATAKANA LETTER KA 152 0x30a2, // 0x30ac KATAKANA LETTER GA 153 0x30a4, // 0x30ad KATAKANA LETTER KI 154 0x30a4, // 0x30ae KATAKANA LETTER GI 155 0x30a6, // 0x30af KATAKANA LETTER KU 156 0x30a6, // 0x30b0 KATAKANA LETTER GU 157 0x30a8, // 0x30b1 KATAKANA LETTER KE 158 0x30a8, // 0x30b2 KATAKANA LETTER GE 159 0x30aa, // 0x30b3 KATAKANA LETTER KO 160 0x30aa, // 0x30b4 KATAKANA LETTER GO 161 0x30a2, // 0x30b5 KATAKANA LETTER SA 162 0x30a2, // 0x30b6 KATAKANA LETTER ZA 163 0x30a4, // 0x30b7 KATAKANA LETTER SI 164 0x30a4, // 0x30b8 KATAKANA LETTER ZI 165 0x30a6, // 0x30b9 KATAKANA LETTER SU 166 0x30a6, // 0x30ba KATAKANA LETTER ZU 167 0x30a8, // 0x30bb KATAKANA LETTER SE 168 0x30a8, // 0x30bc KATAKANA LETTER ZE 169 0x30aa, // 0x30bd KATAKANA LETTER SO 170 0x30aa, // 0x30be KATAKANA LETTER ZO 171 0x30a2, // 0x30bf KATAKANA LETTER TA 172 0x30a2, // 0x30c0 KATAKANA LETTER DA 173 0x30a4, // 0x30c1 KATAKANA LETTER TI 174 0x30a4, // 0x30c2 KATAKANA LETTER DI 175 0x30a5, // 0x30c3 KATAKANA LETTER SMALL TU 176 0x30a6, // 0x30c4 KATAKANA LETTER TU 177 0x30a6, // 0x30c5 KATAKANA LETTER DU 178 0x30a8, // 0x30c6 KATAKANA LETTER TE 179 0x30a8, // 0x30c7 KATAKANA LETTER DE 180 0x30aa, // 0x30c8 KATAKANA LETTER TO 181 0x30aa, // 0x30c9 KATAKANA LETTER DO 182 0x30a2, // 0x30ca KATAKANA LETTER NA 183 0x30a4, // 0x30cb KATAKANA LETTER NI 184 0x30a6, // 0x30cc KATAKANA LETTER NU 185 0x30a8, // 0x30cd KATAKANA LETTER NE 186 0x30aa, // 0x30ce KATAKANA LETTER NO 187 0x30a2, // 0x30cf KATAKANA LETTER HA 188 0x30a2, // 0x30d0 KATAKANA LETTER BA 189 0x30a2, // 0x30d1 KATAKANA LETTER PA 190 0x30a4, // 0x30d2 KATAKANA LETTER HI 191 0x30a4, // 0x30d3 KATAKANA LETTER BI 192 0x30a4, // 0x30d4 KATAKANA LETTER PI 193 0x30a6, // 0x30d5 KATAKANA LETTER HU 194 0x30a6, // 0x30d6 KATAKANA LETTER BU 195 0x30a6, // 0x30d7 KATAKANA LETTER PU 196 0x30a8, // 0x30d8 KATAKANA LETTER HE 197 0x30a8, // 0x30d9 KATAKANA LETTER BE 198 0x30a8, // 0x30da KATAKANA LETTER PE 199 0x30aa, // 0x30db KATAKANA LETTER HO 200 0x30aa, // 0x30dc KATAKANA LETTER BO 201 0x30aa, // 0x30dd KATAKANA LETTER PO 202 0x30a2, // 0x30de KATAKANA LETTER MA 203 0x30a4, // 0x30df KATAKANA LETTER MI 204 0x30a6, // 0x30e0 KATAKANA LETTER MU 205 0x30a8, // 0x30e1 KATAKANA LETTER ME 206 0x30aa, // 0x30e2 KATAKANA LETTER MO 207 0x30a1, // 0x30e3 KATAKANA LETTER SMALL YA 208 0x30a2, // 0x30e4 KATAKANA LETTER YA 209 0x30a5, // 0x30e5 KATAKANA LETTER SMALL YU 210 0x30a6, // 0x30e6 KATAKANA LETTER YU 211 0x30a9, // 0x30e7 KATAKANA LETTER SMALL YO 212 0x30aa, // 0x30e8 KATAKANA LETTER YO 213 0x30a2, // 0x30e9 KATAKANA LETTER RA 214 0x30a4, // 0x30ea KATAKANA LETTER RI 215 0x30a6, // 0x30eb KATAKANA LETTER RU 216 0x30a8, // 0x30ec KATAKANA LETTER RE 217 0x30aa, // 0x30ed KATAKANA LETTER RO 218 0x30a1, // 0x30ee KATAKANA LETTER SMALL WA 219 0x30a2, // 0x30ef KATAKANA LETTER WA 220 0x30a4, // 0x30f0 KATAKANA LETTER WI 221 0x30a8, // 0x30f1 KATAKANA LETTER WE 222 0x30aa, // 0x30f2 KATAKANA LETTER WO 223 0x0000, // 0x30f3 KATAKANA LETTER N 224 0x30a6, // 0x30f4 KATAKANA LETTER VU 225 0x30a1, // 0x30f5 KATAKANA LETTER SMALL KA 226 0x30a7, // 0x30f6 KATAKANA LETTER SMALL KE 227 0x30a2, // 0x30f7 KATAKANA LETTER VA 228 0x30a4, // 0x30f8 KATAKANA LETTER VI 229 0x30a8, // 0x30f9 KATAKANA LETTER VE 230 0x30aa // 0x30fa KATAKANA LETTER VO 231 // 0x0000, // 0x30fb KATAKANA MIDDLE DOT 232 // 0x0000, // 0x30fc KATAKANA-HIRAGANA PROLONGED SOUND MARK 233 // 0x0000, // 0x30fd KATAKANA ITERATION MARK 234 // 0x0000, // 0x30fe KATAKANA VOICED ITERATION MARK 235 // 0x0000 // 0x30ff 236 }; 237 238 static sal_Unicode table_halfwidth[] = { 239 // 0x0000, // 0xff61 HALFWIDTH IDEOGRAPHIC FULL STOP 240 // 0x0000, // 0xff62 HALFWIDTH LEFT CORNER BRACKET 241 // 0x0000, // 0xff63 HALFWIDTH RIGHT CORNER BRACKET 242 // 0x0000, // 0xff64 HALFWIDTH IDEOGRAPHIC COMMA 243 // 0x0000, // 0xff65 HALFWIDTH KATAKANA MIDDLE DOT 244 0xff75, // 0xff66 HALFWIDTH KATAKANA LETTER WO 245 0xff67, // 0xff67 HALFWIDTH KATAKANA LETTER SMALL A 246 0xff68, // 0xff68 HALFWIDTH KATAKANA LETTER SMALL I 247 0xff69, // 0xff69 HALFWIDTH KATAKANA LETTER SMALL U 248 0xff6a, // 0xff6a HALFWIDTH KATAKANA LETTER SMALL E 249 0xff6b, // 0xff6b HALFWIDTH KATAKANA LETTER SMALL O 250 0xff67, // 0xff6c HALFWIDTH KATAKANA LETTER SMALL YA 251 0xff69, // 0xff6d HALFWIDTH KATAKANA LETTER SMALL YU 252 0xff6b, // 0xff6e HALFWIDTH KATAKANA LETTER SMALL YO 253 0xff69, // 0xff6f HALFWIDTH KATAKANA LETTER SMALL TU 254 0x0000, // 0xff70 HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK 255 0xff71, // 0xff71 HALFWIDTH KATAKANA LETTER A 256 0xff72, // 0xff72 HALFWIDTH KATAKANA LETTER I 257 0xff73, // 0xff73 HALFWIDTH KATAKANA LETTER U 258 0xff74, // 0xff74 HALFWIDTH KATAKANA LETTER E 259 0xff75, // 0xff75 HALFWIDTH KATAKANA LETTER O 260 0xff71, // 0xff76 HALFWIDTH KATAKANA LETTER KA 261 0xff72, // 0xff77 HALFWIDTH KATAKANA LETTER KI 262 0xff73, // 0xff78 HALFWIDTH KATAKANA LETTER KU 263 0xff74, // 0xff79 HALFWIDTH KATAKANA LETTER KE 264 0xff75, // 0xff7a HALFWIDTH KATAKANA LETTER KO 265 0xff71, // 0xff7b HALFWIDTH KATAKANA LETTER SA 266 0xff72, // 0xff7c HALFWIDTH KATAKANA LETTER SI 267 0xff73, // 0xff7d HALFWIDTH KATAKANA LETTER SU 268 0xff74, // 0xff7e HALFWIDTH KATAKANA LETTER SE 269 0xff75, // 0xff7f HALFWIDTH KATAKANA LETTER SO 270 0xff71, // 0xff80 HALFWIDTH KATAKANA LETTER TA 271 0xff72, // 0xff81 HALFWIDTH KATAKANA LETTER TI 272 0xff73, // 0xff82 HALFWIDTH KATAKANA LETTER TU 273 0xff74, // 0xff83 HALFWIDTH KATAKANA LETTER TE 274 0xff75, // 0xff84 HALFWIDTH KATAKANA LETTER TO 275 0xff71, // 0xff85 HALFWIDTH KATAKANA LETTER NA 276 0xff72, // 0xff86 HALFWIDTH KATAKANA LETTER NI 277 0xff73, // 0xff87 HALFWIDTH KATAKANA LETTER NU 278 0xff74, // 0xff88 HALFWIDTH KATAKANA LETTER NE 279 0xff75, // 0xff89 HALFWIDTH KATAKANA LETTER NO 280 0xff71, // 0xff8a HALFWIDTH KATAKANA LETTER HA 281 0xff72, // 0xff8b HALFWIDTH KATAKANA LETTER HI 282 0xff73, // 0xff8c HALFWIDTH KATAKANA LETTER HU 283 0xff74, // 0xff8d HALFWIDTH KATAKANA LETTER HE 284 0xff75, // 0xff8e HALFWIDTH KATAKANA LETTER HO 285 0xff71, // 0xff8f HALFWIDTH KATAKANA LETTER MA 286 0xff72, // 0xff90 HALFWIDTH KATAKANA LETTER MI 287 0xff73, // 0xff91 HALFWIDTH KATAKANA LETTER MU 288 0xff74, // 0xff92 HALFWIDTH KATAKANA LETTER ME 289 0xff75, // 0xff93 HALFWIDTH KATAKANA LETTER MO 290 0xff71, // 0xff94 HALFWIDTH KATAKANA LETTER YA 291 0xff73, // 0xff95 HALFWIDTH KATAKANA LETTER YU 292 0xff75, // 0xff96 HALFWIDTH KATAKANA LETTER YO 293 0xff71, // 0xff97 HALFWIDTH KATAKANA LETTER RA 294 0xff72, // 0xff98 HALFWIDTH KATAKANA LETTER RI 295 0xff73, // 0xff99 HALFWIDTH KATAKANA LETTER RU 296 0xff74, // 0xff9a HALFWIDTH KATAKANA LETTER RE 297 0xff75, // 0xff9b HALFWIDTH KATAKANA LETTER RO 298 0xff71 // 0xff9c HALFWIDTH KATAKANA LETTER WA 299 // 0x0000, // 0xff9d HALFWIDTH KATAKANA LETTER N 300 // 0x0000, // 0xff9e HALFWIDTH KATAKANA VOICED SOUND MARK 301 // 0x0000 // 0xff9f HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK 302 }; 303 304 305 OUString SAL_CALL 306 ignoreProlongedSoundMark_ja_JP::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) 307 throw(RuntimeException) 308 { 309 // Create a string buffer which can hold nCount + 1 characters. 310 // The reference count is 0 now. 311 rtl_uString * newStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h 312 sal_Unicode * dst = newStr->buffer; 313 const sal_Unicode * src = inStr.getStr() + startPos; 314 315 sal_Int32 *p = 0; 316 sal_Int32 position = 0; 317 318 if (useOffset) { 319 // Allocate nCount length to offset argument. 320 offset.realloc( nCount ); 321 p = offset.getArray(); 322 position = startPos; 323 } 324 325 // 326 sal_Unicode previousChar = *src ++; 327 sal_Unicode currentChar; 328 329 // Conversion 330 while (-- nCount > 0) { 331 currentChar = *src ++; 332 333 if (currentChar == 0x30fc || // KATAKANA-HIRAGANA PROLONGED SOUND MARK 334 currentChar == 0xff70) { // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK 335 336 if (0x3041 <= previousChar && previousChar <= 0x30fa) { 337 currentChar = table_normalwidth[ previousChar - 0x3041 ]; 338 } 339 else if (0xff66 <= previousChar && previousChar <= 0xff9c) { 340 currentChar = table_halfwidth[ previousChar - 0xff66 ]; 341 } 342 } 343 344 if (useOffset) 345 *p ++ = position ++; 346 *dst ++ = previousChar; 347 previousChar = currentChar; 348 } 349 350 if (nCount == 0) { 351 if (useOffset) 352 *p = position; 353 *dst ++ = previousChar; 354 } 355 356 *dst = (sal_Unicode) 0; 357 358 newStr->length = sal_Int32(dst - newStr->buffer); 359 if (useOffset) 360 offset.realloc(newStr->length); 361 return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1. 362 363 } 364 365 } } } } 366