1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 // prevent internal compiler error with MSVC6SP3 25 #include <utility> 26 #include <i18nutil/widthfolding.hxx> 27 #include <i18nutil/x_rtl_ustring.h> 28 #include "widthfolding_data.h" 29 30 using namespace com::sun::star::uno; 31 using namespace rtl; 32 33 namespace com { namespace sun { namespace star { namespace i18n { 34 35 sal_Unicode widthfolding::decompose_ja_voiced_sound_marksChar2Char (sal_Unicode inChar) 36 { 37 if (0x30a0 <= inChar && inChar <= 0x30ff) { 38 sal_Int16 i = inChar - 0x3040; 39 if (decomposition_table[i].decomposited_character_1) 40 return 0xFFFF; 41 } 42 return inChar; 43 } 44 45 /** 46 * Decompose Japanese specific voiced and semi-voiced sound marks. 47 */ 48 OUString widthfolding::decompose_ja_voiced_sound_marks (const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, sal_Bool useOffset ) 49 { 50 // Create a string buffer which can hold nCount * 2 + 1 characters. 51 // Its size may become double of nCount. 52 rtl_uString * newStr = x_rtl_uString_new_WithLength( nCount * 2 ); // defined in x_rtl_ustring.h 53 54 sal_Int32 *p = NULL; 55 sal_Int32 position = 0; 56 if (useOffset) { 57 // Allocate double of nCount length to offset argument. 58 offset.realloc( nCount * 2 ); 59 p = offset.getArray(); 60 position = startPos; 61 } 62 63 // Prepare pointers of unicode character arrays. 64 const sal_Unicode* src = inStr.getStr() + startPos; 65 sal_Unicode* dst = newStr->buffer; 66 67 // Decomposition: GA --> KA + voice-mark 68 while (nCount -- > 0) { 69 sal_Unicode c = *src++; 70 // see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F) 71 // see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF) 72 // Hiragana is not applied to decomposition. 73 // Only Katakana is applied to decomposition 74 if (0x30a0 <= c && c <= 0x30ff) { 75 int i = int(c - 0x3040); 76 sal_Unicode first = decomposition_table[i].decomposited_character_1; 77 if (first != 0x0000) { 78 *dst ++ = first; 79 *dst ++ = decomposition_table[i].decomposited_character_2; // second 80 if (useOffset) { 81 *p ++ = position; 82 *p ++ = position ++; 83 } 84 continue; 85 } 86 } 87 *dst ++ = c; 88 if (useOffset) 89 *p ++ = position ++; 90 } 91 *dst = (sal_Unicode) 0; 92 93 newStr->length = sal_Int32(dst - newStr->buffer); 94 if (useOffset) 95 offset.realloc(newStr->length); 96 return OUString( newStr, SAL_NO_ACQUIRE ); // take over ownership of <newStr> 97 } 98 99 oneToOneMapping& widthfolding::getfull2halfTable(void) 100 { 101 static oneToOneMappingWithFlag table(full2half, sizeof(full2half), FULL2HALF_NORMAL); 102 table.makeIndex(); 103 return table; 104 } 105 106 /** 107 * Compose Japanese specific voiced and semi-voiced sound marks. 108 */ 109 OUString widthfolding::compose_ja_voiced_sound_marks (const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, sal_Bool useOffset, sal_Int32 nFlags ) 110 { 111 // Create a string buffer which can hold nCount + 1 characters. 112 // Its size may become equal to nCount or smaller. 113 rtl_uString * newStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h 114 115 // Prepare pointers of unicode character arrays. 116 const sal_Unicode* src = inStr.getStr() + startPos; 117 sal_Unicode* dst = newStr->buffer; 118 119 // This conversion algorithm requires at least one character. 120 if (nCount > 0) { 121 122 // .. .. KA VOICE .. .. 123 // ^ ^ 124 // previousChar currentChar 125 // ^ 126 // position 127 // 128 // will be converted to 129 // .. .. GA .. .. 130 131 sal_Int32 *p = NULL; 132 sal_Int32 position = 0; 133 if (useOffset) { 134 // Allocate nCount length to offset argument. 135 offset.realloc( nCount ); 136 p = offset.getArray(); 137 position = startPos; 138 } 139 140 // 141 sal_Unicode previousChar = *src ++; 142 sal_Unicode currentChar; 143 144 // Composition: KA + voice-mark --> GA 145 while (-- nCount > 0) { 146 currentChar = *src ++; 147 // see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F) 148 // see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF) 149 // 0x3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK 150 // 0x309a COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 151 // 0x309b KATAKANA-HIRAGANA VOICED SOUND MARK 152 // 0x309c KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 153 int j = currentChar - 0x3099; // 0x3099, 0x309a, 0x309b, 0x309c ? 154 155 if (2 <= j && j <= 3) // 0x309b or 0x309c 156 j -= 2; 157 158 if (0 <= j && j <= 1) { 159 // 0 addresses a code point regarding 0x3099 or 0x309b (voiced sound mark), 160 // 1 is 0x309a or 0x309c (semi-voiced sound mark) 161 int i = int(previousChar - 0x3040); // i acts as an index of array 162 sal_Bool bCompose = sal_False; 163 164 if (0 <= i && i <= (0x30ff - 0x3040) && composition_table[i][j]) 165 bCompose = sal_True; 166 167 // not to use combined KATAKANA LETTER VU 168 if ( previousChar == 0x30a6 && (nFlags & WIDTHFOLDNIG_DONT_USE_COMBINED_VU) ) 169 bCompose = sal_False; 170 171 if( bCompose ){ 172 if (useOffset) { 173 position ++; 174 *p ++ = position ++; 175 } 176 *dst ++ = composition_table[i][j]; 177 previousChar = *src ++; 178 nCount --; 179 continue; 180 } 181 } 182 if (useOffset) 183 *p ++ = position ++; 184 *dst ++ = previousChar; 185 previousChar = currentChar; 186 } 187 188 if (nCount == 0) { 189 if (useOffset) 190 *p = position; 191 *dst ++ = previousChar; 192 } 193 194 *dst = (sal_Unicode) 0; 195 196 newStr->length = sal_Int32(dst - newStr->buffer); 197 } 198 if (useOffset) 199 offset.realloc(newStr->length); 200 return OUString( newStr, SAL_NO_ACQUIRE ); // take over ownership of <newStr> 201 } 202 203 oneToOneMapping& widthfolding::gethalf2fullTable(void) 204 { 205 static oneToOneMappingWithFlag table(half2full, sizeof(half2full), HALF2FULL_NORMAL); 206 table.makeIndex(); 207 return table; 208 } 209 210 sal_Unicode widthfolding::getCompositionChar(sal_Unicode c1, sal_Unicode c2) 211 { 212 return composition_table[c1 - 0x3040][c2 - 0x3099]; 213 } 214 215 216 oneToOneMapping& widthfolding::getfull2halfTableForASC() 217 { 218 static oneToOneMappingWithFlag table(full2half, sizeof(full2half), FULL2HALF_ASC_FUNCTION); 219 table.makeIndex(); 220 221 // bluedwarf: dirty hack! 222 // There is an exception. Additional conversion is required following: 223 // 0xFFE5 (FULLWIDTH YEN SIGN) --> 0x005C (REVERSE SOLIDUS) 224 // 225 // See the following page for detail: 226 // http://wiki.services.openoffice.org/wiki/Calc/Features/JIS_and_ASC_functions 227 int i, j, high, low; 228 int n = sizeof(full2halfASCException) / sizeof(UnicodePairWithFlag); 229 for( i = 0; i < n; i++ ) 230 { 231 high = (full2halfASCException[i].first >> 8) & 0xFF; 232 low = (full2halfASCException[i].first) & 0xFF; 233 234 if( !table.mpIndex[high] ) 235 { 236 table.mpIndex[high] = new UnicodePairWithFlag*[256]; 237 238 for( j = 0; j < 256; j++ ) 239 table.mpIndex[high][j] = NULL; 240 } 241 table.mpIndex[high][low] = &full2halfASCException[i]; 242 } 243 244 return table; 245 } 246 247 oneToOneMapping& widthfolding::gethalf2fullTableForJIS() 248 { 249 static oneToOneMappingWithFlag table(half2full, sizeof(half2full), HALF2FULL_JIS_FUNCTION); 250 table.makeIndex(); 251 252 // bluedwarf: dirty hack! 253 // There are some exceptions. Additional conversion are required following: 254 // 0x0022 (QUOTATION MARK) --> 0x201D (RIGHT DOUBLE QUOTATION MARK) 255 // 0x0027 (APOSTROPHE) --> 0x2019 (RIGHT SINGLE QUOTATION MARK) 256 // 0x005C (REVERSE SOLIDUS) --> 0xFFE5 (FULLWIDTH YEN SIGN) 257 // 0x0060 (GRAVE ACCENT) --> 0x2018 (LEFT SINGLE QUOTATION MARK) 258 // 259 // See the following page for detail: 260 // http://wiki.services.openoffice.org/wiki/Calc/Features/JIS_and_ASC_functions 261 int i, j, high, low; 262 int n = sizeof(half2fullJISException) / sizeof(UnicodePairWithFlag); 263 for( i = 0; i < n; i++ ) 264 { 265 high = (half2fullJISException[i].first >> 8) & 0xFF; 266 low = (half2fullJISException[i].first) & 0xFF; 267 268 if( !table.mpIndex[high] ) 269 { 270 table.mpIndex[high] = new UnicodePairWithFlag*[256]; 271 272 for( j = 0; j < 256; j++ ) 273 table.mpIndex[high][j] = NULL; 274 } 275 table.mpIndex[high][low] = &half2fullJISException[i]; 276 } 277 278 return table; 279 } 280 281 oneToOneMapping& widthfolding::getfullKana2halfKanaTable() 282 { 283 static oneToOneMappingWithFlag table(full2half, sizeof(full2half), FULL2HALF_KATAKANA_ONLY); 284 table.makeIndex(); 285 return table; 286 } 287 288 oneToOneMapping& widthfolding::gethalfKana2fullKanaTable() 289 { 290 static oneToOneMappingWithFlag table(half2full, sizeof(half2full), HALF2FULL_KATAKANA_ONLY); 291 table.makeIndex(); 292 return table; 293 } 294 295 } } } } 296