1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 // prevent internal compiler error with MSVC6SP3 25 #include <utility> 26 #include <i18nutil/widthfolding.hxx> 27 #include <i18nutil/x_rtl_ustring.h> 28 #include "widthfolding_data.h" 29 30 using namespace com::sun::star::uno; 31 using namespace rtl; 32 33 namespace com { namespace sun { namespace star { namespace i18n { 34 35 sal_Unicode widthfolding::decompose_ja_voiced_sound_marksChar2Char (sal_Unicode inChar) 36 { 37 if (0x30a0 <= inChar && inChar <= 0x30ff) { 38 sal_Int16 i = inChar - 0x3040; 39 if (decomposition_table[i].decomposited_character_1) 40 return 0xFFFF; 41 } 42 return inChar; 43 } 44 45 /** 46 * Decompose Japanese specific voiced and semi-voiced sound marks. 47 */ 48 OUString widthfolding::decompose_ja_voiced_sound_marks (const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, sal_Bool useOffset ) 49 { 50 // Create a string buffer which can hold nCount * 2 + 1 characters. 51 // Its size may become double of nCount. 52 rtl_uString * newStr; 53 x_rtl_uString_new_WithLength( &newStr, nCount * 2 ); // defined in x_rtl_ustring.h The reference count is 0 now. 54 55 sal_Int32 *p = NULL; 56 sal_Int32 position = 0; 57 if (useOffset) { 58 // Allocate double of nCount length to offset argument. 59 offset.realloc( nCount * 2 ); 60 p = offset.getArray(); 61 position = startPos; 62 } 63 64 // Prepare pointers of unicode character arrays. 65 const sal_Unicode* src = inStr.getStr() + startPos; 66 sal_Unicode* dst = newStr->buffer; 67 68 // Decomposition: GA --> KA + voice-mark 69 while (nCount -- > 0) { 70 sal_Unicode c = *src++; 71 // see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F) 72 // see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF) 73 // Hiragana is not applied to decomposition. 74 // Only Katakana is applied to decomposition 75 if (0x30a0 <= c && c <= 0x30ff) { 76 int i = int(c - 0x3040); 77 sal_Unicode first = decomposition_table[i].decomposited_character_1; 78 if (first != 0x0000) { 79 *dst ++ = first; 80 *dst ++ = decomposition_table[i].decomposited_character_2; // second 81 if (useOffset) { 82 *p ++ = position; 83 *p ++ = position ++; 84 } 85 continue; 86 } 87 } 88 *dst ++ = c; 89 if (useOffset) 90 *p ++ = position ++; 91 } 92 *dst = (sal_Unicode) 0; 93 94 newStr->length = sal_Int32(dst - newStr->buffer); 95 if (useOffset) 96 offset.realloc(newStr->length); 97 return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1. 98 } 99 100 oneToOneMapping& widthfolding::getfull2halfTable(void) 101 { 102 static oneToOneMappingWithFlag table(full2half, sizeof(full2half), FULL2HALF_NORMAL); 103 table.makeIndex(); 104 return table; 105 } 106 107 /** 108 * Compose Japanese specific voiced and semi-voiced sound marks. 109 */ 110 OUString widthfolding::compose_ja_voiced_sound_marks (const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, sal_Bool useOffset, sal_Int32 nFlags ) 111 { 112 // Create a string buffer which can hold nCount + 1 characters. 113 // Its size may become equal to nCount or smaller. 114 // The reference count is 0 now. 115 rtl_uString * newStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h 116 117 // Prepare pointers of unicode character arrays. 118 const sal_Unicode* src = inStr.getStr() + startPos; 119 sal_Unicode* dst = newStr->buffer; 120 121 // This conversion algorithm requires at least one character. 122 if (nCount > 0) { 123 124 // .. .. KA VOICE .. .. 125 // ^ ^ 126 // previousChar currentChar 127 // ^ 128 // position 129 // 130 // will be converted to 131 // .. .. GA .. .. 132 133 sal_Int32 *p = NULL; 134 sal_Int32 position = 0; 135 if (useOffset) { 136 // Allocate nCount length to offset argument. 137 offset.realloc( nCount ); 138 p = offset.getArray(); 139 position = startPos; 140 } 141 142 // 143 sal_Unicode previousChar = *src ++; 144 sal_Unicode currentChar; 145 146 // Composition: KA + voice-mark --> GA 147 while (-- nCount > 0) { 148 currentChar = *src ++; 149 // see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F) 150 // see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF) 151 // 0x3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK 152 // 0x309a COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 153 // 0x309b KATAKANA-HIRAGANA VOICED SOUND MARK 154 // 0x309c KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 155 int j = currentChar - 0x3099; // 0x3099, 0x309a, 0x309b, 0x309c ? 156 157 if (2 <= j && j <= 3) // 0x309b or 0x309c 158 j -= 2; 159 160 if (0 <= j && j <= 1) { 161 // 0 addresses a code point regarding 0x3099 or 0x309b (voiced sound mark), 162 // 1 is 0x309a or 0x309c (semi-voiced sound mark) 163 int i = int(previousChar - 0x3040); // i acts as an index of array 164 sal_Bool bCompose = sal_False; 165 166 if (0 <= i && i <= (0x30ff - 0x3040) && composition_table[i][j]) 167 bCompose = sal_True; 168 169 // not to use combined KATAKANA LETTER VU 170 if ( previousChar == 0x30a6 && (nFlags & WIDTHFOLDNIG_DONT_USE_COMBINED_VU) ) 171 bCompose = sal_False; 172 173 if( bCompose ){ 174 if (useOffset) { 175 position ++; 176 *p ++ = position ++; 177 } 178 *dst ++ = composition_table[i][j]; 179 previousChar = *src ++; 180 nCount --; 181 continue; 182 } 183 } 184 if (useOffset) 185 *p ++ = position ++; 186 *dst ++ = previousChar; 187 previousChar = currentChar; 188 } 189 190 if (nCount == 0) { 191 if (useOffset) 192 *p = position; 193 *dst ++ = previousChar; 194 } 195 196 *dst = (sal_Unicode) 0; 197 198 newStr->length = sal_Int32(dst - newStr->buffer); 199 } 200 if (useOffset) 201 offset.realloc(newStr->length); 202 return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1. 203 } 204 205 oneToOneMapping& widthfolding::gethalf2fullTable(void) 206 { 207 static oneToOneMappingWithFlag table(half2full, sizeof(half2full), HALF2FULL_NORMAL); 208 table.makeIndex(); 209 return table; 210 } 211 212 sal_Unicode widthfolding::getCompositionChar(sal_Unicode c1, sal_Unicode c2) 213 { 214 return composition_table[c1 - 0x3040][c2 - 0x3099]; 215 } 216 217 218 oneToOneMapping& widthfolding::getfull2halfTableForASC() 219 { 220 static oneToOneMappingWithFlag table(full2half, sizeof(full2half), FULL2HALF_ASC_FUNCTION); 221 table.makeIndex(); 222 223 // bluedwarf: dirty hack! 224 // There is an exception. Additional conversion is required following: 225 // 0xFFE5 (FULLWIDTH YEN SIGN) --> 0x005C (REVERSE SOLIDUS) 226 // 227 // See the following page for detail: 228 // http://wiki.services.openoffice.org/wiki/Calc/Features/JIS_and_ASC_functions 229 int i, j, high, low; 230 int n = sizeof(full2halfASCException) / sizeof(UnicodePairWithFlag); 231 for( i = 0; i < n; i++ ) 232 { 233 high = (full2halfASCException[i].first >> 8) & 0xFF; 234 low = (full2halfASCException[i].first) & 0xFF; 235 236 if( !table.mpIndex[high] ) 237 { 238 table.mpIndex[high] = new UnicodePairWithFlag*[256]; 239 240 for( j = 0; j < 256; j++ ) 241 table.mpIndex[high][j] = NULL; 242 } 243 table.mpIndex[high][low] = &full2halfASCException[i]; 244 } 245 246 return table; 247 } 248 249 oneToOneMapping& widthfolding::gethalf2fullTableForJIS() 250 { 251 static oneToOneMappingWithFlag table(half2full, sizeof(half2full), HALF2FULL_JIS_FUNCTION); 252 table.makeIndex(); 253 254 // bluedwarf: dirty hack! 255 // There are some exceptions. Additional conversion are required following: 256 // 0x0022 (QUOTATION MARK) --> 0x201D (RIGHT DOUBLE QUOTATION MARK) 257 // 0x0027 (APOSTROPHE) --> 0x2019 (RIGHT SINGLE QUOTATION MARK) 258 // 0x005C (REVERSE SOLIDUS) --> 0xFFE5 (FULLWIDTH YEN SIGN) 259 // 0x0060 (GRAVE ACCENT) --> 0x2018 (LEFT SINGLE QUOTATION MARK) 260 // 261 // See the following page for detail: 262 // http://wiki.services.openoffice.org/wiki/Calc/Features/JIS_and_ASC_functions 263 int i, j, high, low; 264 int n = sizeof(half2fullJISException) / sizeof(UnicodePairWithFlag); 265 for( i = 0; i < n; i++ ) 266 { 267 high = (half2fullJISException[i].first >> 8) & 0xFF; 268 low = (half2fullJISException[i].first) & 0xFF; 269 270 if( !table.mpIndex[high] ) 271 { 272 table.mpIndex[high] = new UnicodePairWithFlag*[256]; 273 274 for( j = 0; j < 256; j++ ) 275 table.mpIndex[high][j] = NULL; 276 } 277 table.mpIndex[high][low] = &half2fullJISException[i]; 278 } 279 280 return table; 281 } 282 283 oneToOneMapping& widthfolding::getfullKana2halfKanaTable() 284 { 285 static oneToOneMappingWithFlag table(full2half, sizeof(full2half), FULL2HALF_KATAKANA_ONLY); 286 table.makeIndex(); 287 return table; 288 } 289 290 oneToOneMapping& widthfolding::gethalfKana2fullKanaTable() 291 { 292 static oneToOneMappingWithFlag table(half2full, sizeof(half2full), HALF2FULL_KATAKANA_ONLY); 293 table.makeIndex(); 294 return table; 295 } 296 297 } } } } 298