175272fefSAndrew Rist /************************************************************** 2cdf0e10cSrcweir * 375272fefSAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one 475272fefSAndrew Rist * or more contributor license agreements. See the NOTICE file 575272fefSAndrew Rist * distributed with this work for additional information 675272fefSAndrew Rist * regarding copyright ownership. The ASF licenses this file 775272fefSAndrew Rist * to you under the Apache License, Version 2.0 (the 875272fefSAndrew Rist * "License"); you may not use this file except in compliance 975272fefSAndrew Rist * with the License. You may obtain a copy of the License at 10cdf0e10cSrcweir * 1175272fefSAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0 12cdf0e10cSrcweir * 1375272fefSAndrew Rist * Unless required by applicable law or agreed to in writing, 1475272fefSAndrew Rist * software distributed under the License is distributed on an 1575272fefSAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 1675272fefSAndrew Rist * KIND, either express or implied. See the License for the 1775272fefSAndrew Rist * specific language governing permissions and limitations 1875272fefSAndrew Rist * under the License. 19cdf0e10cSrcweir * 2075272fefSAndrew Rist *************************************************************/ 2175272fefSAndrew Rist 2275272fefSAndrew Rist 23cdf0e10cSrcweir 24cdf0e10cSrcweir // prevent internal compiler error with MSVC6SP3 25cdf0e10cSrcweir #include <utility> 26cdf0e10cSrcweir #include <i18nutil/widthfolding.hxx> 27cdf0e10cSrcweir #include <i18nutil/x_rtl_ustring.h> 28cdf0e10cSrcweir #include "widthfolding_data.h" 29cdf0e10cSrcweir 30cdf0e10cSrcweir using namespace com::sun::star::uno; 31cdf0e10cSrcweir using namespace rtl; 32cdf0e10cSrcweir 33cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n { 34cdf0e10cSrcweir 35cdf0e10cSrcweir sal_Unicode widthfolding::decompose_ja_voiced_sound_marksChar2Char (sal_Unicode inChar) 36cdf0e10cSrcweir { 37cdf0e10cSrcweir if (0x30a0 <= inChar && inChar <= 0x30ff) { 38cdf0e10cSrcweir sal_Int16 i = inChar - 0x3040; 39cdf0e10cSrcweir if (decomposition_table[i].decomposited_character_1) 40cdf0e10cSrcweir return 0xFFFF; 41cdf0e10cSrcweir } 42cdf0e10cSrcweir return inChar; 43cdf0e10cSrcweir } 44cdf0e10cSrcweir 45cdf0e10cSrcweir /** 46cdf0e10cSrcweir * Decompose Japanese specific voiced and semi-voiced sound marks. 47cdf0e10cSrcweir */ 48cdf0e10cSrcweir OUString widthfolding::decompose_ja_voiced_sound_marks (const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, sal_Bool useOffset ) 49cdf0e10cSrcweir { 50cdf0e10cSrcweir // Create a string buffer which can hold nCount * 2 + 1 characters. 51cdf0e10cSrcweir // Its size may become double of nCount. 524674bdb9SOliver-Rainer Wittmann rtl_uString * newStr = x_rtl_uString_new_WithLength( nCount * 2 ); // defined in x_rtl_ustring.h 53cdf0e10cSrcweir 54cdf0e10cSrcweir sal_Int32 *p = NULL; 55cdf0e10cSrcweir sal_Int32 position = 0; 56cdf0e10cSrcweir if (useOffset) { 57cdf0e10cSrcweir // Allocate double of nCount length to offset argument. 58cdf0e10cSrcweir offset.realloc( nCount * 2 ); 59cdf0e10cSrcweir p = offset.getArray(); 60cdf0e10cSrcweir position = startPos; 61cdf0e10cSrcweir } 62cdf0e10cSrcweir 63cdf0e10cSrcweir // Prepare pointers of unicode character arrays. 64cdf0e10cSrcweir const sal_Unicode* src = inStr.getStr() + startPos; 65cdf0e10cSrcweir sal_Unicode* dst = newStr->buffer; 66cdf0e10cSrcweir 67cdf0e10cSrcweir // Decomposition: GA --> KA + voice-mark 68cdf0e10cSrcweir while (nCount -- > 0) { 69cdf0e10cSrcweir sal_Unicode c = *src++; 70cdf0e10cSrcweir // see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F) 71cdf0e10cSrcweir // see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF) 72cdf0e10cSrcweir // Hiragana is not applied to decomposition. 73cdf0e10cSrcweir // Only Katakana is applied to decomposition 74cdf0e10cSrcweir if (0x30a0 <= c && c <= 0x30ff) { 75cdf0e10cSrcweir int i = int(c - 0x3040); 76cdf0e10cSrcweir sal_Unicode first = decomposition_table[i].decomposited_character_1; 77cdf0e10cSrcweir if (first != 0x0000) { 78cdf0e10cSrcweir *dst ++ = first; 79cdf0e10cSrcweir *dst ++ = decomposition_table[i].decomposited_character_2; // second 80cdf0e10cSrcweir if (useOffset) { 81cdf0e10cSrcweir *p ++ = position; 82cdf0e10cSrcweir *p ++ = position ++; 83cdf0e10cSrcweir } 84cdf0e10cSrcweir continue; 85cdf0e10cSrcweir } 86cdf0e10cSrcweir } 87cdf0e10cSrcweir *dst ++ = c; 88cdf0e10cSrcweir if (useOffset) 89cdf0e10cSrcweir *p ++ = position ++; 90cdf0e10cSrcweir } 91cdf0e10cSrcweir *dst = (sal_Unicode) 0; 92cdf0e10cSrcweir 93cdf0e10cSrcweir newStr->length = sal_Int32(dst - newStr->buffer); 94cdf0e10cSrcweir if (useOffset) 95cdf0e10cSrcweir offset.realloc(newStr->length); 964674bdb9SOliver-Rainer Wittmann return OUString( newStr, SAL_NO_ACQUIRE ); // take over ownership of <newStr> 97cdf0e10cSrcweir } 98cdf0e10cSrcweir 99cdf0e10cSrcweir oneToOneMapping& widthfolding::getfull2halfTable(void) 100cdf0e10cSrcweir { 101cdf0e10cSrcweir static oneToOneMappingWithFlag table(full2half, sizeof(full2half), FULL2HALF_NORMAL); 102cdf0e10cSrcweir table.makeIndex(); 103cdf0e10cSrcweir return table; 104cdf0e10cSrcweir } 105cdf0e10cSrcweir 106cdf0e10cSrcweir /** 107cdf0e10cSrcweir * Compose Japanese specific voiced and semi-voiced sound marks. 108cdf0e10cSrcweir */ 109cdf0e10cSrcweir OUString widthfolding::compose_ja_voiced_sound_marks (const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, sal_Bool useOffset, sal_Int32 nFlags ) 110cdf0e10cSrcweir { 111cdf0e10cSrcweir // Create a string buffer which can hold nCount + 1 characters. 112cdf0e10cSrcweir // Its size may become equal to nCount or smaller. 113cdf0e10cSrcweir rtl_uString * newStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h 114cdf0e10cSrcweir 115cdf0e10cSrcweir // Prepare pointers of unicode character arrays. 116cdf0e10cSrcweir const sal_Unicode* src = inStr.getStr() + startPos; 117cdf0e10cSrcweir sal_Unicode* dst = newStr->buffer; 118cdf0e10cSrcweir 119cdf0e10cSrcweir // This conversion algorithm requires at least one character. 120cdf0e10cSrcweir if (nCount > 0) { 121cdf0e10cSrcweir 122cdf0e10cSrcweir // .. .. KA VOICE .. .. 123cdf0e10cSrcweir // ^ ^ 124cdf0e10cSrcweir // previousChar currentChar 125cdf0e10cSrcweir // ^ 126cdf0e10cSrcweir // position 127cdf0e10cSrcweir // 128cdf0e10cSrcweir // will be converted to 129cdf0e10cSrcweir // .. .. GA .. .. 130cdf0e10cSrcweir 131cdf0e10cSrcweir sal_Int32 *p = NULL; 132cdf0e10cSrcweir sal_Int32 position = 0; 133cdf0e10cSrcweir if (useOffset) { 134cdf0e10cSrcweir // Allocate nCount length to offset argument. 135cdf0e10cSrcweir offset.realloc( nCount ); 136cdf0e10cSrcweir p = offset.getArray(); 137cdf0e10cSrcweir position = startPos; 138cdf0e10cSrcweir } 139cdf0e10cSrcweir 140cdf0e10cSrcweir // 141cdf0e10cSrcweir sal_Unicode previousChar = *src ++; 142cdf0e10cSrcweir sal_Unicode currentChar; 143cdf0e10cSrcweir 144cdf0e10cSrcweir // Composition: KA + voice-mark --> GA 145cdf0e10cSrcweir while (-- nCount > 0) { 146cdf0e10cSrcweir currentChar = *src ++; 147cdf0e10cSrcweir // see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F) 148cdf0e10cSrcweir // see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF) 149cdf0e10cSrcweir // 0x3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK 150cdf0e10cSrcweir // 0x309a COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 151cdf0e10cSrcweir // 0x309b KATAKANA-HIRAGANA VOICED SOUND MARK 152cdf0e10cSrcweir // 0x309c KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 153cdf0e10cSrcweir int j = currentChar - 0x3099; // 0x3099, 0x309a, 0x309b, 0x309c ? 154cdf0e10cSrcweir 155cdf0e10cSrcweir if (2 <= j && j <= 3) // 0x309b or 0x309c 156cdf0e10cSrcweir j -= 2; 157cdf0e10cSrcweir 158cdf0e10cSrcweir if (0 <= j && j <= 1) { 159cdf0e10cSrcweir // 0 addresses a code point regarding 0x3099 or 0x309b (voiced sound mark), 160cdf0e10cSrcweir // 1 is 0x309a or 0x309c (semi-voiced sound mark) 161cdf0e10cSrcweir int i = int(previousChar - 0x3040); // i acts as an index of array 162cdf0e10cSrcweir sal_Bool bCompose = sal_False; 163cdf0e10cSrcweir 164cdf0e10cSrcweir if (0 <= i && i <= (0x30ff - 0x3040) && composition_table[i][j]) 165cdf0e10cSrcweir bCompose = sal_True; 166cdf0e10cSrcweir 167cdf0e10cSrcweir // not to use combined KATAKANA LETTER VU 168cdf0e10cSrcweir if ( previousChar == 0x30a6 && (nFlags & WIDTHFOLDNIG_DONT_USE_COMBINED_VU) ) 169cdf0e10cSrcweir bCompose = sal_False; 170cdf0e10cSrcweir 171cdf0e10cSrcweir if( bCompose ){ 172cdf0e10cSrcweir if (useOffset) { 173cdf0e10cSrcweir position ++; 174cdf0e10cSrcweir *p ++ = position ++; 175cdf0e10cSrcweir } 176cdf0e10cSrcweir *dst ++ = composition_table[i][j]; 177cdf0e10cSrcweir previousChar = *src ++; 178cdf0e10cSrcweir nCount --; 179cdf0e10cSrcweir continue; 180cdf0e10cSrcweir } 181cdf0e10cSrcweir } 182cdf0e10cSrcweir if (useOffset) 183cdf0e10cSrcweir *p ++ = position ++; 184cdf0e10cSrcweir *dst ++ = previousChar; 185cdf0e10cSrcweir previousChar = currentChar; 186cdf0e10cSrcweir } 187cdf0e10cSrcweir 188cdf0e10cSrcweir if (nCount == 0) { 189cdf0e10cSrcweir if (useOffset) 190cdf0e10cSrcweir *p = position; 191cdf0e10cSrcweir *dst ++ = previousChar; 192cdf0e10cSrcweir } 193cdf0e10cSrcweir 194cdf0e10cSrcweir *dst = (sal_Unicode) 0; 195cdf0e10cSrcweir 196cdf0e10cSrcweir newStr->length = sal_Int32(dst - newStr->buffer); 197cdf0e10cSrcweir } 198cdf0e10cSrcweir if (useOffset) 199cdf0e10cSrcweir offset.realloc(newStr->length); 2004674bdb9SOliver-Rainer Wittmann return OUString( newStr, SAL_NO_ACQUIRE ); // take over ownership of <newStr> 201cdf0e10cSrcweir } 202cdf0e10cSrcweir 203cdf0e10cSrcweir oneToOneMapping& widthfolding::gethalf2fullTable(void) 204cdf0e10cSrcweir { 205cdf0e10cSrcweir static oneToOneMappingWithFlag table(half2full, sizeof(half2full), HALF2FULL_NORMAL); 206cdf0e10cSrcweir table.makeIndex(); 207cdf0e10cSrcweir return table; 208cdf0e10cSrcweir } 209cdf0e10cSrcweir 210cdf0e10cSrcweir sal_Unicode widthfolding::getCompositionChar(sal_Unicode c1, sal_Unicode c2) 211cdf0e10cSrcweir { 212cdf0e10cSrcweir return composition_table[c1 - 0x3040][c2 - 0x3099]; 213cdf0e10cSrcweir } 214cdf0e10cSrcweir 215cdf0e10cSrcweir 216cdf0e10cSrcweir oneToOneMapping& widthfolding::getfull2halfTableForASC() 217cdf0e10cSrcweir { 218cdf0e10cSrcweir static oneToOneMappingWithFlag table(full2half, sizeof(full2half), FULL2HALF_ASC_FUNCTION); 219cdf0e10cSrcweir table.makeIndex(); 220cdf0e10cSrcweir 221cdf0e10cSrcweir // bluedwarf: dirty hack! 222cdf0e10cSrcweir // There is an exception. Additional conversion is required following: 223cdf0e10cSrcweir // 0xFFE5 (FULLWIDTH YEN SIGN) --> 0x005C (REVERSE SOLIDUS) 224cdf0e10cSrcweir // 225cdf0e10cSrcweir // See the following page for detail: 226*11774565Smseidel // https://wiki.openoffice.org/wiki/Calc/Features/JIS_and_ASC_functions 227cdf0e10cSrcweir int i, j, high, low; 228cdf0e10cSrcweir int n = sizeof(full2halfASCException) / sizeof(UnicodePairWithFlag); 229cdf0e10cSrcweir for( i = 0; i < n; i++ ) 230cdf0e10cSrcweir { 231cdf0e10cSrcweir high = (full2halfASCException[i].first >> 8) & 0xFF; 232cdf0e10cSrcweir low = (full2halfASCException[i].first) & 0xFF; 233cdf0e10cSrcweir 234cdf0e10cSrcweir if( !table.mpIndex[high] ) 235cdf0e10cSrcweir { 236cdf0e10cSrcweir table.mpIndex[high] = new UnicodePairWithFlag*[256]; 237cdf0e10cSrcweir 238cdf0e10cSrcweir for( j = 0; j < 256; j++ ) 239cdf0e10cSrcweir table.mpIndex[high][j] = NULL; 240cdf0e10cSrcweir } 241cdf0e10cSrcweir table.mpIndex[high][low] = &full2halfASCException[i]; 242cdf0e10cSrcweir } 243cdf0e10cSrcweir 244cdf0e10cSrcweir return table; 245cdf0e10cSrcweir } 246cdf0e10cSrcweir 247cdf0e10cSrcweir oneToOneMapping& widthfolding::gethalf2fullTableForJIS() 248cdf0e10cSrcweir { 249cdf0e10cSrcweir static oneToOneMappingWithFlag table(half2full, sizeof(half2full), HALF2FULL_JIS_FUNCTION); 250cdf0e10cSrcweir table.makeIndex(); 251cdf0e10cSrcweir 252cdf0e10cSrcweir // bluedwarf: dirty hack! 253cdf0e10cSrcweir // There are some exceptions. Additional conversion are required following: 254cdf0e10cSrcweir // 0x0022 (QUOTATION MARK) --> 0x201D (RIGHT DOUBLE QUOTATION MARK) 255cdf0e10cSrcweir // 0x0027 (APOSTROPHE) --> 0x2019 (RIGHT SINGLE QUOTATION MARK) 256cdf0e10cSrcweir // 0x005C (REVERSE SOLIDUS) --> 0xFFE5 (FULLWIDTH YEN SIGN) 257cdf0e10cSrcweir // 0x0060 (GRAVE ACCENT) --> 0x2018 (LEFT SINGLE QUOTATION MARK) 258cdf0e10cSrcweir // 259cdf0e10cSrcweir // See the following page for detail: 260*11774565Smseidel // https://wiki.openoffice.org/wiki/Calc/Features/JIS_and_ASC_functions 261cdf0e10cSrcweir int i, j, high, low; 262cdf0e10cSrcweir int n = sizeof(half2fullJISException) / sizeof(UnicodePairWithFlag); 263cdf0e10cSrcweir for( i = 0; i < n; i++ ) 264cdf0e10cSrcweir { 265cdf0e10cSrcweir high = (half2fullJISException[i].first >> 8) & 0xFF; 266cdf0e10cSrcweir low = (half2fullJISException[i].first) & 0xFF; 267cdf0e10cSrcweir 268cdf0e10cSrcweir if( !table.mpIndex[high] ) 269cdf0e10cSrcweir { 270cdf0e10cSrcweir table.mpIndex[high] = new UnicodePairWithFlag*[256]; 271cdf0e10cSrcweir 272cdf0e10cSrcweir for( j = 0; j < 256; j++ ) 273cdf0e10cSrcweir table.mpIndex[high][j] = NULL; 274cdf0e10cSrcweir } 275cdf0e10cSrcweir table.mpIndex[high][low] = &half2fullJISException[i]; 276cdf0e10cSrcweir } 277cdf0e10cSrcweir 278cdf0e10cSrcweir return table; 279cdf0e10cSrcweir } 280cdf0e10cSrcweir 281cdf0e10cSrcweir oneToOneMapping& widthfolding::getfullKana2halfKanaTable() 282cdf0e10cSrcweir { 283cdf0e10cSrcweir static oneToOneMappingWithFlag table(full2half, sizeof(full2half), FULL2HALF_KATAKANA_ONLY); 284cdf0e10cSrcweir table.makeIndex(); 285cdf0e10cSrcweir return table; 286cdf0e10cSrcweir } 287cdf0e10cSrcweir 288cdf0e10cSrcweir oneToOneMapping& widthfolding::gethalfKana2fullKanaTable() 289cdf0e10cSrcweir { 290cdf0e10cSrcweir static oneToOneMappingWithFlag table(half2full, sizeof(half2full), HALF2FULL_KATAKANA_ONLY); 291cdf0e10cSrcweir table.makeIndex(); 292cdf0e10cSrcweir return table; 293cdf0e10cSrcweir } 294cdf0e10cSrcweir 295cdf0e10cSrcweir } } } } 296*11774565Smseidel 297