1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 #include "i18nutil/casefolding.hxx" 29 #include "casefolding_data.h" 30 #include "i18nutil/widthfolding.hxx" 31 32 using namespace com::sun::star::lang; 33 using namespace com::sun::star::uno; 34 35 namespace com { namespace sun { namespace star { namespace i18n { 36 37 static Mapping mapping_03a3[] = {{0, 1, {0x03c2, 0, 0}},{0, 1, {0x03c3, 0, 0}}}; 38 static Mapping mapping_0307[] = {{0, 0, {0, 0, 0}},{0, 1, {0x0307, 0, 0}}}; 39 static Mapping mapping_004a[] = {{0, 2, {0x006a, 0x0307, 0}},{0, 1, {0x006a, 0, 0}}}; 40 static Mapping mapping_012e[] = {{0, 2, {0x012f, 0x0307, 0}},{0, 1, {0x012f, 0, 0}}}; 41 static Mapping mapping_00cc[] = {{0, 3, {0x0069, 0x0307, 0x0300}},{0, 1, {0x00ec, 0, 0}}}; 42 static Mapping mapping_00cd[] = {{0, 3, {0x0069, 0x0307, 0x0301}},{0, 1, {0x00ed, 0, 0}}}; 43 static Mapping mapping_0128[] = {{0, 3, {0x0069, 0x0307, 0x0303}},{0, 1, {0x0129, 0, 0}}}; 44 static Mapping mapping_0049[] = {{0, 2, {0x0069, 0x0307, 0}},{0, 1, {0x0131, 0, 0}},{0, 1, {0x0069, 0, 0}}}; 45 static Mapping mapping_0069[] = {{0, 1, {0x0130, 0, 0}},{0, 1, {0x0049, 0, 0}}}; 46 static Mapping mapping_0130[] = {{0, 1, {0x0069, 0, 0}},{0, 1, {0x0130, 0, 0}}}; 47 48 #define langIs(lang) (aLocale.Language.compareToAscii(lang) == 0) 49 50 // only check simple case, there is more complicated case need to be checked. 51 #define type_i(ch) ((ch) == 0x0069 || (ch) == 0x006a) 52 53 #define cased_letter(ch) (CaseMappingIndex[(ch)>>8] >= 0 && (CaseMappingValue[(CaseMappingIndex[(ch)>>8] << 8) + ((ch)&0xff)].type & CasedLetter)) 54 55 // for Lithuanian, condition to make explicit dot above when lowercasing capital I's and J's 56 // whenever there are more accents above. 57 #define accent_above(ch) (((ch) >= 0x0300 && (ch) <= 0x0314) || ((ch) >= 0x033D && (ch) <= 0x0344) || (ch) == 0x0346 || ((ch) >= 0x034A && (ch) <= 0x034C)) 58 59 Mapping& casefolding::getConditionalValue(const sal_Unicode* str, sal_Int32 pos, sal_Int32 len, Locale& aLocale, sal_uInt8 nMappingType) throw (RuntimeException) 60 { 61 switch(str[pos]) { 62 case 0x03a3: 63 // final_sigma (not followed by cased and preceded by cased character) 64 // DOES NOT check ignorable sequence yet (more complicated implementation). 65 return !(pos < len && cased_letter(str[pos+1])) && (pos > 0 && cased_letter(str[pos-1])) ? 66 mapping_03a3[0] : mapping_03a3[1]; 67 case 0x0307: 68 return (((nMappingType == MappingTypeLowerToUpper && langIs("lt")) || 69 (nMappingType == MappingTypeUpperToLower && (langIs("tr") || langIs("az")))) && 70 (pos > 0 && type_i(str[pos-1]))) ? // after_i 71 mapping_0307[0] : mapping_0307[1]; 72 case 0x0130: 73 return (langIs("tr") || langIs("az")) ? mapping_0130[0] : mapping_0130[1]; 74 case 0x0069: 75 return (langIs("tr") || langIs("az")) ? mapping_0069[0] : mapping_0069[1]; 76 case 0x0049: return langIs("lt") && pos > len && accent_above(str[pos+1]) ? mapping_0049[0] : 77 (langIs("tr") || langIs("az")) ? mapping_0049[1] : mapping_0049[2]; 78 case 0x004a: return langIs("lt") && pos > len && accent_above(str[pos+1]) ? mapping_004a[0] : mapping_004a[1]; 79 case 0x012e: return langIs("lt") && pos > len && accent_above(str[pos+1]) ? mapping_012e[0] : mapping_012e[1]; 80 case 0x00cc: return langIs("lt") ? mapping_00cc[0] : mapping_00cc[1]; 81 case 0x00cd: return langIs("lt") ? mapping_00cd[0] : mapping_00cd[1]; 82 case 0x0128: return langIs("lt") ? mapping_0128[0] : mapping_0128[1]; 83 } 84 // Should not come here 85 throw RuntimeException(); 86 } 87 88 Mapping& casefolding::getValue(const sal_Unicode* str, sal_Int32 pos, sal_Int32 len, Locale& aLocale, sal_uInt8 nMappingType) throw (RuntimeException) 89 { 90 static Mapping dummy = { 0, 1, { 0, 0, 0 } }; 91 sal_Int16 address = CaseMappingIndex[str[pos] >> 8] << 8; 92 93 dummy.map[0] = str[pos]; 94 95 if (address >= 0 && (CaseMappingValue[address += (str[pos] & 0xFF)].type & nMappingType)) { 96 sal_uInt8 type = CaseMappingValue[address].type; 97 if (type & ValueTypeNotValue) { 98 if (CaseMappingValue[address].value == 0) 99 return getConditionalValue(str, pos, len, aLocale, nMappingType); 100 else { 101 for (int map = CaseMappingValue[address].value; 102 map < CaseMappingValue[address].value + MaxCaseMappingExtras; map++) { 103 if (CaseMappingExtra[map].type & nMappingType) { 104 if (CaseMappingExtra[map].type & ValueTypeNotValue) 105 return getConditionalValue(str, pos, len, aLocale, nMappingType); 106 else 107 return CaseMappingExtra[map]; 108 } 109 } 110 // Should not come here 111 throw RuntimeException(); 112 } 113 } else 114 dummy.map[0] = CaseMappingValue[address].value; 115 } 116 return dummy; 117 } 118 119 inline sal_Bool SAL_CALL 120 is_ja_voice_sound_mark(sal_Unicode& current, sal_Unicode next) 121 { 122 sal_Unicode c = 0; 123 124 if ((next == 0x3099 || next == 0x309a) && ( (c = widthfolding::getCompositionChar(current, next)) != 0 )) 125 current = c; 126 return c != 0; 127 } 128 129 sal_Unicode casefolding::getNextChar(const sal_Unicode *str, sal_Int32& idx, sal_Int32 len, MappingElement& e, Locale& aLocale, sal_uInt8 nMappingType, TransliterationModules moduleLoaded) throw (RuntimeException) 130 { 131 if( idx >= len ) 132 { 133 e = MappingElement(); 134 return 0; 135 } 136 137 sal_Unicode c; 138 139 if (moduleLoaded & TransliterationModules_IGNORE_CASE) { 140 if( e.current >= e.element.nmap ) { 141 e.element = getValue(str, idx++, len, aLocale, nMappingType); 142 e.current = 0; 143 } 144 c = e.element.map[e.current++]; 145 } else { 146 c = *(str + idx++); 147 } 148 149 if (moduleLoaded & TransliterationModules_IGNORE_KANA) { 150 if ((0x3040 <= c && c <= 0x3094) || (0x309d <= c && c <= 0x309f)) 151 c += 0x60; 152 } 153 154 // composition: KA + voice-mark --> GA. see halfwidthToFullwidth.cxx for detail 155 if (moduleLoaded & TransliterationModules_IGNORE_WIDTH) { 156 static oneToOneMapping& half2fullTable = widthfolding::gethalf2fullTable(); 157 c = half2fullTable[c]; 158 if (0x3040 <= c && c <= 0x30ff && idx < len && 159 is_ja_voice_sound_mark(c, half2fullTable[*(str + idx)])) 160 idx++; 161 } 162 163 return c; 164 } 165 166 } } } } 167 168