1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 // MARKER(update_precomp.py): autogen include statement, do not remove 29 #include "precompiled_svl.hxx" 30 #include <svl/lngmisc.hxx> 31 #include <tools/solar.h> 32 #include <tools/string.hxx> 33 #include <tools/debug.hxx> 34 #include <rtl/ustrbuf.hxx> 35 #include <rtl/ustring.hxx> 36 37 using namespace rtl; 38 39 namespace linguistic 40 { 41 42 /////////////////////////////////////////////////////////////////////////// 43 44 sal_Int32 GetNumControlChars( const OUString &rTxt ) 45 { 46 sal_Int32 nCnt = 0; 47 sal_Int32 nLen = rTxt.getLength(); 48 for (sal_Int32 i = 0; i < nLen; ++i) 49 { 50 if (IsControlChar( rTxt[i] )) 51 ++nCnt; 52 } 53 return nCnt; 54 } 55 56 57 sal_Bool RemoveHyphens( OUString &rTxt ) 58 { 59 sal_Bool bModified = sal_False; 60 if (HasHyphens( rTxt )) 61 { 62 String aTmp( rTxt ); 63 aTmp.EraseAllChars( SVT_SOFT_HYPHEN ); 64 aTmp.EraseAllChars( SVT_HARD_HYPHEN ); 65 rTxt = aTmp; 66 bModified = sal_True; 67 } 68 return bModified; 69 } 70 71 72 sal_Bool RemoveControlChars( OUString &rTxt ) 73 { 74 sal_Bool bModified = sal_False; 75 sal_Int32 nCtrlChars = GetNumControlChars( rTxt ); 76 if (nCtrlChars) 77 { 78 sal_Int32 nLen = rTxt.getLength(); 79 sal_Int32 nSize = nLen - nCtrlChars; 80 OUStringBuffer aBuf( nSize ); 81 aBuf.setLength( nSize ); 82 sal_Int32 nCnt = 0; 83 for (sal_Int32 i = 0; i < nLen; ++i) 84 { 85 sal_Unicode cChar = rTxt[i]; 86 if (!IsControlChar( cChar )) 87 { 88 DBG_ASSERT( nCnt < nSize, "index out of range" ); 89 aBuf.setCharAt( nCnt++, cChar ); 90 } 91 } 92 DBG_ASSERT( nCnt == nSize, "wrong size" ); 93 rTxt = aBuf.makeStringAndClear(); 94 bModified = sal_True; 95 } 96 return bModified; 97 } 98 99 100 // non breaking field character 101 #define CH_TXTATR_INWORD ((sal_Char) 0x02) 102 103 sal_Bool ReplaceControlChars( rtl::OUString &rTxt, sal_Char /*aRplcChar*/ ) 104 { 105 // the resulting string looks like this: 106 // 1. non breaking field characters get removed 107 // 2. remaining control characters will be replaced by ' ' 108 109 sal_Bool bModified = sal_False; 110 sal_Int32 nCtrlChars = GetNumControlChars( rTxt ); 111 if (nCtrlChars) 112 { 113 sal_Int32 nLen = rTxt.getLength(); 114 OUStringBuffer aBuf( nLen ); 115 sal_Int32 nCnt = 0; 116 for (sal_Int32 i = 0; i < nLen; ++i) 117 { 118 sal_Unicode cChar = rTxt[i]; 119 if (CH_TXTATR_INWORD != cChar) 120 { 121 if (IsControlChar( cChar )) 122 cChar = ' '; 123 DBG_ASSERT( nCnt < nLen, "index out of range" ); 124 aBuf.setCharAt( nCnt++, cChar ); 125 } 126 } 127 aBuf.setLength( nCnt ); 128 rTxt = aBuf.makeStringAndClear(); 129 bModified = sal_True; 130 } 131 return bModified; 132 } 133 134 135 String GetThesaurusReplaceText( const String &rText ) 136 { 137 // The strings for synonyms returned by the thesaurus sometimes have some 138 // explanation text put in between '(' and ')' or a trailing '*'. 139 // These parts should not be put in the ReplaceEdit Text that may get 140 // inserted into the document. Thus we strip them from the text. 141 142 String aText( rText ); 143 144 xub_StrLen nPos = aText.Search( sal_Unicode('(') ); 145 while (STRING_NOTFOUND != nPos) 146 { 147 xub_StrLen nEnd = aText.Search( sal_Unicode(')'), nPos ); 148 if (STRING_NOTFOUND != nEnd) 149 aText.Erase( nPos, nEnd-nPos+1 ); 150 else 151 break; 152 nPos = aText.Search( sal_Unicode('(') ); 153 } 154 155 nPos = aText.Search( sal_Unicode('*') ); 156 if (STRING_NOTFOUND != nPos) 157 aText.Erase( nPos ); 158 159 // remove any possible remaining ' ' that may confuse the thesaurus 160 // when it gets called with the text 161 aText.EraseLeadingAndTrailingChars( sal_Unicode(' ') ); 162 163 return aText; 164 } 165 166 /////////////////////////////////////////////////////////////////////////// 167 168 } // namespace linguistic 169 170