1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_lingucomponent.hxx"
26 
27 #if defined(WNT)
28 #include <tools/prewin.h>
29 #endif
30 
31 #if defined(WNT)
32 #include <Windows.h>
33 #endif
34 
35 #if defined(WNT)
36 #include <tools/postwin.h>
37 #endif
38 
39 
40 #include <osl/thread.h>
41 #include <osl/file.hxx>
42 #include <tools/debug.hxx>
43 #include <tools/urlobj.hxx>
44 #include <i18npool/mslangid.hxx>
45 #include <unotools/lingucfg.hxx>
46 #include <unotools/pathoptions.hxx>
47 #include <rtl/ustring.hxx>
48 #include <rtl/string.hxx>
49 #include <rtl/tencinfo.h>
50 #include <linguistic/misc.hxx>
51 
52 #include <set>
53 #include <vector>
54 #include <string.h>
55 
56 #include <lingutil.hxx>
57 
58 
59 
60 
61 using ::com::sun::star::lang::Locale;
62 using namespace ::com::sun::star;
63 
64 #if 0
65 //////////////////////////////////////////////////////////////////////
66 
67 String GetDirectoryPathFromFileURL( const String &rFileURL )
68 {
69     // get file URL
70     INetURLObject aURLObj;
71     aURLObj.SetSmartProtocol( INET_PROT_FILE );
72     aURLObj.SetSmartURL( rFileURL );
73     aURLObj.removeSegment();
74     DBG_ASSERT( !aURLObj.HasError(), "invalid URL" );
75     String aRes = aURLObj.GetMainURL( INetURLObject::DECODE_TO_IURI );
76     return aRes;
77 }
78 #endif
79 
80 #if defined(WNT)
// Converts a long Windows path name to its short form by calling
// GetShortPathNameW and returns the result converted via OU2ENC with the
// encoding reported by osl_getThreadTextEncoding().
//
// rLongPathName: the path to convert; passed unchanged to GetShortPathNameW.
// Returns the short path name, or an empty string if the conversion failed.
rtl::OString Win_GetShortPathName( const rtl::OUString &rLongPathName )
{
    rtl::OString aRes;

    sal_Unicode aStackBuffer[1024] = {0};
    const sal_Int32 nStackBufSize = sizeof( aStackBuffer ) / sizeof( aStackBuffer[0] );
    const sal_Unicode *pShortPath = aStackBuffer;

    // use the version of 'GetShortPathName' that can deal with Unicode...
    sal_Int32 nShortLen = GetShortPathNameW(
            reinterpret_cast<LPCWSTR>( rLongPathName.getStr() ),
            reinterpret_cast<LPWSTR>( aStackBuffer ),
            nStackBufSize );

    // If the buffer is too small the API returns the required size
    // (including the terminating null character) instead of the length of
    // the result. Retry once with a buffer of exactly that size instead of
    // giving up on long paths.
    std::vector< sal_Unicode > aHeapBuffer;
    if (nShortLen >= nStackBufSize)
    {
        const sal_Int32 nHeapBufSize = nShortLen;
        aHeapBuffer.resize( nHeapBufSize, 0 );
        nShortLen = GetShortPathNameW(
                reinterpret_cast<LPCWSTR>( rLongPathName.getStr() ),
                reinterpret_cast<LPWSTR>( &aHeapBuffer[0] ),
                nHeapBufSize );
        if (nShortLen >= nHeapBufSize)
            nShortLen = 0;  // still does not fit: treat as failure
        pShortPath = &aHeapBuffer[0];
    }

    // a return value of 0 means the call itself failed
    if (nShortLen > 0)
        aRes = rtl::OString( OU2ENC( rtl::OUString( pShortPath, nShortLen ), osl_getThreadTextEncoding()) );
    else
        DBG_ERROR( "Win_GetShortPathName: conversion failed" );

    return aRes;
}
101 #endif //defined(WNT)
102 
103 //////////////////////////////////////////////////////////////////////
104 
// Build the list of old style dictionaries (those not installed as
// extensions) to use.
// User installed dictionaries (the ones residing in the user paths)
// will get precedence over system installed ones for the same language.
GetOldStyleDics(const char * pDicType)108 std::vector< SvtLinguConfigDictionaryEntry > GetOldStyleDics( const char *pDicType )
109 {
110     std::vector< SvtLinguConfigDictionaryEntry > aRes;
111 
112 	if (!pDicType)
113 		return aRes;
114 
115 	rtl::OUString aFormatName;
116 	String aDicExtension;
117 #ifdef SYSTEM_DICTS
118 	rtl::OUString aSystemDir;
119 	rtl::OUString aSystemPrefix;
120 	rtl::OUString aSystemSuffix;
121 #endif
122     if (strcmp( pDicType, "DICT" ) == 0)
123 	{
124 		aFormatName		= A2OU("DICT_SPELL");
125 		aDicExtension	= String::CreateFromAscii( ".dic" );
126 #ifdef SYSTEM_DICTS
127 		aSystemDir		= A2OU( DICT_SYSTEM_DIR );
128 		aSystemSuffix		= aDicExtension;
129 #endif
130 	}
131     else if (strcmp( pDicType, "HYPH" ) == 0)
132 	{
133 		aFormatName		= A2OU("DICT_HYPH");
134 		aDicExtension	= String::CreateFromAscii( ".dic" );
135 #ifdef SYSTEM_DICTS
136 		aSystemDir		= A2OU( HYPH_SYSTEM_DIR );
137 		aSystemPrefix		= A2OU( "hyph_" );
138 		aSystemSuffix		= aDicExtension;
139 #endif
140 	}
141     else if (strcmp( pDicType, "THES" ) == 0)
142 	{
143 		aFormatName		= A2OU("DICT_THES");
144 		aDicExtension	= String::CreateFromAscii( ".dat" );
145 #ifdef SYSTEM_DICTS
146 		aSystemDir		= A2OU( THES_SYSTEM_DIR );
147 		aSystemPrefix		= A2OU( "th_" );
148 		aSystemSuffix		= A2OU( "_v2.dat" );
149 #endif
150 	}
151 
152 
153 	if (aFormatName.getLength() == 0 || aDicExtension.Len() == 0)
154 		return aRes;
155 
156 	// set of languages to remember the language where it is already
157 	// decided to make use of the dictionary.
158 	std::set< LanguageType > aDicLangInUse;
159 
160 #ifdef SYSTEM_DICTS
161    osl::Directory aSystemDicts(aSystemDir);
162    if (aSystemDicts.open() == osl::FileBase::E_None)
163    {
164        osl::DirectoryItem aItem;
165        osl::FileStatus aFileStatus(FileStatusMask_FileURL);
166        while (aSystemDicts.getNextItem(aItem) == osl::FileBase::E_None)
167        {
168            aItem.getFileStatus(aFileStatus);
169            rtl::OUString sPath = aFileStatus.getFileURL();
170            if (sPath.lastIndexOf(aSystemSuffix) == sPath.getLength()-aSystemSuffix.getLength())
171            {
172                sal_Int32 nStartIndex = sPath.lastIndexOf(sal_Unicode('/')) + 1;
173                if (!sPath.match(aSystemPrefix, nStartIndex))
174                    continue;
175                rtl::OUString sChunk = sPath.copy(0, sPath.getLength() - aSystemSuffix.getLength());
176                sal_Int32 nIndex = nStartIndex + aSystemPrefix.getLength();
177                rtl::OUString sLang = sChunk.getToken( 0, '_', nIndex );
178                if (!sLang.getLength())
179                    continue;
180                rtl::OUString sRegion;
181                if (nIndex != -1)
182                    sRegion = sChunk.copy( nIndex, sChunk.getLength() - nIndex );
183 
184                // Thus we first get the language of the dictionary
185                LanguageType nLang = MsLangId::convertIsoNamesToLanguage(
186                   sLang, sRegion );
187 
188                if (aDicLangInUse.count( nLang ) == 0)
189                {
190                    // remember the new language in use
191                    aDicLangInUse.insert( nLang );
192 
193                    // add the dictionary to the resulting vector
194                    SvtLinguConfigDictionaryEntry aDicEntry;
195                    aDicEntry.aLocations.realloc(1);
196                    aDicEntry.aLocaleNames.realloc(1);
197                    rtl::OUString aLocaleName( MsLangId::convertLanguageToIsoString( nLang ) );
198                    aDicEntry.aLocations[0] = sPath;
199                    aDicEntry.aFormatName = aFormatName;
200                    aDicEntry.aLocaleNames[0] = aLocaleName;
201                    aRes.push_back( aDicEntry );
202                }
203            }
204        }
205     }
206 
207 #endif
208 
209     return aRes;
210 }
211 
212 
MergeNewStyleDicsAndOldStyleDics(std::list<SvtLinguConfigDictionaryEntry> & rNewStyleDics,const std::vector<SvtLinguConfigDictionaryEntry> & rOldStyleDics)213 void MergeNewStyleDicsAndOldStyleDics(
214 	std::list< SvtLinguConfigDictionaryEntry > &rNewStyleDics,
215 	const std::vector< SvtLinguConfigDictionaryEntry > &rOldStyleDics )
216 {
217 	// get list of languages supported by new style dictionaries
218 	std::set< LanguageType > aNewStyleLanguages;
219 	std::list< SvtLinguConfigDictionaryEntry >::const_iterator aIt;
220 	for (aIt = rNewStyleDics.begin() ;  aIt != rNewStyleDics.end();  ++aIt)
221 	{
222 		const uno::Sequence< rtl::OUString > aLocaleNames( aIt->aLocaleNames );
223 		sal_Int32 nLocaleNames = aLocaleNames.getLength();
224 		for (sal_Int32 k = 0;  k < nLocaleNames; ++k)
225 		{
226 			LanguageType nLang = MsLangId::convertIsoStringToLanguage( aLocaleNames[k] );
227 			aNewStyleLanguages.insert( nLang );
228 		}
229 	}
230 
231 	// now check all old style dictionaries if they will add a not yet
232 	// added language. If so add them to the resulting vector
233 	std::vector< SvtLinguConfigDictionaryEntry >::const_iterator aIt2;
234 	for (aIt2 = rOldStyleDics.begin();  aIt2 != rOldStyleDics.end();  ++aIt2)
235 	{
236 		sal_Int32 nOldStyleDics = aIt2->aLocaleNames.getLength();
237 
238 		// old style dics should only have one language listed...
239 		DBG_ASSERT( nOldStyleDics, "old style dictionary with more then one language found!");
240 		if (nOldStyleDics > 0)
241 		{
242 			LanguageType nLang = MsLangId::convertIsoStringToLanguage( aIt2->aLocaleNames[0] );
243 
244             if (nLang == LANGUAGE_DONTKNOW || nLang == LANGUAGE_NONE)
245             {
246                 DBG_ERROR( "old style dictionary with invalid language found!" );
247                 continue;
248             }
249 
250 			// language not yet added?
251 			if (aNewStyleLanguages.count( nLang ) == 0)
252 				rNewStyleDics.push_back( *aIt2 );
253 		}
254 		else
255 		{
256 			DBG_ERROR( "old style dictionary with no language found!" );
257 		}
258 	}
259 }
260 
261 
getTextEncodingFromCharset(const sal_Char * pCharset)262 rtl_TextEncoding getTextEncodingFromCharset(const sal_Char* pCharset)
263 {
264     // default result: used to indicate that we failed to get the proper encoding
265     rtl_TextEncoding eRet = RTL_TEXTENCODING_DONTKNOW;
266 
267     if (pCharset)
268     {
269         eRet = rtl_getTextEncodingFromMimeCharset(pCharset);
270         if (eRet == RTL_TEXTENCODING_DONTKNOW)
271             eRet = rtl_getTextEncodingFromUnixCharset(pCharset);
272         if (eRet == RTL_TEXTENCODING_DONTKNOW)
273         {
274             if (strcmp("ISCII-DEVANAGARI", pCharset) == 0)
275                 eRet = RTL_TEXTENCODING_ISCII_DEVANAGARI;
276         }
277     }
278     return eRet;
279 }
280 
281 //////////////////////////////////////////////////////////////////////
282 
283