1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_lingucomponent.hxx"
30 
31 #if defined(WNT)
32 #include <tools/prewin.h>
33 #endif
34 
35 #if defined(WNT)
36 #include <Windows.h>
37 #endif
38 
39 #if defined(WNT)
40 #include <tools/postwin.h>
41 #endif
42 
43 
44 #include <osl/thread.h>
45 #include <osl/file.hxx>
46 #include <tools/debug.hxx>
47 #include <tools/urlobj.hxx>
48 #include <i18npool/mslangid.hxx>
49 #include <unotools/lingucfg.hxx>
50 #include <unotools/pathoptions.hxx>
51 #include <rtl/ustring.hxx>
52 #include <rtl/string.hxx>
53 #include <rtl/tencinfo.h>
54 #include <linguistic/misc.hxx>
55 
56 #include <set>
57 #include <vector>
58 #include <string.h>
59 
60 #include <lingutil.hxx>
61 #include <dictmgr.hxx>
62 
63 
64 
65 
66 using ::com::sun::star::lang::Locale;
67 using namespace ::com::sun::star;
68 
69 #if 0
70 //////////////////////////////////////////////////////////////////////
71 
72 String GetDirectoryPathFromFileURL( const String &rFileURL )
73 {
74     // get file URL
75     INetURLObject aURLObj;
76     aURLObj.SetSmartProtocol( INET_PROT_FILE );
77     aURLObj.SetSmartURL( rFileURL );
78     aURLObj.removeSegment();
79     DBG_ASSERT( !aURLObj.HasError(), "invalid URL" );
80     String aRes = aURLObj.GetMainURL( INetURLObject::DECODE_TO_IURI );
81     return aRes;
82 }
83 #endif
84 
85 #if defined(WNT)
86 rtl::OString Win_GetShortPathName( const rtl::OUString &rLongPathName )
87 {
88     rtl::OString aRes;
89 
90     sal_Unicode aShortBuffer[1024] = {0};
91     sal_Int32   nShortBufSize = sizeof( aShortBuffer ) / sizeof( aShortBuffer[0] );
92 
93     // use the version of 'GetShortPathName' that can deal with Unicode...
94     sal_Int32 nShortLen = GetShortPathNameW(
95             reinterpret_cast<LPCWSTR>( rLongPathName.getStr() ),
96             reinterpret_cast<LPWSTR>( aShortBuffer ),
97             nShortBufSize );
98 
99     if (nShortLen < nShortBufSize) // conversion successful?
100         aRes = rtl::OString( OU2ENC( rtl::OUString( aShortBuffer, nShortLen ), osl_getThreadTextEncoding()) );
101     else
102         DBG_ERROR( "Win_GetShortPathName: buffer to short" );
103 
104     return aRes;
105 }
106 #endif //defined(WNT)
107 
108 //////////////////////////////////////////////////////////////////////
109 
110 // build list of old style diuctionaries (not as extensions) to use.
111 // User installed dictionaries (the ones residing in the user paths)
112 // will get precedence over system installed ones for the same language.
113 std::vector< SvtLinguConfigDictionaryEntry > GetOldStyleDics( const char *pDicType )
114 {
115     std::vector< SvtLinguConfigDictionaryEntry > aRes;
116 
117 	if (!pDicType)
118 		return aRes;
119 
120 	rtl::OUString aFormatName;
121 	String aDicExtension;
122 #ifdef SYSTEM_DICTS
123 	rtl::OUString aSystemDir;
124 	rtl::OUString aSystemPrefix;
125 	rtl::OUString aSystemSuffix;
126 #endif
127 	bool bSpell = false;
128 	bool bHyph  = false;
129 	bool bThes  = false;
130     if (strcmp( pDicType, "DICT" ) == 0)
131 	{
132 		aFormatName		= A2OU("DICT_SPELL");
133 		aDicExtension	= String::CreateFromAscii( ".dic" );
134 #ifdef SYSTEM_DICTS
135 		aSystemDir		= A2OU( DICT_SYSTEM_DIR );
136 		aSystemSuffix		= aDicExtension;
137 #endif
138 		bSpell = true;
139 	}
140     else if (strcmp( pDicType, "HYPH" ) == 0)
141 	{
142 		aFormatName		= A2OU("DICT_HYPH");
143 		aDicExtension	= String::CreateFromAscii( ".dic" );
144 #ifdef SYSTEM_DICTS
145 		aSystemDir		= A2OU( HYPH_SYSTEM_DIR );
146 		aSystemPrefix		= A2OU( "hyph_" );
147 		aSystemSuffix		= aDicExtension;
148 #endif
149 		bHyph = true;
150 	}
151     else if (strcmp( pDicType, "THES" ) == 0)
152 	{
153 		aFormatName		= A2OU("DICT_THES");
154 		aDicExtension	= String::CreateFromAscii( ".dat" );
155 #ifdef SYSTEM_DICTS
156 		aSystemDir		= A2OU( THES_SYSTEM_DIR );
157 		aSystemPrefix		= A2OU( "th_" );
158 		aSystemSuffix		= A2OU( "_v2.dat" );
159 #endif
160 		bThes = true;
161 	}
162 
163 
164 	if (aFormatName.getLength() == 0 || aDicExtension.Len() == 0)
165 		return aRes;
166 
167 	// set of languages to remember the language where it is already
168 	// decided to make use of the dictionary.
169 	std::set< LanguageType > aDicLangInUse;
170 
171 #ifdef SYSTEM_DICTS
172    osl::Directory aSystemDicts(aSystemDir);
173    if (aSystemDicts.open() == osl::FileBase::E_None)
174    {
175        osl::DirectoryItem aItem;
176        osl::FileStatus aFileStatus(FileStatusMask_FileURL);
177        while (aSystemDicts.getNextItem(aItem) == osl::FileBase::E_None)
178        {
179            aItem.getFileStatus(aFileStatus);
180            rtl::OUString sPath = aFileStatus.getFileURL();
181            if (sPath.lastIndexOf(aSystemSuffix) == sPath.getLength()-aSystemSuffix.getLength())
182            {
183                sal_Int32 nStartIndex = sPath.lastIndexOf(sal_Unicode('/')) + 1;
184                if (!sPath.match(aSystemPrefix, nStartIndex))
185                    continue;
186                rtl::OUString sChunk = sPath.copy(0, sPath.getLength() - aSystemSuffix.getLength());
187                sal_Int32 nIndex = nStartIndex + aSystemPrefix.getLength();
188                rtl::OUString sLang = sChunk.getToken( 0, '_', nIndex );
189                if (!sLang.getLength())
190                    continue;
191                rtl::OUString sRegion;
192                if (nIndex != -1)
193                    sRegion = sChunk.copy( nIndex, sChunk.getLength() - nIndex );
194 
195                // Thus we first get the language of the dictionary
196                LanguageType nLang = MsLangId::convertIsoNamesToLanguage(
197                   sLang, sRegion );
198 
199                if (aDicLangInUse.count( nLang ) == 0)
200                {
201                    // remember the new language in use
202                    aDicLangInUse.insert( nLang );
203 
204                    // add the dictionary to the resulting vector
205                    SvtLinguConfigDictionaryEntry aDicEntry;
206                    aDicEntry.aLocations.realloc(1);
207                    aDicEntry.aLocaleNames.realloc(1);
208                    rtl::OUString aLocaleName( MsLangId::convertLanguageToIsoString( nLang ) );
209                    aDicEntry.aLocations[0] = sPath;
210                    aDicEntry.aFormatName = aFormatName;
211                    aDicEntry.aLocaleNames[0] = aLocaleName;
212                    aRes.push_back( aDicEntry );
213                }
214            }
215        }
216     }
217 
218 #endif
219 
220     return aRes;
221 }
222 
223 
224 void MergeNewStyleDicsAndOldStyleDics(
225 	std::list< SvtLinguConfigDictionaryEntry > &rNewStyleDics,
226 	const std::vector< SvtLinguConfigDictionaryEntry > &rOldStyleDics )
227 {
228 	// get list of languages supported by new style dictionaries
229 	std::set< LanguageType > aNewStyleLanguages;
230 	std::list< SvtLinguConfigDictionaryEntry >::const_iterator aIt;
231 	for (aIt = rNewStyleDics.begin() ;  aIt != rNewStyleDics.end();  ++aIt)
232 	{
233 		const uno::Sequence< rtl::OUString > aLocaleNames( aIt->aLocaleNames );
234 		sal_Int32 nLocaleNames = aLocaleNames.getLength();
235 		for (sal_Int32 k = 0;  k < nLocaleNames; ++k)
236 		{
237 			LanguageType nLang = MsLangId::convertIsoStringToLanguage( aLocaleNames[k] );
238 			aNewStyleLanguages.insert( nLang );
239 		}
240 	}
241 
242 	// now check all old style dictionaries if they will add a not yet
243 	// added language. If so add them to the resulting vector
244 	std::vector< SvtLinguConfigDictionaryEntry >::const_iterator aIt2;
245 	for (aIt2 = rOldStyleDics.begin();  aIt2 != rOldStyleDics.end();  ++aIt2)
246 	{
247 		sal_Int32 nOldStyleDics = aIt2->aLocaleNames.getLength();
248 
249 		// old style dics should only have one language listed...
250 		DBG_ASSERT( nOldStyleDics, "old style dictionary with more then one language found!");
251 		if (nOldStyleDics > 0)
252 		{
253 			LanguageType nLang = MsLangId::convertIsoStringToLanguage( aIt2->aLocaleNames[0] );
254 
255             if (nLang == LANGUAGE_DONTKNOW || nLang == LANGUAGE_NONE)
256             {
257                 DBG_ERROR( "old style dictionary with invalid language found!" );
258                 continue;
259             }
260 
261 			// language not yet added?
262 			if (aNewStyleLanguages.count( nLang ) == 0)
263 				rNewStyleDics.push_back( *aIt2 );
264 		}
265 		else
266 		{
267 			DBG_ERROR( "old style dictionary with no language found!" );
268 		}
269 	}
270 }
271 
272 
273 rtl_TextEncoding getTextEncodingFromCharset(const sal_Char* pCharset)
274 {
275     // default result: used to indicate that we failed to get the proper encoding
276     rtl_TextEncoding eRet = RTL_TEXTENCODING_DONTKNOW;
277 
278     if (pCharset)
279     {
280         eRet = rtl_getTextEncodingFromMimeCharset(pCharset);
281         if (eRet == RTL_TEXTENCODING_DONTKNOW)
282             eRet = rtl_getTextEncodingFromUnixCharset(pCharset);
283         if (eRet == RTL_TEXTENCODING_DONTKNOW)
284         {
285             if (strcmp("ISCII-DEVANAGARI", pCharset) == 0)
286                 eRet = RTL_TEXTENCODING_ISCII_DEVANAGARI;
287         }
288     }
289     return eRet;
290 }
291 
292 //////////////////////////////////////////////////////////////////////
293 
294