xref: /aoo42x/main/sal/osl/os2/nlsupport.c (revision cdf0e10c)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 #define INCL_WIN
29 #include "svpm.h"
30 
31 #include <osl/nlsupport.h>
32 #include <osl/diagnose.h>
33 #include <osl/process.h>
34 #include <rtl/memory.h>
35 
36 #include <string.h>
37 
38 /*****************************************************************************
39  typedefs
40  *****************************************************************************/
41 
42 
43 typedef struct {
44     const char              *key;
45     const rtl_TextEncoding   value;
46 } _pair;
47 
48 
49 /*****************************************************************************
50  compare function for binary search
51  *****************************************************************************/
52 
53 static int
54 _pair_compare (const char *key, const _pair *pair)
55 {
56     int result = rtl_str_compareIgnoreAsciiCase( key, pair->key );
57     return result;
58 }
59 
60 /*****************************************************************************
61  binary search on encoding tables
62  *****************************************************************************/
63 
64 static const _pair*
65 _pair_search (const char *key, const _pair *base, unsigned int member )
66 {
67     unsigned int lower = 0;
68     unsigned int upper = member;
69     unsigned int current;
70     int comparison;
71 
72     /* check for validity of input */
73     if ( (key == NULL) || (base == NULL) || (member == 0) )
74         return NULL;
75 
76     /* binary search */
77     while ( lower < upper )
78     {
79         current = (lower + upper) / 2;
80         comparison = _pair_compare( key, base + current );
81         if (comparison < 0)
82             upper = current;
83         else if (comparison > 0)
84             lower = current + 1;
85         else
86             return base + current;
87     }
88 
89     return NULL;
90 }
91 
92 
93 /*****************************************************************************
94  convert rtl_Locale to locale string
95  *****************************************************************************/
96 
97 static char * _compose_locale( rtl_Locale * pLocale, char * buffer, size_t n )
98 {
99     /* check if a valid locale is specified */
100     if( pLocale && pLocale->Language && (pLocale->Language->length == 2) )
101     {
102         size_t offset = 0;
103 
104         /* convert language code to ascii */
105         {
106             rtl_String *pLanguage = NULL;
107 
108             rtl_uString2String( &pLanguage,
109                 pLocale->Language->buffer, pLocale->Language->length,
110                 RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );
111 
112             if( pLanguage->length < n )
113             {
114                 strcpy( buffer, pLanguage->buffer );
115                 offset = pLanguage->length;
116             }
117 
118             rtl_string_release( pLanguage );
119         }
120 
121         /* convert country code to ascii */
122         if( pLocale->Country && (pLocale->Country->length == 2) )
123         {
124             rtl_String *pCountry = NULL;
125 
126             rtl_uString2String( &pCountry,
127                 pLocale->Country->buffer, pLocale->Country->length,
128                 RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );
129 
130             if( offset + pCountry->length + 1 < n )
131             {
132                 strcpy( buffer + offset++, "_" );
133                 strcpy( buffer + offset, pCountry->buffer );
134                 offset += pCountry->length;
135             }
136 
137             rtl_string_release( pCountry );
138         }
139 
140         /* convert variant to ascii - check if there is enough space for the variant string */
141         if( pLocale->Variant && pLocale->Variant->length &&
142             ( pLocale->Variant->length < n - 6 ) )
143         {
144             rtl_String *pVariant = NULL;
145 
146             rtl_uString2String( &pVariant,
147                 pLocale->Variant->buffer, pLocale->Variant->length,
148                 RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );
149 
150             if( offset + pVariant->length + 1 < n )
151             {
152                 strcpy( buffer + offset, pVariant->buffer );
153                 offset += pVariant->length;
154             }
155 
156             rtl_string_release( pVariant );
157         }
158 
159         return buffer;
160     }
161 
162     return NULL;
163 }
164 
165 /*****************************************************************************
166  convert locale string to rtl_Locale
167  *****************************************************************************/
168 
169 static rtl_Locale * _parse_locale( const char * locale )
170 {
171     static sal_Unicode c_locale[2] = { (sal_Unicode) 'C', 0 };
172 
173     /* check if locale contains a valid string */
174     if( locale )
175     {
176         size_t len = strlen( locale );
177 
178         if( len >= 2 )
179         {
180             rtl_uString * pLanguage = NULL;
181             rtl_uString * pCountry  = NULL;
182             rtl_uString * pVariant  = NULL;
183 
184             size_t offset = 2;
185 
186             /* convert language code to unicode */
187             rtl_string2UString( &pLanguage, locale, 2, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
188             OSL_ASSERT(pLanguage != NULL);
189 
190             /* convert country code to unicode */
191             if( len >= 5 && '_' == locale[2] )
192             {
193                 rtl_string2UString( &pCountry, locale + 3, 2, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
194                 OSL_ASSERT(pCountry != NULL);
195                 offset = 5;
196             }
197 
198             /* convert variant code to unicode - do not rely on "." as delimiter */
199             if( len > offset ) {
200                 rtl_string2UString( &pVariant, locale + offset, len - offset, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
201                 OSL_ASSERT(pVariant != NULL);
202             }
203 
204             rtl_Locale * ret =  rtl_locale_register( pLanguage->buffer, pCountry ? pCountry->buffer : c_locale + 1, pVariant ? pVariant->buffer : c_locale + 1 );
205 
206             if (pVariant) rtl_uString_release(pVariant);
207             if (pCountry) rtl_uString_release(pCountry);
208             if (pLanguage) rtl_uString_release(pLanguage);
209 
210 			return ret;
211         }
212         else
213             return rtl_locale_register( c_locale, c_locale + 1, c_locale + 1 );
214     }
215 
216     return NULL;
217 }
218 
219 /*
220  * This implementation of osl_getTextEncodingFromLocale maps
221  * from the ISO language codes.
222  */
223 
224 const _pair _full_locale_list[] = {
225     { "ja_JP.eucJP",  RTL_TEXTENCODING_EUC_JP      },
226     { "ja_JP.EUC",    RTL_TEXTENCODING_EUC_JP      },
227     { "ko_KR.EUC",    RTL_TEXTENCODING_EUC_KR      },
228     { "zh_CN.EUC",    RTL_TEXTENCODING_EUC_CN      },
229     { "zh_TW.EUC",    RTL_TEXTENCODING_EUC_TW      }
230 };
231 
232 const _pair _locale_extension_list[] = {
233     { "big5",         RTL_TEXTENCODING_BIG5        },
234     { "big5hk",       RTL_TEXTENCODING_BIG5_HKSCS  },
235     { "gb18030",      RTL_TEXTENCODING_GB_18030    },
236     { "euc",          RTL_TEXTENCODING_EUC_JP      },
237     { "iso8859-1",    RTL_TEXTENCODING_ISO_8859_1  },
238     { "iso8859-10",   RTL_TEXTENCODING_ISO_8859_10 },
239     { "iso8859-13",   RTL_TEXTENCODING_ISO_8859_13 },
240     { "iso8859-14",   RTL_TEXTENCODING_ISO_8859_14 },
241     { "iso8859-15",   RTL_TEXTENCODING_ISO_8859_15 },
242     { "iso8859-2",    RTL_TEXTENCODING_ISO_8859_2  },
243     { "iso8859-3",    RTL_TEXTENCODING_ISO_8859_3  },
244     { "iso8859-4",    RTL_TEXTENCODING_ISO_8859_4  },
245     { "iso8859-5",    RTL_TEXTENCODING_ISO_8859_5  },
246     { "iso8859-6",    RTL_TEXTENCODING_ISO_8859_6  },
247     { "iso8859-7",    RTL_TEXTENCODING_ISO_8859_7  },
248     { "iso8859-8",    RTL_TEXTENCODING_ISO_8859_8  },
249     { "iso8859-9",    RTL_TEXTENCODING_ISO_8859_9  },
250     { "koi8-r",       RTL_TEXTENCODING_KOI8_R      },
251     { "koi8-u",       RTL_TEXTENCODING_KOI8_U      },
252     { "pck",          RTL_TEXTENCODING_MS_932      },
253 #if (0)
254     { "sun_eu_greek", RTL_TEXTENCODING_DONTKNOW    },
255 #endif
256     { "utf-16",       RTL_TEXTENCODING_UNICODE     },
257     { "utf-7",        RTL_TEXTENCODING_UTF7        },
258     { "utf-8",        RTL_TEXTENCODING_UTF8        }
259 };
260 
261 const _pair _iso_language_list[] = {
262     { "af",  RTL_TEXTENCODING_ISO_8859_1 },
263     { "ar",  RTL_TEXTENCODING_ISO_8859_6 },
264     { "az",  RTL_TEXTENCODING_ISO_8859_9 },
265     { "be",  RTL_TEXTENCODING_ISO_8859_5 },
266     { "bg",  RTL_TEXTENCODING_ISO_8859_5 },
267     { "ca",  RTL_TEXTENCODING_ISO_8859_1 },
268     { "cs",  RTL_TEXTENCODING_ISO_8859_2 },
269     { "da",  RTL_TEXTENCODING_ISO_8859_1 },
270     { "de",  RTL_TEXTENCODING_ISO_8859_1 },
271     { "el",  RTL_TEXTENCODING_ISO_8859_7 },
272     { "en",  RTL_TEXTENCODING_ISO_8859_1 },
273     { "es",  RTL_TEXTENCODING_ISO_8859_1 },
274     { "et",  RTL_TEXTENCODING_ISO_8859_4 },
275     { "eu",  RTL_TEXTENCODING_ISO_8859_1 },
276     { "fa",  RTL_TEXTENCODING_ISO_8859_6 },
277     { "fi",  RTL_TEXTENCODING_ISO_8859_1 },
278     { "fo",  RTL_TEXTENCODING_ISO_8859_1 },
279     { "fr",  RTL_TEXTENCODING_ISO_8859_1 },
280     { "gr",  RTL_TEXTENCODING_ISO_8859_7 },
281     { "he",  RTL_TEXTENCODING_ISO_8859_8 },
282     { "hi",  RTL_TEXTENCODING_DONTKNOW },
283     { "hr",  RTL_TEXTENCODING_ISO_8859_2 },
284     { "hu",  RTL_TEXTENCODING_ISO_8859_2 },
285     { "hy",  RTL_TEXTENCODING_DONTKNOW },
286     { "id",  RTL_TEXTENCODING_ISO_8859_1 },
287     { "is",  RTL_TEXTENCODING_ISO_8859_1 },
288     { "it",  RTL_TEXTENCODING_ISO_8859_1 },
289     { "iw",  RTL_TEXTENCODING_ISO_8859_8 },
290     { "ja",  RTL_TEXTENCODING_EUC_JP },
291     { "ka",  RTL_TEXTENCODING_DONTKNOW },
292     { "kk",  RTL_TEXTENCODING_ISO_8859_5 },
293     { "ko",  RTL_TEXTENCODING_EUC_KR },
294     { "lt",  RTL_TEXTENCODING_ISO_8859_4 },
295     { "lv",  RTL_TEXTENCODING_ISO_8859_4 },
296     { "mk",  RTL_TEXTENCODING_ISO_8859_5 },
297     { "mr",  RTL_TEXTENCODING_DONTKNOW },
298     { "ms",  RTL_TEXTENCODING_ISO_8859_1 },
299     { "nl",  RTL_TEXTENCODING_ISO_8859_1 },
300     { "no",  RTL_TEXTENCODING_ISO_8859_1 },
301     { "pl",  RTL_TEXTENCODING_ISO_8859_2 },
302     { "pt",  RTL_TEXTENCODING_ISO_8859_1 },
303     { "ro",  RTL_TEXTENCODING_ISO_8859_2 },
304     { "ru",  RTL_TEXTENCODING_ISO_8859_5 },
305     { "sa",  RTL_TEXTENCODING_DONTKNOW },
306     { "sk",  RTL_TEXTENCODING_ISO_8859_2 },
307     { "sl",  RTL_TEXTENCODING_ISO_8859_2 },
308     { "sq",  RTL_TEXTENCODING_ISO_8859_2 },
309     { "sv",  RTL_TEXTENCODING_ISO_8859_1 },
310     { "sw",  RTL_TEXTENCODING_ISO_8859_1 },
311     { "ta",  RTL_TEXTENCODING_DONTKNOW },
312     { "th",  RTL_TEXTENCODING_DONTKNOW },
313     { "tr",  RTL_TEXTENCODING_ISO_8859_9 },
314     { "tt",  RTL_TEXTENCODING_ISO_8859_5 },
315     { "uk",  RTL_TEXTENCODING_ISO_8859_5 },
316     { "ur",  RTL_TEXTENCODING_ISO_8859_6 },
317     { "uz",  RTL_TEXTENCODING_ISO_8859_9 },
318     { "vi",  RTL_TEXTENCODING_DONTKNOW },
319     { "zh",  RTL_TEXTENCODING_BIG5 }
320 };
321 
322 /*****************************************************************************
323  return the text encoding corresponding to the given locale
324  *****************************************************************************/
325 
326 rtl_TextEncoding osl_getTextEncodingFromLocale( rtl_Locale * pLocale )
327 {
328     const _pair *language = 0;
329     char locale_buf[64] = "";
330     char *cp;
331 
332 	WinMessageBox(HWND_DESKTOP,HWND_DESKTOP,
333 		"Please contact technical support and report above informations.\n\n",
334 		"Critical error: osl_getTextEncodingFromLocale",
335 		0, MB_ERROR | MB_OK | MB_MOVEABLE);
336 
337     /* default to process locale if pLocale == NULL */
338     if( NULL == pLocale )
339         osl_getProcessLocale( &pLocale );
340 
341     /* convert rtl_Locale to locale string */
342     if( _compose_locale( pLocale, locale_buf, 64 ) )
343     {
344         /* check special handling list (EUC) first */
345         const unsigned int members = sizeof( _full_locale_list ) / sizeof( _pair );
346         language = _pair_search( locale_buf, _full_locale_list, members);
347 
348         if( NULL == language )
349         {
350             /*
351              *  check if there is a charset qualifier at the end of the given locale string
352              *  e.g. de.ISO8859-15 or de.ISO8859-15@euro which strongly indicates what
353              *  charset to use
354              */
355 		    cp = strrchr( locale_buf, '.' );
356 
357             if( NULL != cp )
358             {
359                 const unsigned int members = sizeof( _locale_extension_list ) / sizeof( _pair );
360                 language = _pair_search( cp + 1, _locale_extension_list, members);
361             }
362         }
363 
364         /* use iso language code to determine the charset */
365         if( NULL == language )
366         {
367             const unsigned int members = sizeof( _iso_language_list ) / sizeof( _pair );
368 
369             /* iso lang codes have 2 charaters */
370             locale_buf[2] = '\0';
371 
372             language = _pair_search( locale_buf, _iso_language_list, members);
373         }
374     }
375 
376     /* a matching item in our list provides a mapping from codeset to
377      * rtl-codeset */
378     if ( language != NULL )
379         return language->value;
380 
381     return RTL_TEXTENCODING_DONTKNOW;
382 }
383 
384 /*****************************************************************************
385  return the current process locale
386  *****************************************************************************/
387 
388 void _imp_getProcessLocale( rtl_Locale ** ppLocale )
389 {
390     /* simulate behavior off setlocale */
391     char * locale = getenv( "LC_ALL" );
392 
393     if( NULL == locale )
394         locale = getenv( "LC_CTYPE" );
395 
396     if( NULL == locale )
397         locale = getenv( "LANG" );
398 
399     if( NULL == locale )
400         locale = "C";
401 
402     *ppLocale = _parse_locale( locale );
403 }
404 
405 /*****************************************************************************
406  set the current process locale
407  *****************************************************************************/
408 
409 int _imp_setProcessLocale( rtl_Locale * pLocale )
410 {
411     char locale_buf[64];
412 
413     /* convert rtl_Locale to locale string */
414     if( NULL != _compose_locale( pLocale, locale_buf, 64 ) )
415     {
416         /* only change env vars that exist already */
417         if( getenv( "LC_ALL" ) ) {
418 #if defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX ) || defined( __EMX__ )
419             setenv( "LC_ALL", locale_buf, 1);
420 #else
421             setenv( "LC_ALL", locale_buf );
422 #endif
423         }
424 
425         if( getenv( "LC_CTYPE" ) ) {
426 #if defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX ) || defined( __EMX__ )
427             setenv("LC_CTYPE", locale_buf, 1 );
428 #else
429             setenv( "LC_CTYPE", locale_buf );
430 #endif
431         }
432 
433         if( getenv( "LANG" ) ) {
434 #if defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX ) || defined( __EMX__ )
435             setenv("LC_CTYPE", locale_buf, 1 );
436 #else
437             setenv( "LANG", locale_buf );
438 #endif
439         }
440     }
441 
442     return 0;
443 }
444 
445 
446