xref: /trunk/main/sal/osl/os2/nlsupport.c (revision 509a48ff)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 #define INCL_WIN
25 #include "svpm.h"
26 
27 #include <osl/nlsupport.h>
28 #include <osl/diagnose.h>
29 #include <osl/process.h>
30 #include <rtl/memory.h>
31 
32 #include <string.h>
33 
34 /*****************************************************************************
35  typedefs
36  *****************************************************************************/
37 
38 
39 typedef struct {
40     const char              *key;
41     const rtl_TextEncoding   value;
42 } _pair;
43 
44 
45 /*****************************************************************************
46  compare function for binary search
47  *****************************************************************************/
48 
49 static int
_pair_compare(const char * key,const _pair * pair)50 _pair_compare (const char *key, const _pair *pair)
51 {
52     int result = rtl_str_compareIgnoreAsciiCase( key, pair->key );
53     return result;
54 }
55 
56 /*****************************************************************************
57  binary search on encoding tables
58  *****************************************************************************/
59 
60 static const _pair*
_pair_search(const char * key,const _pair * base,unsigned int member)61 _pair_search (const char *key, const _pair *base, unsigned int member )
62 {
63     unsigned int lower = 0;
64     unsigned int upper = member;
65     unsigned int current;
66     int comparison;
67 
68     /* check for validity of input */
69     if ( (key == NULL) || (base == NULL) || (member == 0) )
70         return NULL;
71 
72     /* binary search */
73     while ( lower < upper )
74     {
75         current = (lower + upper) / 2;
76         comparison = _pair_compare( key, base + current );
77         if (comparison < 0)
78             upper = current;
79         else if (comparison > 0)
80             lower = current + 1;
81         else
82             return base + current;
83     }
84 
85     return NULL;
86 }
87 
88 
89 /*****************************************************************************
90  convert rtl_Locale to locale string
91  *****************************************************************************/
92 
/*
 * Build a locale string of the form "ll", "ll_CC" or "ll_CC<variant>" from
 * an rtl_Locale.
 *
 * pLocale  locale to convert; it must have a two character language code
 * buffer   destination for the NUL-terminated result
 * n        size of `buffer` in bytes
 *
 * The country is appended only if it is exactly two characters long. The
 * variant is appended verbatim, without any separator being inserted.
 * A component that does not fit into the buffer is silently left out.
 *
 * Returns `buffer` on success. Returns NULL if the locale is missing or has
 * no two character language code, or if no usable buffer was supplied.
 */
static char * _compose_locale( rtl_Locale * pLocale, char * buffer, size_t n )
{
    /* nothing can be written without a destination */
    if( (buffer == NULL) || (n == 0) )
        return NULL;

    /* check if a valid locale is specified */
    if( pLocale && pLocale->Language && (pLocale->Language->length == 2) )
    {
        size_t offset = 0;

        /* make sure the result is a valid string even if nothing fits */
        buffer[0] = '\0';

        /* convert language code to ascii */
        {
            rtl_String *pLanguage = NULL;

            rtl_uString2String( &pLanguage,
                pLocale->Language->buffer, pLocale->Language->length,
                RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );

            if( pLanguage != NULL )
            {
                if( (size_t) pLanguage->length < n )
                {
                    strcpy( buffer, pLanguage->buffer );
                    offset = (size_t) pLanguage->length;
                }

                rtl_string_release( pLanguage );
            }
        }

        /* convert country code to ascii */
        if( pLocale->Country && (pLocale->Country->length == 2) )
        {
            rtl_String *pCountry = NULL;

            rtl_uString2String( &pCountry,
                pLocale->Country->buffer, pLocale->Country->length,
                RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );

            if( pCountry != NULL )
            {
                /* room for '_' + country + terminating NUL */
                if( offset + (size_t) pCountry->length + 1 < n )
                {
                    strcpy( buffer + offset++, "_" );
                    strcpy( buffer + offset, pCountry->buffer );
                    offset += (size_t) pCountry->length;
                }

                rtl_string_release( pCountry );
            }
        }

        /* convert variant to ascii - check if there is enough space for the
         * variant string; written as an addition so that a small n cannot
         * make the unsigned subtraction n - 6 wrap around */
        if( pLocale->Variant && (pLocale->Variant->length > 0) &&
            ( (size_t) pLocale->Variant->length + 6 < n ) )
        {
            rtl_String *pVariant = NULL;

            rtl_uString2String( &pVariant,
                pLocale->Variant->buffer, pLocale->Variant->length,
                RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );

            if( pVariant != NULL )
            {
                if( offset + (size_t) pVariant->length + 1 < n )
                {
                    strcpy( buffer + offset, pVariant->buffer );
                    offset += (size_t) pVariant->length;
                }

                rtl_string_release( pVariant );
            }
        }

        return buffer;
    }

    return NULL;
}
160 
161 /*****************************************************************************
162  convert locale string to rtl_Locale
163  *****************************************************************************/
164 
_parse_locale(const char * locale)165 static rtl_Locale * _parse_locale( const char * locale )
166 {
167     static sal_Unicode c_locale[2] = { (sal_Unicode) 'C', 0 };
168 
169     /* check if locale contains a valid string */
170     if( locale )
171     {
172         size_t len = strlen( locale );
173 
174         if( len >= 2 )
175         {
176             rtl_uString * pLanguage = NULL;
177             rtl_uString * pCountry  = NULL;
178             rtl_uString * pVariant  = NULL;
179 
180             size_t offset = 2;
181 
182             /* convert language code to unicode */
183             rtl_string2UString( &pLanguage, locale, 2, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
184             OSL_ASSERT(pLanguage != NULL);
185 
186             /* convert country code to unicode */
187             if( len >= 5 && '_' == locale[2] )
188             {
189                 rtl_string2UString( &pCountry, locale + 3, 2, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
190                 OSL_ASSERT(pCountry != NULL);
191                 offset = 5;
192             }
193 
194             /* convert variant code to unicode - do not rely on "." as delimiter */
195             if( len > offset ) {
196                 rtl_string2UString( &pVariant, locale + offset, len - offset, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
197                 OSL_ASSERT(pVariant != NULL);
198             }
199 
200             rtl_Locale * ret =  rtl_locale_register( pLanguage->buffer, pCountry ? pCountry->buffer : c_locale + 1, pVariant ? pVariant->buffer : c_locale + 1 );
201 
202             if (pVariant) rtl_uString_release(pVariant);
203             if (pCountry) rtl_uString_release(pCountry);
204             if (pLanguage) rtl_uString_release(pLanguage);
205 
206 			return ret;
207         }
208         else
209             return rtl_locale_register( c_locale, c_locale + 1, c_locale + 1 );
210     }
211 
212     return NULL;
213 }
214 
215 /*
216  * This implementation of osl_getTextEncodingFromLocale maps
217  * from the ISO language codes.
218  */
219 
220 const _pair _full_locale_list[] = {
221     { "ja_JP.eucJP",  RTL_TEXTENCODING_EUC_JP      },
222     { "ja_JP.EUC",    RTL_TEXTENCODING_EUC_JP      },
223     { "ko_KR.EUC",    RTL_TEXTENCODING_EUC_KR      },
224     { "zh_CN.EUC",    RTL_TEXTENCODING_EUC_CN      },
225     { "zh_TW.EUC",    RTL_TEXTENCODING_EUC_TW      }
226 };
227 
228 const _pair _locale_extension_list[] = {
229     { "big5",         RTL_TEXTENCODING_BIG5        },
230     { "big5hk",       RTL_TEXTENCODING_BIG5_HKSCS  },
231     { "gb18030",      RTL_TEXTENCODING_GB_18030    },
232     { "euc",          RTL_TEXTENCODING_EUC_JP      },
233     { "iso8859-1",    RTL_TEXTENCODING_ISO_8859_1  },
234     { "iso8859-10",   RTL_TEXTENCODING_ISO_8859_10 },
235     { "iso8859-13",   RTL_TEXTENCODING_ISO_8859_13 },
236     { "iso8859-14",   RTL_TEXTENCODING_ISO_8859_14 },
237     { "iso8859-15",   RTL_TEXTENCODING_ISO_8859_15 },
238     { "iso8859-2",    RTL_TEXTENCODING_ISO_8859_2  },
239     { "iso8859-3",    RTL_TEXTENCODING_ISO_8859_3  },
240     { "iso8859-4",    RTL_TEXTENCODING_ISO_8859_4  },
241     { "iso8859-5",    RTL_TEXTENCODING_ISO_8859_5  },
242     { "iso8859-6",    RTL_TEXTENCODING_ISO_8859_6  },
243     { "iso8859-7",    RTL_TEXTENCODING_ISO_8859_7  },
244     { "iso8859-8",    RTL_TEXTENCODING_ISO_8859_8  },
245     { "iso8859-9",    RTL_TEXTENCODING_ISO_8859_9  },
246     { "koi8-r",       RTL_TEXTENCODING_KOI8_R      },
247     { "koi8-u",       RTL_TEXTENCODING_KOI8_U      },
248     { "pck",          RTL_TEXTENCODING_MS_932      },
249 #if (0)
250     { "sun_eu_greek", RTL_TEXTENCODING_DONTKNOW    },
251 #endif
252     { "utf-16",       RTL_TEXTENCODING_UNICODE     },
253     { "utf-7",        RTL_TEXTENCODING_UTF7        },
254     { "utf-8",        RTL_TEXTENCODING_UTF8        }
255 };
256 
257 const _pair _iso_language_list[] = {
258     { "af",  RTL_TEXTENCODING_ISO_8859_1 },
259     { "ar",  RTL_TEXTENCODING_ISO_8859_6 },
260     { "az",  RTL_TEXTENCODING_ISO_8859_9 },
261     { "be",  RTL_TEXTENCODING_ISO_8859_5 },
262     { "bg",  RTL_TEXTENCODING_ISO_8859_5 },
263     { "ca",  RTL_TEXTENCODING_ISO_8859_1 },
264     { "cs",  RTL_TEXTENCODING_ISO_8859_2 },
265     { "da",  RTL_TEXTENCODING_ISO_8859_1 },
266     { "de",  RTL_TEXTENCODING_ISO_8859_1 },
267     { "el",  RTL_TEXTENCODING_ISO_8859_7 },
268     { "en",  RTL_TEXTENCODING_ISO_8859_1 },
269     { "es",  RTL_TEXTENCODING_ISO_8859_1 },
270     { "et",  RTL_TEXTENCODING_ISO_8859_4 },
271     { "eu",  RTL_TEXTENCODING_ISO_8859_1 },
272     { "fa",  RTL_TEXTENCODING_ISO_8859_6 },
273     { "fi",  RTL_TEXTENCODING_ISO_8859_1 },
274     { "fo",  RTL_TEXTENCODING_ISO_8859_1 },
275     { "fr",  RTL_TEXTENCODING_ISO_8859_1 },
276     { "gr",  RTL_TEXTENCODING_ISO_8859_7 },
277     { "he",  RTL_TEXTENCODING_ISO_8859_8 },
278     { "hi",  RTL_TEXTENCODING_DONTKNOW },
279     { "hr",  RTL_TEXTENCODING_ISO_8859_2 },
280     { "hu",  RTL_TEXTENCODING_ISO_8859_2 },
281     { "hy",  RTL_TEXTENCODING_DONTKNOW },
282     { "id",  RTL_TEXTENCODING_ISO_8859_1 },
283     { "is",  RTL_TEXTENCODING_ISO_8859_1 },
284     { "it",  RTL_TEXTENCODING_ISO_8859_1 },
285     { "iw",  RTL_TEXTENCODING_ISO_8859_8 },
286     { "ja",  RTL_TEXTENCODING_EUC_JP },
287     { "ka",  RTL_TEXTENCODING_DONTKNOW },
288     { "kk",  RTL_TEXTENCODING_ISO_8859_5 },
289     { "ko",  RTL_TEXTENCODING_EUC_KR },
290     { "lt",  RTL_TEXTENCODING_ISO_8859_4 },
291     { "lv",  RTL_TEXTENCODING_ISO_8859_4 },
292     { "mk",  RTL_TEXTENCODING_ISO_8859_5 },
293     { "mr",  RTL_TEXTENCODING_DONTKNOW },
294     { "ms",  RTL_TEXTENCODING_ISO_8859_1 },
295     { "nl",  RTL_TEXTENCODING_ISO_8859_1 },
296     { "no",  RTL_TEXTENCODING_ISO_8859_1 },
297     { "pl",  RTL_TEXTENCODING_ISO_8859_2 },
298     { "pt",  RTL_TEXTENCODING_ISO_8859_1 },
299     { "ro",  RTL_TEXTENCODING_ISO_8859_2 },
300     { "ru",  RTL_TEXTENCODING_ISO_8859_5 },
301     { "sa",  RTL_TEXTENCODING_DONTKNOW },
302     { "sk",  RTL_TEXTENCODING_ISO_8859_2 },
303     { "sl",  RTL_TEXTENCODING_ISO_8859_2 },
304     { "sq",  RTL_TEXTENCODING_ISO_8859_2 },
305     { "sv",  RTL_TEXTENCODING_ISO_8859_1 },
306     { "sw",  RTL_TEXTENCODING_ISO_8859_1 },
307     { "ta",  RTL_TEXTENCODING_DONTKNOW },
308     { "th",  RTL_TEXTENCODING_DONTKNOW },
309     { "tr",  RTL_TEXTENCODING_ISO_8859_9 },
310     { "tt",  RTL_TEXTENCODING_ISO_8859_5 },
311     { "uk",  RTL_TEXTENCODING_ISO_8859_5 },
312     { "ur",  RTL_TEXTENCODING_ISO_8859_6 },
313     { "uz",  RTL_TEXTENCODING_ISO_8859_9 },
314     { "vi",  RTL_TEXTENCODING_DONTKNOW },
315     { "zh",  RTL_TEXTENCODING_BIG5 }
316 };
317 
318 /*****************************************************************************
319  return the text encoding corresponding to the given locale
320  *****************************************************************************/
321 
osl_getTextEncodingFromLocale(rtl_Locale * pLocale)322 rtl_TextEncoding osl_getTextEncodingFromLocale( rtl_Locale * pLocale )
323 {
324     const _pair *language = NULL;
325     char locale_buf[64] = "";
326     char *cp;
327 
328 	WinMessageBox(HWND_DESKTOP,HWND_DESKTOP,
329 		"Please contact technical support and report above informations.\n\n",
330 		"Critical error: osl_getTextEncodingFromLocale",
331 		0, MB_ERROR | MB_OK | MB_MOVEABLE);
332 
333     /* default to process locale if pLocale == NULL */
334     if( NULL == pLocale )
335         osl_getProcessLocale( &pLocale );
336 
337     /* convert rtl_Locale to locale string */
338     if( _compose_locale( pLocale, locale_buf, 64 ) )
339     {
340         /* check special handling list (EUC) first */
341         const unsigned int members = sizeof( _full_locale_list ) / sizeof( _pair );
342         language = _pair_search( locale_buf, _full_locale_list, members);
343 
344         if( NULL == language )
345         {
346             /*
347              *  check if there is a charset qualifier at the end of the given locale string
348              *  e.g. de.ISO8859-15 or de.ISO8859-15@euro which strongly indicates what
349              *  charset to use
350              */
351 		    cp = strrchr( locale_buf, '.' );
352 
353             if( NULL != cp )
354             {
355                 const unsigned int members = sizeof( _locale_extension_list ) / sizeof( _pair );
356                 language = _pair_search( cp + 1, _locale_extension_list, members);
357             }
358         }
359 
360         /* use iso language code to determine the charset */
361         if( NULL == language )
362         {
363             const unsigned int members = sizeof( _iso_language_list ) / sizeof( _pair );
364 
365             /* iso lang codes have 2 charaters */
366             locale_buf[2] = '\0';
367 
368             language = _pair_search( locale_buf, _iso_language_list, members);
369         }
370     }
371 
372     /* a matching item in our list provides a mapping from codeset to
373      * rtl-codeset */
374     if ( language != NULL )
375         return language->value;
376 
377     return RTL_TEXTENCODING_DONTKNOW;
378 }
379 
380 /*****************************************************************************
381  return the current process locale
382  *****************************************************************************/
383 
/*
 * Determine the current process locale from the environment and store the
 * parsed result in *ppLocale.
 *
 * The variables LC_ALL, LC_CTYPE and LANG are consulted in that order (the
 * same precedence setlocale uses); the first one that is set wins. If none
 * of them is set, the "C" locale is used.
 */
void _imp_getProcessLocale( rtl_Locale ** ppLocale )
{
    /* environment variables in order of precedence */
    static const char * const aVariables[] = { "LC_ALL", "LC_CTYPE", "LANG" };

    char * locale = NULL;
    unsigned int i;

    for( i = 0; (NULL == locale) && (i < sizeof( aVariables ) / sizeof( aVariables[0] )); ++i )
        locale = getenv( aVariables[i] );

    *ppLocale = _parse_locale( (NULL != locale) ? locale : "C" );
}
400 
401 /*****************************************************************************
402  set the current process locale
403  *****************************************************************************/
404 
/*
 * Overwrite the environment variable `name` with `value`, but only if the
 * variable exists already; variables that are not set are left alone.
 */
static void _replace_existing_env( const char * name, const char * value )
{
    if( getenv( name ) )
    {
#if defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX ) || defined( __EMX__ )
        setenv( name, value, 1 );
#else
        setenv( name, value );
#endif
    }
}

/*
 * Set the current process locale by updating the locale related environment
 * variables.
 *
 * pLocale  locale to set; if it cannot be converted to a locale string the
 *          environment is left untouched
 *
 * Of LC_ALL, LC_CTYPE and LANG only those that exist already are changed.
 * Each variable is updated under its own name; previously the LANG case
 * wrote to LC_CTYPE on the three-argument setenv path and left LANG stale.
 *
 * Always returns 0.
 */
int _imp_setProcessLocale( rtl_Locale * pLocale )
{
    char locale_buf[64];

    /* convert rtl_Locale to locale string */
    if( NULL != _compose_locale( pLocale, locale_buf, sizeof( locale_buf ) ) )
    {
        /* only change env vars that exist already */
        _replace_existing_env( "LC_ALL",   locale_buf );
        _replace_existing_env( "LC_CTYPE", locale_buf );
        _replace_existing_env( "LANG",     locale_buf );
    }

    return 0;
}
440 
441 
442