1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 #define INCL_WIN
25 #include "svpm.h"
26
27 #include <osl/nlsupport.h>
28 #include <osl/diagnose.h>
29 #include <osl/process.h>
30 #include <rtl/memory.h>
31
32 #include <string.h>
33
34 /*****************************************************************************
35 typedefs
36 *****************************************************************************/
37
38
39 typedef struct {
40 const char *key;
41 const rtl_TextEncoding value;
42 } _pair;
43
44
45 /*****************************************************************************
46 compare function for binary search
47 *****************************************************************************/
48
49 static int
_pair_compare(const char * key,const _pair * pair)50 _pair_compare (const char *key, const _pair *pair)
51 {
52 int result = rtl_str_compareIgnoreAsciiCase( key, pair->key );
53 return result;
54 }
55
56 /*****************************************************************************
57 binary search on encoding tables
58 *****************************************************************************/
59
60 static const _pair*
_pair_search(const char * key,const _pair * base,unsigned int member)61 _pair_search (const char *key, const _pair *base, unsigned int member )
62 {
63 unsigned int lower = 0;
64 unsigned int upper = member;
65 unsigned int current;
66 int comparison;
67
68 /* check for validity of input */
69 if ( (key == NULL) || (base == NULL) || (member == 0) )
70 return NULL;
71
72 /* binary search */
73 while ( lower < upper )
74 {
75 current = (lower + upper) / 2;
76 comparison = _pair_compare( key, base + current );
77 if (comparison < 0)
78 upper = current;
79 else if (comparison > 0)
80 lower = current + 1;
81 else
82 return base + current;
83 }
84
85 return NULL;
86 }
87
88
89 /*****************************************************************************
90 convert rtl_Locale to locale string
91 *****************************************************************************/
92
/*
 * Build a locale string of the form <language>[_<country>][<variant>]
 * from an rtl_Locale.
 *
 * pLocale  locale to convert; its Language must be exactly two characters
 *          long, otherwise nothing is composed.
 * buffer   destination for the NUL-terminated result.
 * n        size of buffer in bytes, including the terminator.
 *
 * The country is appended (preceded by '_') only if it is exactly two
 * characters long and fits. The variant is appended verbatim, without any
 * delimiter, and only if it fits. Parts that do not fit are dropped.
 *
 * Returns buffer on success. Returns NULL if buffer is NULL, n is 0, the
 * locale has no two-character language, or the language code itself does
 * not fit into buffer. Whenever buffer is returned it holds a terminated
 * string.
 */
static char * _compose_locale( rtl_Locale * pLocale, char * buffer, size_t n )
{
    rtl_String *pLanguage = NULL;
    size_t      offset    = 0;
    size_t      length;

    /* a destination is required */
    if( (NULL == buffer) || (0 == n) )
        return NULL;

    /* check if a valid locale is specified */
    if( !pLocale || !pLocale->Language || (pLocale->Language->length != 2) )
        return NULL;

    /* from here on the buffer always holds a terminated string */
    buffer[0] = '\0';

    /* convert language code to ascii */
    rtl_uString2String( &pLanguage,
        pLocale->Language->buffer, pLocale->Language->length,
        RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );

    if( NULL == pLanguage )
        return NULL;

    length = (size_t) pLanguage->length;

    /* without the language code the result would be meaningless */
    if( length >= n )
    {
        rtl_string_release( pLanguage );
        return NULL;
    }

    memcpy( buffer, pLanguage->buffer, length );
    offset = length;
    buffer[offset] = '\0';

    rtl_string_release( pLanguage );

    /* convert country code to ascii */
    if( pLocale->Country && (pLocale->Country->length == 2) )
    {
        rtl_String *pCountry = NULL;

        rtl_uString2String( &pCountry,
            pLocale->Country->buffer, pLocale->Country->length,
            RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );

        if( NULL != pCountry )
        {
            length = (size_t) pCountry->length;

            /* room for '_', the country code and the terminator */
            if( offset + length + 1 < n )
            {
                buffer[offset++] = '_';
                memcpy( buffer + offset, pCountry->buffer, length );
                offset += length;
                buffer[offset] = '\0';
            }

            rtl_string_release( pCountry );
        }
    }

    /*
     * convert variant to ascii - check if there is enough space for the
     * variant string; "length + 6 < n" is the overflow-free spelling of
     * "length < n - 6", which wraps around for n < 6
     */
    if( pLocale->Variant && (pLocale->Variant->length > 0) &&
        ( (size_t) pLocale->Variant->length + 6 < n ) )
    {
        rtl_String *pVariant = NULL;

        rtl_uString2String( &pVariant,
            pLocale->Variant->buffer, pLocale->Variant->length,
            RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );

        if( NULL != pVariant )
        {
            length = (size_t) pVariant->length;

            if( offset + length + 1 < n )
            {
                memcpy( buffer + offset, pVariant->buffer, length );
                offset += length;
                buffer[offset] = '\0';
            }

            rtl_string_release( pVariant );
        }
    }

    return buffer;
}
160
161 /*****************************************************************************
162 convert locale string to rtl_Locale
163 *****************************************************************************/
164
_parse_locale(const char * locale)165 static rtl_Locale * _parse_locale( const char * locale )
166 {
167 static sal_Unicode c_locale[2] = { (sal_Unicode) 'C', 0 };
168
169 /* check if locale contains a valid string */
170 if( locale )
171 {
172 size_t len = strlen( locale );
173
174 if( len >= 2 )
175 {
176 rtl_uString * pLanguage = NULL;
177 rtl_uString * pCountry = NULL;
178 rtl_uString * pVariant = NULL;
179
180 size_t offset = 2;
181
182 /* convert language code to unicode */
183 rtl_string2UString( &pLanguage, locale, 2, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
184 OSL_ASSERT(pLanguage != NULL);
185
186 /* convert country code to unicode */
187 if( len >= 5 && '_' == locale[2] )
188 {
189 rtl_string2UString( &pCountry, locale + 3, 2, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
190 OSL_ASSERT(pCountry != NULL);
191 offset = 5;
192 }
193
194 /* convert variant code to unicode - do not rely on "." as delimiter */
195 if( len > offset ) {
196 rtl_string2UString( &pVariant, locale + offset, len - offset, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
197 OSL_ASSERT(pVariant != NULL);
198 }
199
200 rtl_Locale * ret = rtl_locale_register( pLanguage->buffer, pCountry ? pCountry->buffer : c_locale + 1, pVariant ? pVariant->buffer : c_locale + 1 );
201
202 if (pVariant) rtl_uString_release(pVariant);
203 if (pCountry) rtl_uString_release(pCountry);
204 if (pLanguage) rtl_uString_release(pLanguage);
205
206 return ret;
207 }
208 else
209 return rtl_locale_register( c_locale, c_locale + 1, c_locale + 1 );
210 }
211
212 return NULL;
213 }
214
215 /*
216 * This implementation of osl_getTextEncodingFromLocale maps
217 * from the ISO language codes.
218 */
219
220 const _pair _full_locale_list[] = {
221 { "ja_JP.eucJP", RTL_TEXTENCODING_EUC_JP },
222 { "ja_JP.EUC", RTL_TEXTENCODING_EUC_JP },
223 { "ko_KR.EUC", RTL_TEXTENCODING_EUC_KR },
224 { "zh_CN.EUC", RTL_TEXTENCODING_EUC_CN },
225 { "zh_TW.EUC", RTL_TEXTENCODING_EUC_TW }
226 };
227
228 const _pair _locale_extension_list[] = {
229 { "big5", RTL_TEXTENCODING_BIG5 },
230 { "big5hk", RTL_TEXTENCODING_BIG5_HKSCS },
231 { "gb18030", RTL_TEXTENCODING_GB_18030 },
232 { "euc", RTL_TEXTENCODING_EUC_JP },
233 { "iso8859-1", RTL_TEXTENCODING_ISO_8859_1 },
234 { "iso8859-10", RTL_TEXTENCODING_ISO_8859_10 },
235 { "iso8859-13", RTL_TEXTENCODING_ISO_8859_13 },
236 { "iso8859-14", RTL_TEXTENCODING_ISO_8859_14 },
237 { "iso8859-15", RTL_TEXTENCODING_ISO_8859_15 },
238 { "iso8859-2", RTL_TEXTENCODING_ISO_8859_2 },
239 { "iso8859-3", RTL_TEXTENCODING_ISO_8859_3 },
240 { "iso8859-4", RTL_TEXTENCODING_ISO_8859_4 },
241 { "iso8859-5", RTL_TEXTENCODING_ISO_8859_5 },
242 { "iso8859-6", RTL_TEXTENCODING_ISO_8859_6 },
243 { "iso8859-7", RTL_TEXTENCODING_ISO_8859_7 },
244 { "iso8859-8", RTL_TEXTENCODING_ISO_8859_8 },
245 { "iso8859-9", RTL_TEXTENCODING_ISO_8859_9 },
246 { "koi8-r", RTL_TEXTENCODING_KOI8_R },
247 { "koi8-u", RTL_TEXTENCODING_KOI8_U },
248 { "pck", RTL_TEXTENCODING_MS_932 },
249 #if (0)
250 { "sun_eu_greek", RTL_TEXTENCODING_DONTKNOW },
251 #endif
252 { "utf-16", RTL_TEXTENCODING_UNICODE },
253 { "utf-7", RTL_TEXTENCODING_UTF7 },
254 { "utf-8", RTL_TEXTENCODING_UTF8 }
255 };
256
257 const _pair _iso_language_list[] = {
258 { "af", RTL_TEXTENCODING_ISO_8859_1 },
259 { "ar", RTL_TEXTENCODING_ISO_8859_6 },
260 { "az", RTL_TEXTENCODING_ISO_8859_9 },
261 { "be", RTL_TEXTENCODING_ISO_8859_5 },
262 { "bg", RTL_TEXTENCODING_ISO_8859_5 },
263 { "ca", RTL_TEXTENCODING_ISO_8859_1 },
264 { "cs", RTL_TEXTENCODING_ISO_8859_2 },
265 { "da", RTL_TEXTENCODING_ISO_8859_1 },
266 { "de", RTL_TEXTENCODING_ISO_8859_1 },
267 { "el", RTL_TEXTENCODING_ISO_8859_7 },
268 { "en", RTL_TEXTENCODING_ISO_8859_1 },
269 { "es", RTL_TEXTENCODING_ISO_8859_1 },
270 { "et", RTL_TEXTENCODING_ISO_8859_4 },
271 { "eu", RTL_TEXTENCODING_ISO_8859_1 },
272 { "fa", RTL_TEXTENCODING_ISO_8859_6 },
273 { "fi", RTL_TEXTENCODING_ISO_8859_1 },
274 { "fo", RTL_TEXTENCODING_ISO_8859_1 },
275 { "fr", RTL_TEXTENCODING_ISO_8859_1 },
276 { "gr", RTL_TEXTENCODING_ISO_8859_7 },
277 { "he", RTL_TEXTENCODING_ISO_8859_8 },
278 { "hi", RTL_TEXTENCODING_DONTKNOW },
279 { "hr", RTL_TEXTENCODING_ISO_8859_2 },
280 { "hu", RTL_TEXTENCODING_ISO_8859_2 },
281 { "hy", RTL_TEXTENCODING_DONTKNOW },
282 { "id", RTL_TEXTENCODING_ISO_8859_1 },
283 { "is", RTL_TEXTENCODING_ISO_8859_1 },
284 { "it", RTL_TEXTENCODING_ISO_8859_1 },
285 { "iw", RTL_TEXTENCODING_ISO_8859_8 },
286 { "ja", RTL_TEXTENCODING_EUC_JP },
287 { "ka", RTL_TEXTENCODING_DONTKNOW },
288 { "kk", RTL_TEXTENCODING_ISO_8859_5 },
289 { "ko", RTL_TEXTENCODING_EUC_KR },
290 { "lt", RTL_TEXTENCODING_ISO_8859_4 },
291 { "lv", RTL_TEXTENCODING_ISO_8859_4 },
292 { "mk", RTL_TEXTENCODING_ISO_8859_5 },
293 { "mr", RTL_TEXTENCODING_DONTKNOW },
294 { "ms", RTL_TEXTENCODING_ISO_8859_1 },
295 { "nl", RTL_TEXTENCODING_ISO_8859_1 },
296 { "no", RTL_TEXTENCODING_ISO_8859_1 },
297 { "pl", RTL_TEXTENCODING_ISO_8859_2 },
298 { "pt", RTL_TEXTENCODING_ISO_8859_1 },
299 { "ro", RTL_TEXTENCODING_ISO_8859_2 },
300 { "ru", RTL_TEXTENCODING_ISO_8859_5 },
301 { "sa", RTL_TEXTENCODING_DONTKNOW },
302 { "sk", RTL_TEXTENCODING_ISO_8859_2 },
303 { "sl", RTL_TEXTENCODING_ISO_8859_2 },
304 { "sq", RTL_TEXTENCODING_ISO_8859_2 },
305 { "sv", RTL_TEXTENCODING_ISO_8859_1 },
306 { "sw", RTL_TEXTENCODING_ISO_8859_1 },
307 { "ta", RTL_TEXTENCODING_DONTKNOW },
308 { "th", RTL_TEXTENCODING_DONTKNOW },
309 { "tr", RTL_TEXTENCODING_ISO_8859_9 },
310 { "tt", RTL_TEXTENCODING_ISO_8859_5 },
311 { "uk", RTL_TEXTENCODING_ISO_8859_5 },
312 { "ur", RTL_TEXTENCODING_ISO_8859_6 },
313 { "uz", RTL_TEXTENCODING_ISO_8859_9 },
314 { "vi", RTL_TEXTENCODING_DONTKNOW },
315 { "zh", RTL_TEXTENCODING_BIG5 }
316 };
317
318 /*****************************************************************************
319 return the text encoding corresponding to the given locale
320 *****************************************************************************/
321
osl_getTextEncodingFromLocale(rtl_Locale * pLocale)322 rtl_TextEncoding osl_getTextEncodingFromLocale( rtl_Locale * pLocale )
323 {
324 const _pair *language = NULL;
325 char locale_buf[64] = "";
326 char *cp;
327
328 WinMessageBox(HWND_DESKTOP,HWND_DESKTOP,
329 "Please contact technical support and report above informations.\n\n",
330 "Critical error: osl_getTextEncodingFromLocale",
331 0, MB_ERROR | MB_OK | MB_MOVEABLE);
332
333 /* default to process locale if pLocale == NULL */
334 if( NULL == pLocale )
335 osl_getProcessLocale( &pLocale );
336
337 /* convert rtl_Locale to locale string */
338 if( _compose_locale( pLocale, locale_buf, 64 ) )
339 {
340 /* check special handling list (EUC) first */
341 const unsigned int members = sizeof( _full_locale_list ) / sizeof( _pair );
342 language = _pair_search( locale_buf, _full_locale_list, members);
343
344 if( NULL == language )
345 {
346 /*
347 * check if there is a charset qualifier at the end of the given locale string
348 * e.g. de.ISO8859-15 or de.ISO8859-15@euro which strongly indicates what
349 * charset to use
350 */
351 cp = strrchr( locale_buf, '.' );
352
353 if( NULL != cp )
354 {
355 const unsigned int members = sizeof( _locale_extension_list ) / sizeof( _pair );
356 language = _pair_search( cp + 1, _locale_extension_list, members);
357 }
358 }
359
360 /* use iso language code to determine the charset */
361 if( NULL == language )
362 {
363 const unsigned int members = sizeof( _iso_language_list ) / sizeof( _pair );
364
365 /* iso lang codes have 2 charaters */
366 locale_buf[2] = '\0';
367
368 language = _pair_search( locale_buf, _iso_language_list, members);
369 }
370 }
371
372 /* a matching item in our list provides a mapping from codeset to
373 * rtl-codeset */
374 if ( language != NULL )
375 return language->value;
376
377 return RTL_TEXTENCODING_DONTKNOW;
378 }
379
380 /*****************************************************************************
381 return the current process locale
382 *****************************************************************************/
383
/*
 * Determine the process locale from the environment and store it in
 * *ppLocale.
 *
 * Simulates the behavior of setlocale: LC_ALL, LC_CTYPE and LANG are
 * consulted in that order and the first one that is set to a non-empty
 * value wins. A variable set to the empty string is treated like an unset
 * one, so the search continues with the next variable instead of stopping
 * there. If none of them provides a value, "C" is used.
 *
 * The chosen string is handed to _parse_locale(); its result, which may be
 * NULL, is stored in *ppLocale.
 */
void _imp_getProcessLocale( rtl_Locale ** ppLocale )
{
    static const char * const variables[] = { "LC_ALL", "LC_CTYPE", "LANG" };

    const char   *locale = NULL;
    unsigned int  i;

    for( i = 0; (NULL == locale) && (i < sizeof( variables ) / sizeof( variables[0] )); ++i )
    {
        const char *value = getenv( variables[i] );

        /* an empty value counts as "not set" */
        if( (NULL != value) && ('\0' != value[0]) )
            locale = value;
    }

    if( NULL == locale )
        locale = "C";

    *ppLocale = _parse_locale( locale );
}
400
401 /*****************************************************************************
402 set the current process locale
403 *****************************************************************************/
404
/*
 * Overwrite the environment variable `name` with `value`, but only if the
 * variable already exists in the environment.
 */
static void _imp_replaceEnv( const char * name, const char * value )
{
    if( NULL == getenv( name ) )
        return;

#if defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX ) || defined( __EMX__ )
    setenv( name, value, 1 );
#else
    /* NOTE(review): two-argument setenv kept as found - confirm which platform provides it */
    setenv( name, value );
#endif
}

/*
 * Make pLocale the process locale by rewriting the locale environment
 * variables.
 *
 * The locale is rendered as a string; if that succeeds, each of LC_ALL,
 * LC_CTYPE and LANG that is already present in the environment is set to
 * that string. Variables that are not present are not created. Nothing is
 * changed when the locale cannot be rendered as a string.
 *
 * Always returns 0.
 */
int _imp_setProcessLocale( rtl_Locale * pLocale )
{
    char locale_buf[64];

    /* convert rtl_Locale to locale string */
    if( NULL != _compose_locale( pLocale, locale_buf, sizeof( locale_buf ) ) )
    {
        /* only change env vars that exist already */
        _imp_replaceEnv( "LC_ALL",   locale_buf );
        _imp_replaceEnv( "LC_CTYPE", locale_buf );
        _imp_replaceEnv( "LANG",     locale_buf );
    }

    return 0;
}
440
441
442