1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 #define INCL_WIN 25 #include "svpm.h" 26 27 #include <osl/nlsupport.h> 28 #include <osl/diagnose.h> 29 #include <osl/process.h> 30 #include <rtl/memory.h> 31 32 #include <string.h> 33 34 /***************************************************************************** 35 typedefs 36 *****************************************************************************/ 37 38 39 typedef struct { 40 const char *key; 41 const rtl_TextEncoding value; 42 } _pair; 43 44 45 /***************************************************************************** 46 compare function for binary search 47 *****************************************************************************/ 48 49 static int 50 _pair_compare (const char *key, const _pair *pair) 51 { 52 int result = rtl_str_compareIgnoreAsciiCase( key, pair->key ); 53 return result; 54 } 55 56 /***************************************************************************** 57 binary search on encoding tables 58 *****************************************************************************/ 59 60 static const _pair* 61 _pair_search (const char *key, const _pair *base, unsigned int member ) 62 { 63 unsigned int lower = 0; 64 unsigned int upper = member; 65 unsigned int current; 66 int comparison; 67 68 /* check for validity of input */ 69 if ( (key == NULL) || (base == NULL) || (member == 0) ) 70 return NULL; 71 72 /* binary search */ 73 while ( lower < upper ) 74 { 75 current = (lower + upper) / 2; 76 comparison = _pair_compare( key, base + current ); 77 if (comparison < 0) 78 upper = current; 79 else if (comparison > 0) 80 lower = current + 1; 81 else 82 return base + current; 83 } 84 85 return NULL; 86 } 87 88 89 /***************************************************************************** 90 convert rtl_Locale to locale string 91 *****************************************************************************/ 92 93 static char * _compose_locale( rtl_Locale * pLocale, char * buffer, size_t n ) 94 { 95 /* check if a valid locale is specified */ 96 if( pLocale && pLocale->Language && (pLocale->Language->length == 2) ) 97 { 98 size_t offset = 0; 99 100 /* convert language code to ascii */ 101 { 102 rtl_String *pLanguage = NULL; 103 104 rtl_uString2String( &pLanguage, 105 pLocale->Language->buffer, pLocale->Language->length, 106 RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS ); 107 108 if( pLanguage->length < n ) 109 { 110 strcpy( buffer, pLanguage->buffer ); 111 offset = pLanguage->length; 112 } 113 114 rtl_string_release( pLanguage ); 115 } 116 117 /* convert country code to ascii */ 118 if( pLocale->Country && (pLocale->Country->length == 2) ) 119 { 120 rtl_String *pCountry = NULL; 121 122 rtl_uString2String( &pCountry, 123 pLocale->Country->buffer, pLocale->Country->length, 124 RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS ); 125 126 if( offset + pCountry->length + 1 < n ) 127 { 128 strcpy( buffer + offset++, "_" ); 129 strcpy( buffer + offset, pCountry->buffer ); 130 offset += pCountry->length; 131 } 132 133 rtl_string_release( pCountry ); 134 } 135 136 /* convert variant to ascii - check if there is enough space for the variant string */ 137 if( pLocale->Variant && pLocale->Variant->length && 138 ( pLocale->Variant->length < n - 6 ) ) 139 { 140 rtl_String *pVariant = NULL; 141 142 rtl_uString2String( &pVariant, 143 pLocale->Variant->buffer, pLocale->Variant->length, 144 RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS ); 145 146 if( offset + pVariant->length + 1 < n ) 147 { 148 strcpy( buffer + offset, pVariant->buffer ); 149 offset += pVariant->length; 150 } 151 152 rtl_string_release( pVariant ); 153 } 154 155 return buffer; 156 } 157 158 return NULL; 159 } 160 161 /***************************************************************************** 162 convert locale string to rtl_Locale 163 *****************************************************************************/ 164 165 static rtl_Locale * _parse_locale( const char * locale ) 166 { 167 static sal_Unicode c_locale[2] = { (sal_Unicode) 'C', 0 }; 168 169 /* check if locale contains a valid string */ 170 if( locale ) 171 { 172 size_t len = strlen( locale ); 173 174 if( len >= 2 ) 175 { 176 rtl_uString * pLanguage = NULL; 177 rtl_uString * pCountry = NULL; 178 rtl_uString * pVariant = NULL; 179 180 size_t offset = 2; 181 182 /* convert language code to unicode */ 183 rtl_string2UString( &pLanguage, locale, 2, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS ); 184 OSL_ASSERT(pLanguage != NULL); 185 186 /* convert country code to unicode */ 187 if( len >= 5 && '_' == locale[2] ) 188 { 189 rtl_string2UString( &pCountry, locale + 3, 2, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS ); 190 OSL_ASSERT(pCountry != NULL); 191 offset = 5; 192 } 193 194 /* convert variant code to unicode - do not rely on "." as delimiter */ 195 if( len > offset ) { 196 rtl_string2UString( &pVariant, locale + offset, len - offset, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS ); 197 OSL_ASSERT(pVariant != NULL); 198 } 199 200 rtl_Locale * ret = rtl_locale_register( pLanguage->buffer, pCountry ? pCountry->buffer : c_locale + 1, pVariant ? pVariant->buffer : c_locale + 1 ); 201 202 if (pVariant) rtl_uString_release(pVariant); 203 if (pCountry) rtl_uString_release(pCountry); 204 if (pLanguage) rtl_uString_release(pLanguage); 205 206 return ret; 207 } 208 else 209 return rtl_locale_register( c_locale, c_locale + 1, c_locale + 1 ); 210 } 211 212 return NULL; 213 } 214 215 /* 216 * This implementation of osl_getTextEncodingFromLocale maps 217 * from the ISO language codes. 218 */ 219 220 const _pair _full_locale_list[] = { 221 { "ja_JP.eucJP", RTL_TEXTENCODING_EUC_JP }, 222 { "ja_JP.EUC", RTL_TEXTENCODING_EUC_JP }, 223 { "ko_KR.EUC", RTL_TEXTENCODING_EUC_KR }, 224 { "zh_CN.EUC", RTL_TEXTENCODING_EUC_CN }, 225 { "zh_TW.EUC", RTL_TEXTENCODING_EUC_TW } 226 }; 227 228 const _pair _locale_extension_list[] = { 229 { "big5", RTL_TEXTENCODING_BIG5 }, 230 { "big5hk", RTL_TEXTENCODING_BIG5_HKSCS }, 231 { "gb18030", RTL_TEXTENCODING_GB_18030 }, 232 { "euc", RTL_TEXTENCODING_EUC_JP }, 233 { "iso8859-1", RTL_TEXTENCODING_ISO_8859_1 }, 234 { "iso8859-10", RTL_TEXTENCODING_ISO_8859_10 }, 235 { "iso8859-13", RTL_TEXTENCODING_ISO_8859_13 }, 236 { "iso8859-14", RTL_TEXTENCODING_ISO_8859_14 }, 237 { "iso8859-15", RTL_TEXTENCODING_ISO_8859_15 }, 238 { "iso8859-2", RTL_TEXTENCODING_ISO_8859_2 }, 239 { "iso8859-3", RTL_TEXTENCODING_ISO_8859_3 }, 240 { "iso8859-4", RTL_TEXTENCODING_ISO_8859_4 }, 241 { "iso8859-5", RTL_TEXTENCODING_ISO_8859_5 }, 242 { "iso8859-6", RTL_TEXTENCODING_ISO_8859_6 }, 243 { "iso8859-7", RTL_TEXTENCODING_ISO_8859_7 }, 244 { "iso8859-8", RTL_TEXTENCODING_ISO_8859_8 }, 245 { "iso8859-9", RTL_TEXTENCODING_ISO_8859_9 }, 246 { "koi8-r", RTL_TEXTENCODING_KOI8_R }, 247 { "koi8-u", RTL_TEXTENCODING_KOI8_U }, 248 { "pck", RTL_TEXTENCODING_MS_932 }, 249 #if (0) 250 { "sun_eu_greek", RTL_TEXTENCODING_DONTKNOW }, 251 #endif 252 { "utf-16", RTL_TEXTENCODING_UNICODE }, 253 { "utf-7", RTL_TEXTENCODING_UTF7 }, 254 { "utf-8", RTL_TEXTENCODING_UTF8 } 255 }; 256 257 const _pair _iso_language_list[] = { 258 { "af", RTL_TEXTENCODING_ISO_8859_1 }, 259 { "ar", RTL_TEXTENCODING_ISO_8859_6 }, 260 { "az", RTL_TEXTENCODING_ISO_8859_9 }, 261 { "be", RTL_TEXTENCODING_ISO_8859_5 }, 262 { "bg", RTL_TEXTENCODING_ISO_8859_5 }, 263 { "ca", RTL_TEXTENCODING_ISO_8859_1 }, 264 { "cs", RTL_TEXTENCODING_ISO_8859_2 }, 265 { "da", RTL_TEXTENCODING_ISO_8859_1 }, 266 { "de", RTL_TEXTENCODING_ISO_8859_1 }, 267 { "el", RTL_TEXTENCODING_ISO_8859_7 }, 268 { "en", RTL_TEXTENCODING_ISO_8859_1 }, 269 { "es", RTL_TEXTENCODING_ISO_8859_1 }, 270 { "et", RTL_TEXTENCODING_ISO_8859_4 }, 271 { "eu", RTL_TEXTENCODING_ISO_8859_1 }, 272 { "fa", RTL_TEXTENCODING_ISO_8859_6 }, 273 { "fi", RTL_TEXTENCODING_ISO_8859_1 }, 274 { "fo", RTL_TEXTENCODING_ISO_8859_1 }, 275 { "fr", RTL_TEXTENCODING_ISO_8859_1 }, 276 { "gr", RTL_TEXTENCODING_ISO_8859_7 }, 277 { "he", RTL_TEXTENCODING_ISO_8859_8 }, 278 { "hi", RTL_TEXTENCODING_DONTKNOW }, 279 { "hr", RTL_TEXTENCODING_ISO_8859_2 }, 280 { "hu", RTL_TEXTENCODING_ISO_8859_2 }, 281 { "hy", RTL_TEXTENCODING_DONTKNOW }, 282 { "id", RTL_TEXTENCODING_ISO_8859_1 }, 283 { "is", RTL_TEXTENCODING_ISO_8859_1 }, 284 { "it", RTL_TEXTENCODING_ISO_8859_1 }, 285 { "iw", RTL_TEXTENCODING_ISO_8859_8 }, 286 { "ja", RTL_TEXTENCODING_EUC_JP }, 287 { "ka", RTL_TEXTENCODING_DONTKNOW }, 288 { "kk", RTL_TEXTENCODING_ISO_8859_5 }, 289 { "ko", RTL_TEXTENCODING_EUC_KR }, 290 { "lt", RTL_TEXTENCODING_ISO_8859_4 }, 291 { "lv", RTL_TEXTENCODING_ISO_8859_4 }, 292 { "mk", RTL_TEXTENCODING_ISO_8859_5 }, 293 { "mr", RTL_TEXTENCODING_DONTKNOW }, 294 { "ms", RTL_TEXTENCODING_ISO_8859_1 }, 295 { "nl", RTL_TEXTENCODING_ISO_8859_1 }, 296 { "no", RTL_TEXTENCODING_ISO_8859_1 }, 297 { "pl", RTL_TEXTENCODING_ISO_8859_2 }, 298 { "pt", RTL_TEXTENCODING_ISO_8859_1 }, 299 { "ro", RTL_TEXTENCODING_ISO_8859_2 }, 300 { "ru", RTL_TEXTENCODING_ISO_8859_5 }, 301 { "sa", RTL_TEXTENCODING_DONTKNOW }, 302 { "sk", RTL_TEXTENCODING_ISO_8859_2 }, 303 { "sl", RTL_TEXTENCODING_ISO_8859_2 }, 304 { "sq", RTL_TEXTENCODING_ISO_8859_2 }, 305 { "sv", RTL_TEXTENCODING_ISO_8859_1 }, 306 { "sw", RTL_TEXTENCODING_ISO_8859_1 }, 307 { "ta", RTL_TEXTENCODING_DONTKNOW }, 308 { "th", RTL_TEXTENCODING_DONTKNOW }, 309 { "tr", RTL_TEXTENCODING_ISO_8859_9 }, 310 { "tt", RTL_TEXTENCODING_ISO_8859_5 }, 311 { "uk", RTL_TEXTENCODING_ISO_8859_5 }, 312 { "ur", RTL_TEXTENCODING_ISO_8859_6 }, 313 { "uz", RTL_TEXTENCODING_ISO_8859_9 }, 314 { "vi", RTL_TEXTENCODING_DONTKNOW }, 315 { "zh", RTL_TEXTENCODING_BIG5 } 316 }; 317 318 /***************************************************************************** 319 return the text encoding corresponding to the given locale 320 *****************************************************************************/ 321 322 rtl_TextEncoding osl_getTextEncodingFromLocale( rtl_Locale * pLocale ) 323 { 324 const _pair *language = 0; 325 char locale_buf[64] = ""; 326 char *cp; 327 328 WinMessageBox(HWND_DESKTOP,HWND_DESKTOP, 329 "Please contact technical support and report above informations.\n\n", 330 "Critical error: osl_getTextEncodingFromLocale", 331 0, MB_ERROR | MB_OK | MB_MOVEABLE); 332 333 /* default to process locale if pLocale == NULL */ 334 if( NULL == pLocale ) 335 osl_getProcessLocale( &pLocale ); 336 337 /* convert rtl_Locale to locale string */ 338 if( _compose_locale( pLocale, locale_buf, 64 ) ) 339 { 340 /* check special handling list (EUC) first */ 341 const unsigned int members = sizeof( _full_locale_list ) / sizeof( _pair ); 342 language = _pair_search( locale_buf, _full_locale_list, members); 343 344 if( NULL == language ) 345 { 346 /* 347 * check if there is a charset qualifier at the end of the given locale string 348 * e.g. de.ISO8859-15 or de.ISO8859-15@euro which strongly indicates what 349 * charset to use 350 */ 351 cp = strrchr( locale_buf, '.' ); 352 353 if( NULL != cp ) 354 { 355 const unsigned int members = sizeof( _locale_extension_list ) / sizeof( _pair ); 356 language = _pair_search( cp + 1, _locale_extension_list, members); 357 } 358 } 359 360 /* use iso language code to determine the charset */ 361 if( NULL == language ) 362 { 363 const unsigned int members = sizeof( _iso_language_list ) / sizeof( _pair ); 364 365 /* iso lang codes have 2 charaters */ 366 locale_buf[2] = '\0'; 367 368 language = _pair_search( locale_buf, _iso_language_list, members); 369 } 370 } 371 372 /* a matching item in our list provides a mapping from codeset to 373 * rtl-codeset */ 374 if ( language != NULL ) 375 return language->value; 376 377 return RTL_TEXTENCODING_DONTKNOW; 378 } 379 380 /***************************************************************************** 381 return the current process locale 382 *****************************************************************************/ 383 384 void _imp_getProcessLocale( rtl_Locale ** ppLocale ) 385 { 386 /* simulate behavior off setlocale */ 387 char * locale = getenv( "LC_ALL" ); 388 389 if( NULL == locale ) 390 locale = getenv( "LC_CTYPE" ); 391 392 if( NULL == locale ) 393 locale = getenv( "LANG" ); 394 395 if( NULL == locale ) 396 locale = "C"; 397 398 *ppLocale = _parse_locale( locale ); 399 } 400 401 /***************************************************************************** 402 set the current process locale 403 *****************************************************************************/ 404 405 int _imp_setProcessLocale( rtl_Locale * pLocale ) 406 { 407 char locale_buf[64]; 408 409 /* convert rtl_Locale to locale string */ 410 if( NULL != _compose_locale( pLocale, locale_buf, 64 ) ) 411 { 412 /* only change env vars that exist already */ 413 if( getenv( "LC_ALL" ) ) { 414 #if defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX ) || defined( __EMX__ ) 415 setenv( "LC_ALL", locale_buf, 1); 416 #else 417 setenv( "LC_ALL", locale_buf ); 418 #endif 419 } 420 421 if( getenv( "LC_CTYPE" ) ) { 422 #if defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX ) || defined( __EMX__ ) 423 setenv("LC_CTYPE", locale_buf, 1 ); 424 #else 425 setenv( "LC_CTYPE", locale_buf ); 426 #endif 427 } 428 429 if( getenv( "LANG" ) ) { 430 #if defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX ) || defined( __EMX__ ) 431 setenv("LC_CTYPE", locale_buf, 1 ); 432 #else 433 setenv( "LANG", locale_buf ); 434 #endif 435 } 436 } 437 438 return 0; 439 } 440 441 442