1*647f063dSAndrew Rist /************************************************************** 2cdf0e10cSrcweir * 3*647f063dSAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one 4*647f063dSAndrew Rist * or more contributor license agreements. See the NOTICE file 5*647f063dSAndrew Rist * distributed with this work for additional information 6*647f063dSAndrew Rist * regarding copyright ownership. The ASF licenses this file 7*647f063dSAndrew Rist * to you under the Apache License, Version 2.0 (the 8*647f063dSAndrew Rist * "License"); you may not use this file except in compliance 9*647f063dSAndrew Rist * with the License. You may obtain a copy of the License at 10cdf0e10cSrcweir * 11*647f063dSAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0 12cdf0e10cSrcweir * 13*647f063dSAndrew Rist * Unless required by applicable law or agreed to in writing, 14*647f063dSAndrew Rist * software distributed under the License is distributed on an 15*647f063dSAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16*647f063dSAndrew Rist * KIND, either express or implied. See the License for the 17*647f063dSAndrew Rist * specific language governing permissions and limitations 18*647f063dSAndrew Rist * under the License. 19cdf0e10cSrcweir * 20*647f063dSAndrew Rist *************************************************************/ 21*647f063dSAndrew Rist 22*647f063dSAndrew Rist 23cdf0e10cSrcweir #if defined(_MSC_VER) && (_MSC_VER >= 1400) 24cdf0e10cSrcweir #pragma warning(disable:4738) // storing 32-bit float result in memory, possible loss of performance 25cdf0e10cSrcweir #endif 26cdf0e10cSrcweir 27cdf0e10cSrcweir #include <rtl/memory.h> 28cdf0e10cSrcweir #include <osl/interlck.h> 29cdf0e10cSrcweir #include <rtl/alloc.h> 30cdf0e10cSrcweir #include <osl/diagnose.h> 31cdf0e10cSrcweir #include <rtl/tencinfo.h> 32cdf0e10cSrcweir 33cdf0e10cSrcweir #include "strimp.h" 34cdf0e10cSrcweir #include "surrogates.h" 35cdf0e10cSrcweir #include <rtl/string.h> 36cdf0e10cSrcweir 37cdf0e10cSrcweir #include "rtl/math.h" 38cdf0e10cSrcweir #include "rtl/tencinfo.h" 39cdf0e10cSrcweir 40cdf0e10cSrcweir /* ======================================================================= */ 41cdf0e10cSrcweir 42cdf0e10cSrcweir /* static data to be referenced by all empty strings 43cdf0e10cSrcweir * the refCount is predefined to 1 and must never become 0 ! 44cdf0e10cSrcweir */ 45cdf0e10cSrcweir static rtl_String const aImplEmpty_rtl_String = 46cdf0e10cSrcweir { 47cdf0e10cSrcweir SAL_STRING_STATIC_FLAG|1, 48cdf0e10cSrcweir /* sal_Int32 refCount; */ 49cdf0e10cSrcweir 0, /* sal_Int32 length; */ 50cdf0e10cSrcweir { 0 } /* sal_Char buffer[1]; */ 51cdf0e10cSrcweir }; 52cdf0e10cSrcweir 53cdf0e10cSrcweir /* ======================================================================= */ 54cdf0e10cSrcweir 55cdf0e10cSrcweir #define IMPL_RTL_STRCODE sal_Char 56cdf0e10cSrcweir #define IMPL_RTL_USTRCODE( c ) ((unsigned char)c) 57cdf0e10cSrcweir #define IMPL_RTL_STRNAME( n ) rtl_str_ ## n 58cdf0e10cSrcweir 59cdf0e10cSrcweir #define IMPL_RTL_STRINGNAME( n ) rtl_string_ ## n 60cdf0e10cSrcweir #define IMPL_RTL_STRINGDATA rtl_String 61cdf0e10cSrcweir #define IMPL_RTL_EMPTYSTRING aImplEmpty_rtl_String 62cdf0e10cSrcweir 63cdf0e10cSrcweir /* ======================================================================= */ 64cdf0e10cSrcweir 65cdf0e10cSrcweir /* Include String/UString template code */ 66cdf0e10cSrcweir 67cdf0e10cSrcweir #include "strtmpl.c" 68cdf0e10cSrcweir 69cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_str_valueOfFloat(sal_Char * pStr, float f) 70cdf0e10cSrcweir { 71cdf0e10cSrcweir rtl_String * pResult = NULL; 72cdf0e10cSrcweir sal_Int32 nLen; 73cdf0e10cSrcweir rtl_math_doubleToString( 74cdf0e10cSrcweir &pResult, 0, 0, f, rtl_math_StringFormat_G, 75cdf0e10cSrcweir RTL_STR_MAX_VALUEOFFLOAT - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0, 0, 76cdf0e10cSrcweir sal_True); 77cdf0e10cSrcweir nLen = pResult->length; 78cdf0e10cSrcweir OSL_ASSERT(nLen < RTL_STR_MAX_VALUEOFFLOAT); 79cdf0e10cSrcweir rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Char)); 80cdf0e10cSrcweir rtl_string_release(pResult); 81cdf0e10cSrcweir return nLen; 82cdf0e10cSrcweir } 83cdf0e10cSrcweir 84cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_str_valueOfDouble(sal_Char * pStr, double d) 85cdf0e10cSrcweir { 86cdf0e10cSrcweir rtl_String * pResult = NULL; 87cdf0e10cSrcweir sal_Int32 nLen; 88cdf0e10cSrcweir rtl_math_doubleToString( 89cdf0e10cSrcweir &pResult, 0, 0, d, rtl_math_StringFormat_G, 90cdf0e10cSrcweir RTL_STR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0, 91cdf0e10cSrcweir 0, sal_True); 92cdf0e10cSrcweir nLen = pResult->length; 93cdf0e10cSrcweir OSL_ASSERT(nLen < RTL_STR_MAX_VALUEOFDOUBLE); 94cdf0e10cSrcweir rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Char)); 95cdf0e10cSrcweir rtl_string_release(pResult); 96cdf0e10cSrcweir return nLen; 97cdf0e10cSrcweir } 98cdf0e10cSrcweir 99cdf0e10cSrcweir float SAL_CALL rtl_str_toFloat(sal_Char const * pStr) 100cdf0e10cSrcweir { 101cdf0e10cSrcweir return (float) rtl_math_stringToDouble(pStr, pStr + rtl_str_getLength(pStr), 102cdf0e10cSrcweir '.', 0, 0, 0); 103cdf0e10cSrcweir } 104cdf0e10cSrcweir 105cdf0e10cSrcweir double SAL_CALL rtl_str_toDouble(sal_Char const * pStr) 106cdf0e10cSrcweir { 107cdf0e10cSrcweir return rtl_math_stringToDouble(pStr, pStr + rtl_str_getLength(pStr), '.', 0, 108cdf0e10cSrcweir 0, 0); 109cdf0e10cSrcweir } 110cdf0e10cSrcweir 111cdf0e10cSrcweir /* ======================================================================= */ 112cdf0e10cSrcweir 113cdf0e10cSrcweir static int rtl_ImplGetFastUTF8ByteLen( const sal_Unicode* pStr, sal_Int32 nLen ) 114cdf0e10cSrcweir { 115cdf0e10cSrcweir int n; 116cdf0e10cSrcweir sal_Unicode c; 117cdf0e10cSrcweir sal_uInt32 nUCS4Char; 118cdf0e10cSrcweir const sal_Unicode* pEndStr; 119cdf0e10cSrcweir 120cdf0e10cSrcweir n = 0; 121cdf0e10cSrcweir pEndStr = pStr+nLen; 122cdf0e10cSrcweir while ( pStr < pEndStr ) 123cdf0e10cSrcweir { 124cdf0e10cSrcweir c = *pStr; 125cdf0e10cSrcweir 126cdf0e10cSrcweir if ( c < 0x80 ) 127cdf0e10cSrcweir n++; 128cdf0e10cSrcweir else if ( c < 0x800 ) 129cdf0e10cSrcweir n += 2; 130cdf0e10cSrcweir else 131cdf0e10cSrcweir { 132cdf0e10cSrcweir if ( !SAL_RTL_IS_HIGH_SURROGATE(c) ) 133cdf0e10cSrcweir n += 3; 134cdf0e10cSrcweir else 135cdf0e10cSrcweir { 136cdf0e10cSrcweir nUCS4Char = c; 137cdf0e10cSrcweir 138cdf0e10cSrcweir if ( pStr+1 < pEndStr ) 139cdf0e10cSrcweir { 140cdf0e10cSrcweir c = *(pStr+1); 141cdf0e10cSrcweir if ( SAL_RTL_IS_LOW_SURROGATE(c) ) 142cdf0e10cSrcweir { 143cdf0e10cSrcweir nUCS4Char = SAL_RTL_COMBINE_SURROGATES(nUCS4Char, c); 144cdf0e10cSrcweir pStr++; 145cdf0e10cSrcweir } 146cdf0e10cSrcweir } 147cdf0e10cSrcweir 148cdf0e10cSrcweir if ( nUCS4Char < 0x10000 ) 149cdf0e10cSrcweir n += 3; 150cdf0e10cSrcweir else if ( nUCS4Char < 0x200000 ) 151cdf0e10cSrcweir n += 4; 152cdf0e10cSrcweir else if ( nUCS4Char < 0x4000000 ) 153cdf0e10cSrcweir n += 5; 154cdf0e10cSrcweir else 155cdf0e10cSrcweir n += 6; 156cdf0e10cSrcweir } 157cdf0e10cSrcweir } 158cdf0e10cSrcweir 159cdf0e10cSrcweir pStr++; 160cdf0e10cSrcweir } 161cdf0e10cSrcweir 162cdf0e10cSrcweir return n; 163cdf0e10cSrcweir } 164cdf0e10cSrcweir 165cdf0e10cSrcweir /* ----------------------------------------------------------------------- */ 166cdf0e10cSrcweir 167cdf0e10cSrcweir sal_Bool SAL_CALL rtl_impl_convertUStringToString(rtl_String ** pTarget, 168cdf0e10cSrcweir sal_Unicode const * pSource, 169cdf0e10cSrcweir sal_Int32 nLength, 170cdf0e10cSrcweir rtl_TextEncoding nEncoding, 171cdf0e10cSrcweir sal_uInt32 nFlags, 172cdf0e10cSrcweir sal_Bool bCheckErrors) 173cdf0e10cSrcweir { 174cdf0e10cSrcweir OSL_ASSERT(pTarget != NULL 175cdf0e10cSrcweir && (pSource != NULL || nLength == 0) 176cdf0e10cSrcweir && nLength >= 0 177cdf0e10cSrcweir && rtl_isOctetTextEncoding(nEncoding)); 178cdf0e10cSrcweir 179cdf0e10cSrcweir if ( !nLength ) 180cdf0e10cSrcweir rtl_string_new( pTarget ); 181cdf0e10cSrcweir else 182cdf0e10cSrcweir { 183cdf0e10cSrcweir rtl_String* pTemp; 184cdf0e10cSrcweir rtl_UnicodeToTextConverter hConverter; 185cdf0e10cSrcweir sal_uInt32 nInfo; 186cdf0e10cSrcweir sal_Size nSrcChars; 187cdf0e10cSrcweir sal_Size nDestBytes; 188cdf0e10cSrcweir sal_Size nNewLen; 189cdf0e10cSrcweir sal_Size nNotConvertedChars; 190cdf0e10cSrcweir sal_Size nMaxCharLen; 191cdf0e10cSrcweir 192cdf0e10cSrcweir /* Optimization for UTF-8 - we try to calculate the exact length */ 193cdf0e10cSrcweir /* For all other encoding we try an good estimation */ 194cdf0e10cSrcweir if ( nEncoding == RTL_TEXTENCODING_UTF8 ) 195cdf0e10cSrcweir { 196cdf0e10cSrcweir nNewLen = rtl_ImplGetFastUTF8ByteLen( pSource, nLength ); 197cdf0e10cSrcweir /* Includes the string only ASCII, then we could copy 198cdf0e10cSrcweir the buffer faster */ 199cdf0e10cSrcweir if ( nNewLen == (sal_Size)nLength ) 200cdf0e10cSrcweir { 201cdf0e10cSrcweir IMPL_RTL_STRCODE* pBuffer; 202cdf0e10cSrcweir if ( *pTarget ) 203cdf0e10cSrcweir IMPL_RTL_STRINGNAME( release )( *pTarget ); 204cdf0e10cSrcweir *pTarget = IMPL_RTL_STRINGNAME( ImplAlloc )( nLength ); 205cdf0e10cSrcweir OSL_ASSERT(*pTarget != NULL); 206cdf0e10cSrcweir pBuffer = (*pTarget)->buffer; 207cdf0e10cSrcweir do 208cdf0e10cSrcweir { 209cdf0e10cSrcweir /* Check ASCII range */ 210cdf0e10cSrcweir OSL_ENSURE( *pSource <= 127, 211cdf0e10cSrcweir "rtl_uString2String() - UTF8 test is encoding is wrong" ); 212cdf0e10cSrcweir 213cdf0e10cSrcweir *pBuffer = (IMPL_RTL_STRCODE)(unsigned char)*pSource; 214cdf0e10cSrcweir pBuffer++; 215cdf0e10cSrcweir pSource++; 216cdf0e10cSrcweir nLength--; 217cdf0e10cSrcweir } 218cdf0e10cSrcweir while ( nLength ); 219cdf0e10cSrcweir return sal_True; 220cdf0e10cSrcweir } 221cdf0e10cSrcweir 222cdf0e10cSrcweir nMaxCharLen = 4; 223cdf0e10cSrcweir } 224cdf0e10cSrcweir else 225cdf0e10cSrcweir { 226cdf0e10cSrcweir rtl_TextEncodingInfo aTextEncInfo; 227cdf0e10cSrcweir aTextEncInfo.StructSize = sizeof( aTextEncInfo ); 228cdf0e10cSrcweir if ( !rtl_getTextEncodingInfo( nEncoding, &aTextEncInfo ) ) 229cdf0e10cSrcweir { 230cdf0e10cSrcweir aTextEncInfo.AverageCharSize = 1; 231cdf0e10cSrcweir aTextEncInfo.MaximumCharSize = 8; 232cdf0e10cSrcweir } 233cdf0e10cSrcweir 234cdf0e10cSrcweir nNewLen = nLength*aTextEncInfo.AverageCharSize; 235cdf0e10cSrcweir nMaxCharLen = aTextEncInfo.MaximumCharSize; 236cdf0e10cSrcweir } 237cdf0e10cSrcweir 238cdf0e10cSrcweir nFlags |= RTL_UNICODETOTEXT_FLAGS_FLUSH; 239cdf0e10cSrcweir hConverter = rtl_createUnicodeToTextConverter( nEncoding ); 240cdf0e10cSrcweir 241cdf0e10cSrcweir for (;;) 242cdf0e10cSrcweir { 243cdf0e10cSrcweir pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen ); 244cdf0e10cSrcweir OSL_ASSERT(pTemp != NULL); 245cdf0e10cSrcweir nDestBytes = rtl_convertUnicodeToText( hConverter, 0, 246cdf0e10cSrcweir pSource, nLength, 247cdf0e10cSrcweir pTemp->buffer, nNewLen, 248cdf0e10cSrcweir nFlags, 249cdf0e10cSrcweir &nInfo, &nSrcChars ); 250cdf0e10cSrcweir if (bCheckErrors && (nInfo & RTL_UNICODETOTEXT_INFO_ERROR) != 0) 251cdf0e10cSrcweir { 252cdf0e10cSrcweir rtl_freeMemory(pTemp); 253cdf0e10cSrcweir rtl_destroyUnicodeToTextConverter(hConverter); 254cdf0e10cSrcweir return sal_False; 255cdf0e10cSrcweir } 256cdf0e10cSrcweir 257cdf0e10cSrcweir if ((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0) 258cdf0e10cSrcweir break; 259cdf0e10cSrcweir 260cdf0e10cSrcweir /* Buffer not big enough, try again with enough space */ 261cdf0e10cSrcweir rtl_freeMemory( pTemp ); 262cdf0e10cSrcweir 263cdf0e10cSrcweir /* Try with the max. count of characters with 264cdf0e10cSrcweir additional overhead for replacing functionality */ 265cdf0e10cSrcweir nNotConvertedChars = nLength-nSrcChars; 266cdf0e10cSrcweir nNewLen = nDestBytes+(nNotConvertedChars*nMaxCharLen)+nNotConvertedChars+4; 267cdf0e10cSrcweir } 268cdf0e10cSrcweir 269cdf0e10cSrcweir /* Set the buffer to the correct size or is there to 270cdf0e10cSrcweir much overhead, reallocate to the correct size */ 271cdf0e10cSrcweir if ( nNewLen > nDestBytes+8 ) 272cdf0e10cSrcweir { 273cdf0e10cSrcweir rtl_String* pTemp2 = IMPL_RTL_STRINGNAME( ImplAlloc )( nDestBytes ); 274cdf0e10cSrcweir OSL_ASSERT(pTemp2 != NULL); 275cdf0e10cSrcweir rtl_str_ImplCopy( pTemp2->buffer, pTemp->buffer, nDestBytes ); 276cdf0e10cSrcweir rtl_freeMemory( pTemp ); 277cdf0e10cSrcweir pTemp = pTemp2; 278cdf0e10cSrcweir } 279cdf0e10cSrcweir else 280cdf0e10cSrcweir { 281cdf0e10cSrcweir pTemp->length = nDestBytes; 282cdf0e10cSrcweir pTemp->buffer[nDestBytes] = 0; 283cdf0e10cSrcweir } 284cdf0e10cSrcweir 285cdf0e10cSrcweir rtl_destroyUnicodeToTextConverter( hConverter ); 286cdf0e10cSrcweir if ( *pTarget ) 287cdf0e10cSrcweir IMPL_RTL_STRINGNAME( release )( *pTarget ); 288cdf0e10cSrcweir *pTarget = pTemp; 289cdf0e10cSrcweir 290cdf0e10cSrcweir /* Results the conversion in an empty buffer - 291cdf0e10cSrcweir create an empty string */ 292cdf0e10cSrcweir if ( pTemp && !nDestBytes ) 293cdf0e10cSrcweir rtl_string_new( pTarget ); 294cdf0e10cSrcweir } 295cdf0e10cSrcweir return sal_True; 296cdf0e10cSrcweir } 297cdf0e10cSrcweir 298cdf0e10cSrcweir void SAL_CALL rtl_uString2String( rtl_String** ppThis, 299cdf0e10cSrcweir const sal_Unicode* pUStr, 300cdf0e10cSrcweir sal_Int32 nULen, 301cdf0e10cSrcweir rtl_TextEncoding eTextEncoding, 302cdf0e10cSrcweir sal_uInt32 nCvtFlags ) 303cdf0e10cSrcweir { 304cdf0e10cSrcweir rtl_impl_convertUStringToString(ppThis, pUStr, nULen, eTextEncoding, 305cdf0e10cSrcweir nCvtFlags, sal_False); 306cdf0e10cSrcweir } 307cdf0e10cSrcweir 308cdf0e10cSrcweir sal_Bool SAL_CALL rtl_convertUStringToString(rtl_String ** pTarget, 309cdf0e10cSrcweir sal_Unicode const * pSource, 310cdf0e10cSrcweir sal_Int32 nLength, 311cdf0e10cSrcweir rtl_TextEncoding nEncoding, 312cdf0e10cSrcweir sal_uInt32 nFlags) 313cdf0e10cSrcweir { 314cdf0e10cSrcweir return rtl_impl_convertUStringToString(pTarget, pSource, nLength, nEncoding, 315cdf0e10cSrcweir nFlags, sal_True); 316cdf0e10cSrcweir } 317