1*cdf0e10cSrcweir /************************************************************************* 2*cdf0e10cSrcweir * 3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4*cdf0e10cSrcweir * 5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates. 6*cdf0e10cSrcweir * 7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite 8*cdf0e10cSrcweir * 9*cdf0e10cSrcweir * This file is part of OpenOffice.org. 10*cdf0e10cSrcweir * 11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify 12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3 13*cdf0e10cSrcweir * only, as published by the Free Software Foundation. 14*cdf0e10cSrcweir * 15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful, 16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of 17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details 19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code). 20*cdf0e10cSrcweir * 21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License 22*cdf0e10cSrcweir * version 3 along with OpenOffice.org. If not, see 23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html> 24*cdf0e10cSrcweir * for a copy of the LGPLv3 License. 25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir #if defined(_MSC_VER) && (_MSC_VER >= 1400) 28*cdf0e10cSrcweir #pragma warning(disable:4738) // storing 32-bit float result in memory, possible loss of performance 29*cdf0e10cSrcweir #endif 30*cdf0e10cSrcweir 31*cdf0e10cSrcweir #include <rtl/memory.h> 32*cdf0e10cSrcweir #include <osl/interlck.h> 33*cdf0e10cSrcweir #include <rtl/alloc.h> 34*cdf0e10cSrcweir #include <osl/diagnose.h> 35*cdf0e10cSrcweir #include <rtl/tencinfo.h> 36*cdf0e10cSrcweir 37*cdf0e10cSrcweir #include "strimp.h" 38*cdf0e10cSrcweir #include "surrogates.h" 39*cdf0e10cSrcweir #include <rtl/string.h> 40*cdf0e10cSrcweir 41*cdf0e10cSrcweir #include "rtl/math.h" 42*cdf0e10cSrcweir #include "rtl/tencinfo.h" 43*cdf0e10cSrcweir 44*cdf0e10cSrcweir /* ======================================================================= */ 45*cdf0e10cSrcweir 46*cdf0e10cSrcweir /* static data to be referenced by all empty strings 47*cdf0e10cSrcweir * the refCount is predefined to 1 and must never become 0 ! 48*cdf0e10cSrcweir */ 49*cdf0e10cSrcweir static rtl_String const aImplEmpty_rtl_String = 50*cdf0e10cSrcweir { 51*cdf0e10cSrcweir SAL_STRING_STATIC_FLAG|1, 52*cdf0e10cSrcweir /* sal_Int32 refCount; */ 53*cdf0e10cSrcweir 0, /* sal_Int32 length; */ 54*cdf0e10cSrcweir { 0 } /* sal_Char buffer[1]; */ 55*cdf0e10cSrcweir }; 56*cdf0e10cSrcweir 57*cdf0e10cSrcweir /* ======================================================================= */ 58*cdf0e10cSrcweir 59*cdf0e10cSrcweir #define IMPL_RTL_STRCODE sal_Char 60*cdf0e10cSrcweir #define IMPL_RTL_USTRCODE( c ) ((unsigned char)c) 61*cdf0e10cSrcweir #define IMPL_RTL_STRNAME( n ) rtl_str_ ## n 62*cdf0e10cSrcweir 63*cdf0e10cSrcweir #define IMPL_RTL_STRINGNAME( n ) rtl_string_ ## n 64*cdf0e10cSrcweir #define IMPL_RTL_STRINGDATA rtl_String 65*cdf0e10cSrcweir #define IMPL_RTL_EMPTYSTRING aImplEmpty_rtl_String 66*cdf0e10cSrcweir 67*cdf0e10cSrcweir /* ======================================================================= */ 68*cdf0e10cSrcweir 69*cdf0e10cSrcweir /* Include String/UString template code */ 70*cdf0e10cSrcweir 71*cdf0e10cSrcweir #include "strtmpl.c" 72*cdf0e10cSrcweir 73*cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_str_valueOfFloat(sal_Char * pStr, float f) 74*cdf0e10cSrcweir { 75*cdf0e10cSrcweir rtl_String * pResult = NULL; 76*cdf0e10cSrcweir sal_Int32 nLen; 77*cdf0e10cSrcweir rtl_math_doubleToString( 78*cdf0e10cSrcweir &pResult, 0, 0, f, rtl_math_StringFormat_G, 79*cdf0e10cSrcweir RTL_STR_MAX_VALUEOFFLOAT - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0, 0, 80*cdf0e10cSrcweir sal_True); 81*cdf0e10cSrcweir nLen = pResult->length; 82*cdf0e10cSrcweir OSL_ASSERT(nLen < RTL_STR_MAX_VALUEOFFLOAT); 83*cdf0e10cSrcweir rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Char)); 84*cdf0e10cSrcweir rtl_string_release(pResult); 85*cdf0e10cSrcweir return nLen; 86*cdf0e10cSrcweir } 87*cdf0e10cSrcweir 88*cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_str_valueOfDouble(sal_Char * pStr, double d) 89*cdf0e10cSrcweir { 90*cdf0e10cSrcweir rtl_String * pResult = NULL; 91*cdf0e10cSrcweir sal_Int32 nLen; 92*cdf0e10cSrcweir rtl_math_doubleToString( 93*cdf0e10cSrcweir &pResult, 0, 0, d, rtl_math_StringFormat_G, 94*cdf0e10cSrcweir RTL_STR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0, 95*cdf0e10cSrcweir 0, sal_True); 96*cdf0e10cSrcweir nLen = pResult->length; 97*cdf0e10cSrcweir OSL_ASSERT(nLen < RTL_STR_MAX_VALUEOFDOUBLE); 98*cdf0e10cSrcweir rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Char)); 99*cdf0e10cSrcweir rtl_string_release(pResult); 100*cdf0e10cSrcweir return nLen; 101*cdf0e10cSrcweir } 102*cdf0e10cSrcweir 103*cdf0e10cSrcweir float SAL_CALL rtl_str_toFloat(sal_Char const * pStr) 104*cdf0e10cSrcweir { 105*cdf0e10cSrcweir return (float) rtl_math_stringToDouble(pStr, pStr + rtl_str_getLength(pStr), 106*cdf0e10cSrcweir '.', 0, 0, 0); 107*cdf0e10cSrcweir } 108*cdf0e10cSrcweir 109*cdf0e10cSrcweir double SAL_CALL rtl_str_toDouble(sal_Char const * pStr) 110*cdf0e10cSrcweir { 111*cdf0e10cSrcweir return rtl_math_stringToDouble(pStr, pStr + rtl_str_getLength(pStr), '.', 0, 112*cdf0e10cSrcweir 0, 0); 113*cdf0e10cSrcweir } 114*cdf0e10cSrcweir 115*cdf0e10cSrcweir /* ======================================================================= */ 116*cdf0e10cSrcweir 117*cdf0e10cSrcweir static int rtl_ImplGetFastUTF8ByteLen( const sal_Unicode* pStr, sal_Int32 nLen ) 118*cdf0e10cSrcweir { 119*cdf0e10cSrcweir int n; 120*cdf0e10cSrcweir sal_Unicode c; 121*cdf0e10cSrcweir sal_uInt32 nUCS4Char; 122*cdf0e10cSrcweir const sal_Unicode* pEndStr; 123*cdf0e10cSrcweir 124*cdf0e10cSrcweir n = 0; 125*cdf0e10cSrcweir pEndStr = pStr+nLen; 126*cdf0e10cSrcweir while ( pStr < pEndStr ) 127*cdf0e10cSrcweir { 128*cdf0e10cSrcweir c = *pStr; 129*cdf0e10cSrcweir 130*cdf0e10cSrcweir if ( c < 0x80 ) 131*cdf0e10cSrcweir n++; 132*cdf0e10cSrcweir else if ( c < 0x800 ) 133*cdf0e10cSrcweir n += 2; 134*cdf0e10cSrcweir else 135*cdf0e10cSrcweir { 136*cdf0e10cSrcweir if ( !SAL_RTL_IS_HIGH_SURROGATE(c) ) 137*cdf0e10cSrcweir n += 3; 138*cdf0e10cSrcweir else 139*cdf0e10cSrcweir { 140*cdf0e10cSrcweir nUCS4Char = c; 141*cdf0e10cSrcweir 142*cdf0e10cSrcweir if ( pStr+1 < pEndStr ) 143*cdf0e10cSrcweir { 144*cdf0e10cSrcweir c = *(pStr+1); 145*cdf0e10cSrcweir if ( SAL_RTL_IS_LOW_SURROGATE(c) ) 146*cdf0e10cSrcweir { 147*cdf0e10cSrcweir nUCS4Char = SAL_RTL_COMBINE_SURROGATES(nUCS4Char, c); 148*cdf0e10cSrcweir pStr++; 149*cdf0e10cSrcweir } 150*cdf0e10cSrcweir } 151*cdf0e10cSrcweir 152*cdf0e10cSrcweir if ( nUCS4Char < 0x10000 ) 153*cdf0e10cSrcweir n += 3; 154*cdf0e10cSrcweir else if ( nUCS4Char < 0x200000 ) 155*cdf0e10cSrcweir n += 4; 156*cdf0e10cSrcweir else if ( nUCS4Char < 0x4000000 ) 157*cdf0e10cSrcweir n += 5; 158*cdf0e10cSrcweir else 159*cdf0e10cSrcweir n += 6; 160*cdf0e10cSrcweir } 161*cdf0e10cSrcweir } 162*cdf0e10cSrcweir 163*cdf0e10cSrcweir pStr++; 164*cdf0e10cSrcweir } 165*cdf0e10cSrcweir 166*cdf0e10cSrcweir return n; 167*cdf0e10cSrcweir } 168*cdf0e10cSrcweir 169*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */ 170*cdf0e10cSrcweir 171*cdf0e10cSrcweir sal_Bool SAL_CALL rtl_impl_convertUStringToString(rtl_String ** pTarget, 172*cdf0e10cSrcweir sal_Unicode const * pSource, 173*cdf0e10cSrcweir sal_Int32 nLength, 174*cdf0e10cSrcweir rtl_TextEncoding nEncoding, 175*cdf0e10cSrcweir sal_uInt32 nFlags, 176*cdf0e10cSrcweir sal_Bool bCheckErrors) 177*cdf0e10cSrcweir { 178*cdf0e10cSrcweir OSL_ASSERT(pTarget != NULL 179*cdf0e10cSrcweir && (pSource != NULL || nLength == 0) 180*cdf0e10cSrcweir && nLength >= 0 181*cdf0e10cSrcweir && rtl_isOctetTextEncoding(nEncoding)); 182*cdf0e10cSrcweir 183*cdf0e10cSrcweir if ( !nLength ) 184*cdf0e10cSrcweir rtl_string_new( pTarget ); 185*cdf0e10cSrcweir else 186*cdf0e10cSrcweir { 187*cdf0e10cSrcweir rtl_String* pTemp; 188*cdf0e10cSrcweir rtl_UnicodeToTextConverter hConverter; 189*cdf0e10cSrcweir sal_uInt32 nInfo; 190*cdf0e10cSrcweir sal_Size nSrcChars; 191*cdf0e10cSrcweir sal_Size nDestBytes; 192*cdf0e10cSrcweir sal_Size nNewLen; 193*cdf0e10cSrcweir sal_Size nNotConvertedChars; 194*cdf0e10cSrcweir sal_Size nMaxCharLen; 195*cdf0e10cSrcweir 196*cdf0e10cSrcweir /* Optimization for UTF-8 - we try to calculate the exact length */ 197*cdf0e10cSrcweir /* For all other encoding we try an good estimation */ 198*cdf0e10cSrcweir if ( nEncoding == RTL_TEXTENCODING_UTF8 ) 199*cdf0e10cSrcweir { 200*cdf0e10cSrcweir nNewLen = rtl_ImplGetFastUTF8ByteLen( pSource, nLength ); 201*cdf0e10cSrcweir /* Includes the string only ASCII, then we could copy 202*cdf0e10cSrcweir the buffer faster */ 203*cdf0e10cSrcweir if ( nNewLen == (sal_Size)nLength ) 204*cdf0e10cSrcweir { 205*cdf0e10cSrcweir IMPL_RTL_STRCODE* pBuffer; 206*cdf0e10cSrcweir if ( *pTarget ) 207*cdf0e10cSrcweir IMPL_RTL_STRINGNAME( release )( *pTarget ); 208*cdf0e10cSrcweir *pTarget = IMPL_RTL_STRINGNAME( ImplAlloc )( nLength ); 209*cdf0e10cSrcweir OSL_ASSERT(*pTarget != NULL); 210*cdf0e10cSrcweir pBuffer = (*pTarget)->buffer; 211*cdf0e10cSrcweir do 212*cdf0e10cSrcweir { 213*cdf0e10cSrcweir /* Check ASCII range */ 214*cdf0e10cSrcweir OSL_ENSURE( *pSource <= 127, 215*cdf0e10cSrcweir "rtl_uString2String() - UTF8 test is encoding is wrong" ); 216*cdf0e10cSrcweir 217*cdf0e10cSrcweir *pBuffer = (IMPL_RTL_STRCODE)(unsigned char)*pSource; 218*cdf0e10cSrcweir pBuffer++; 219*cdf0e10cSrcweir pSource++; 220*cdf0e10cSrcweir nLength--; 221*cdf0e10cSrcweir } 222*cdf0e10cSrcweir while ( nLength ); 223*cdf0e10cSrcweir return sal_True; 224*cdf0e10cSrcweir } 225*cdf0e10cSrcweir 226*cdf0e10cSrcweir nMaxCharLen = 4; 227*cdf0e10cSrcweir } 228*cdf0e10cSrcweir else 229*cdf0e10cSrcweir { 230*cdf0e10cSrcweir rtl_TextEncodingInfo aTextEncInfo; 231*cdf0e10cSrcweir aTextEncInfo.StructSize = sizeof( aTextEncInfo ); 232*cdf0e10cSrcweir if ( !rtl_getTextEncodingInfo( nEncoding, &aTextEncInfo ) ) 233*cdf0e10cSrcweir { 234*cdf0e10cSrcweir aTextEncInfo.AverageCharSize = 1; 235*cdf0e10cSrcweir aTextEncInfo.MaximumCharSize = 8; 236*cdf0e10cSrcweir } 237*cdf0e10cSrcweir 238*cdf0e10cSrcweir nNewLen = nLength*aTextEncInfo.AverageCharSize; 239*cdf0e10cSrcweir nMaxCharLen = aTextEncInfo.MaximumCharSize; 240*cdf0e10cSrcweir } 241*cdf0e10cSrcweir 242*cdf0e10cSrcweir nFlags |= RTL_UNICODETOTEXT_FLAGS_FLUSH; 243*cdf0e10cSrcweir hConverter = rtl_createUnicodeToTextConverter( nEncoding ); 244*cdf0e10cSrcweir 245*cdf0e10cSrcweir for (;;) 246*cdf0e10cSrcweir { 247*cdf0e10cSrcweir pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen ); 248*cdf0e10cSrcweir OSL_ASSERT(pTemp != NULL); 249*cdf0e10cSrcweir nDestBytes = rtl_convertUnicodeToText( hConverter, 0, 250*cdf0e10cSrcweir pSource, nLength, 251*cdf0e10cSrcweir pTemp->buffer, nNewLen, 252*cdf0e10cSrcweir nFlags, 253*cdf0e10cSrcweir &nInfo, &nSrcChars ); 254*cdf0e10cSrcweir if (bCheckErrors && (nInfo & RTL_UNICODETOTEXT_INFO_ERROR) != 0) 255*cdf0e10cSrcweir { 256*cdf0e10cSrcweir rtl_freeMemory(pTemp); 257*cdf0e10cSrcweir rtl_destroyUnicodeToTextConverter(hConverter); 258*cdf0e10cSrcweir return sal_False; 259*cdf0e10cSrcweir } 260*cdf0e10cSrcweir 261*cdf0e10cSrcweir if ((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0) 262*cdf0e10cSrcweir break; 263*cdf0e10cSrcweir 264*cdf0e10cSrcweir /* Buffer not big enough, try again with enough space */ 265*cdf0e10cSrcweir rtl_freeMemory( pTemp ); 266*cdf0e10cSrcweir 267*cdf0e10cSrcweir /* Try with the max. count of characters with 268*cdf0e10cSrcweir additional overhead for replacing functionality */ 269*cdf0e10cSrcweir nNotConvertedChars = nLength-nSrcChars; 270*cdf0e10cSrcweir nNewLen = nDestBytes+(nNotConvertedChars*nMaxCharLen)+nNotConvertedChars+4; 271*cdf0e10cSrcweir } 272*cdf0e10cSrcweir 273*cdf0e10cSrcweir /* Set the buffer to the correct size or is there to 274*cdf0e10cSrcweir much overhead, reallocate to the correct size */ 275*cdf0e10cSrcweir if ( nNewLen > nDestBytes+8 ) 276*cdf0e10cSrcweir { 277*cdf0e10cSrcweir rtl_String* pTemp2 = IMPL_RTL_STRINGNAME( ImplAlloc )( nDestBytes ); 278*cdf0e10cSrcweir OSL_ASSERT(pTemp2 != NULL); 279*cdf0e10cSrcweir rtl_str_ImplCopy( pTemp2->buffer, pTemp->buffer, nDestBytes ); 280*cdf0e10cSrcweir rtl_freeMemory( pTemp ); 281*cdf0e10cSrcweir pTemp = pTemp2; 282*cdf0e10cSrcweir } 283*cdf0e10cSrcweir else 284*cdf0e10cSrcweir { 285*cdf0e10cSrcweir pTemp->length = nDestBytes; 286*cdf0e10cSrcweir pTemp->buffer[nDestBytes] = 0; 287*cdf0e10cSrcweir } 288*cdf0e10cSrcweir 289*cdf0e10cSrcweir rtl_destroyUnicodeToTextConverter( hConverter ); 290*cdf0e10cSrcweir if ( *pTarget ) 291*cdf0e10cSrcweir IMPL_RTL_STRINGNAME( release )( *pTarget ); 292*cdf0e10cSrcweir *pTarget = pTemp; 293*cdf0e10cSrcweir 294*cdf0e10cSrcweir /* Results the conversion in an empty buffer - 295*cdf0e10cSrcweir create an empty string */ 296*cdf0e10cSrcweir if ( pTemp && !nDestBytes ) 297*cdf0e10cSrcweir rtl_string_new( pTarget ); 298*cdf0e10cSrcweir } 299*cdf0e10cSrcweir return sal_True; 300*cdf0e10cSrcweir } 301*cdf0e10cSrcweir 302*cdf0e10cSrcweir void SAL_CALL rtl_uString2String( rtl_String** ppThis, 303*cdf0e10cSrcweir const sal_Unicode* pUStr, 304*cdf0e10cSrcweir sal_Int32 nULen, 305*cdf0e10cSrcweir rtl_TextEncoding eTextEncoding, 306*cdf0e10cSrcweir sal_uInt32 nCvtFlags ) 307*cdf0e10cSrcweir { 308*cdf0e10cSrcweir rtl_impl_convertUStringToString(ppThis, pUStr, nULen, eTextEncoding, 309*cdf0e10cSrcweir nCvtFlags, sal_False); 310*cdf0e10cSrcweir } 311*cdf0e10cSrcweir 312*cdf0e10cSrcweir sal_Bool SAL_CALL rtl_convertUStringToString(rtl_String ** pTarget, 313*cdf0e10cSrcweir sal_Unicode const * pSource, 314*cdf0e10cSrcweir sal_Int32 nLength, 315*cdf0e10cSrcweir rtl_TextEncoding nEncoding, 316*cdf0e10cSrcweir sal_uInt32 nFlags) 317*cdf0e10cSrcweir { 318*cdf0e10cSrcweir return rtl_impl_convertUStringToString(pTarget, pSource, nLength, nEncoding, 319*cdf0e10cSrcweir nFlags, sal_True); 320*cdf0e10cSrcweir } 321