xref: /aoo42x/main/sal/rtl/source/string.c (revision cdf0e10c)
1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir #if defined(_MSC_VER) && (_MSC_VER >= 1400)
28*cdf0e10cSrcweir #pragma warning(disable:4738) // storing 32-bit float result in memory, possible loss of performance
29*cdf0e10cSrcweir #endif
30*cdf0e10cSrcweir 
31*cdf0e10cSrcweir #include <rtl/memory.h>
32*cdf0e10cSrcweir #include <osl/interlck.h>
33*cdf0e10cSrcweir #include <rtl/alloc.h>
34*cdf0e10cSrcweir #include <osl/diagnose.h>
35*cdf0e10cSrcweir #include <rtl/tencinfo.h>
36*cdf0e10cSrcweir 
37*cdf0e10cSrcweir #include "strimp.h"
38*cdf0e10cSrcweir #include "surrogates.h"
39*cdf0e10cSrcweir #include <rtl/string.h>
40*cdf0e10cSrcweir 
41*cdf0e10cSrcweir #include "rtl/math.h"
42*cdf0e10cSrcweir #include "rtl/tencinfo.h"
43*cdf0e10cSrcweir 
44*cdf0e10cSrcweir /* ======================================================================= */
45*cdf0e10cSrcweir 
46*cdf0e10cSrcweir /* static data to be referenced by all empty strings
47*cdf0e10cSrcweir  * the refCount is predefined to 1 and must never become 0 !
48*cdf0e10cSrcweir  */
49*cdf0e10cSrcweir static rtl_String const aImplEmpty_rtl_String =
50*cdf0e10cSrcweir {
51*cdf0e10cSrcweir     SAL_STRING_STATIC_FLAG|1,
52*cdf0e10cSrcweir             /* sal_Int32    refCount;   */
53*cdf0e10cSrcweir     0,      /* sal_Int32    length;     */
54*cdf0e10cSrcweir     { 0 }   /* sal_Char     buffer[1];  */
55*cdf0e10cSrcweir };
56*cdf0e10cSrcweir 
57*cdf0e10cSrcweir /* ======================================================================= */
58*cdf0e10cSrcweir 
/* Parameters for the String/UString template code, set up here for the
 * 8-bit (sal_Char / rtl_String) instantiation.
 */

/* Character type of the string buffers. */
#define IMPL_RTL_STRCODE            sal_Char
/* Converts a character to its unsigned value.  The argument is fully
 * parenthesized so that the cast applies to the whole expression passed in,
 * not only to its first operand. */
#define IMPL_RTL_USTRCODE( c )      ((unsigned char)(c))
/* Builds the name of a raw character-buffer function: rtl_str_<n>. */
#define IMPL_RTL_STRNAME( n )       rtl_str_ ## n

/* Builds the name of a string-object function: rtl_string_<n>. */
#define IMPL_RTL_STRINGNAME( n )    rtl_string_ ## n
/* String object type. */
#define IMPL_RTL_STRINGDATA         rtl_String
/* Static instance used for empty strings. */
#define IMPL_RTL_EMPTYSTRING        aImplEmpty_rtl_String
66*cdf0e10cSrcweir 
67*cdf0e10cSrcweir /* ======================================================================= */
68*cdf0e10cSrcweir 
69*cdf0e10cSrcweir /* Include String/UString template code */
70*cdf0e10cSrcweir 
71*cdf0e10cSrcweir #include "strtmpl.c"
72*cdf0e10cSrcweir 
73*cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_str_valueOfFloat(sal_Char * pStr, float f)
74*cdf0e10cSrcweir {
75*cdf0e10cSrcweir     rtl_String * pResult = NULL;
76*cdf0e10cSrcweir     sal_Int32 nLen;
77*cdf0e10cSrcweir     rtl_math_doubleToString(
78*cdf0e10cSrcweir         &pResult, 0, 0, f, rtl_math_StringFormat_G,
79*cdf0e10cSrcweir         RTL_STR_MAX_VALUEOFFLOAT - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0, 0,
80*cdf0e10cSrcweir         sal_True);
81*cdf0e10cSrcweir     nLen = pResult->length;
82*cdf0e10cSrcweir     OSL_ASSERT(nLen < RTL_STR_MAX_VALUEOFFLOAT);
83*cdf0e10cSrcweir     rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Char));
84*cdf0e10cSrcweir     rtl_string_release(pResult);
85*cdf0e10cSrcweir     return nLen;
86*cdf0e10cSrcweir }
87*cdf0e10cSrcweir 
88*cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_str_valueOfDouble(sal_Char * pStr, double d)
89*cdf0e10cSrcweir {
90*cdf0e10cSrcweir     rtl_String * pResult = NULL;
91*cdf0e10cSrcweir     sal_Int32 nLen;
92*cdf0e10cSrcweir     rtl_math_doubleToString(
93*cdf0e10cSrcweir         &pResult, 0, 0, d, rtl_math_StringFormat_G,
94*cdf0e10cSrcweir         RTL_STR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
95*cdf0e10cSrcweir         0, sal_True);
96*cdf0e10cSrcweir     nLen = pResult->length;
97*cdf0e10cSrcweir     OSL_ASSERT(nLen < RTL_STR_MAX_VALUEOFDOUBLE);
98*cdf0e10cSrcweir     rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Char));
99*cdf0e10cSrcweir     rtl_string_release(pResult);
100*cdf0e10cSrcweir     return nLen;
101*cdf0e10cSrcweir }
102*cdf0e10cSrcweir 
103*cdf0e10cSrcweir float SAL_CALL rtl_str_toFloat(sal_Char const * pStr)
104*cdf0e10cSrcweir {
105*cdf0e10cSrcweir     return (float) rtl_math_stringToDouble(pStr, pStr + rtl_str_getLength(pStr),
106*cdf0e10cSrcweir                                            '.', 0, 0, 0);
107*cdf0e10cSrcweir }
108*cdf0e10cSrcweir 
109*cdf0e10cSrcweir double SAL_CALL rtl_str_toDouble(sal_Char const * pStr)
110*cdf0e10cSrcweir {
111*cdf0e10cSrcweir     return rtl_math_stringToDouble(pStr, pStr + rtl_str_getLength(pStr), '.', 0,
112*cdf0e10cSrcweir                                    0, 0);
113*cdf0e10cSrcweir }
114*cdf0e10cSrcweir 
115*cdf0e10cSrcweir /* ======================================================================= */
116*cdf0e10cSrcweir 
117*cdf0e10cSrcweir static int rtl_ImplGetFastUTF8ByteLen( const sal_Unicode* pStr, sal_Int32 nLen )
118*cdf0e10cSrcweir {
119*cdf0e10cSrcweir     int                 n;
120*cdf0e10cSrcweir     sal_Unicode         c;
121*cdf0e10cSrcweir     sal_uInt32          nUCS4Char;
122*cdf0e10cSrcweir     const sal_Unicode*  pEndStr;
123*cdf0e10cSrcweir 
124*cdf0e10cSrcweir     n = 0;
125*cdf0e10cSrcweir     pEndStr  = pStr+nLen;
126*cdf0e10cSrcweir     while ( pStr < pEndStr )
127*cdf0e10cSrcweir     {
128*cdf0e10cSrcweir         c = *pStr;
129*cdf0e10cSrcweir 
130*cdf0e10cSrcweir         if ( c < 0x80 )
131*cdf0e10cSrcweir             n++;
132*cdf0e10cSrcweir         else if ( c < 0x800 )
133*cdf0e10cSrcweir             n += 2;
134*cdf0e10cSrcweir         else
135*cdf0e10cSrcweir         {
136*cdf0e10cSrcweir             if ( !SAL_RTL_IS_HIGH_SURROGATE(c) )
137*cdf0e10cSrcweir                 n += 3;
138*cdf0e10cSrcweir             else
139*cdf0e10cSrcweir             {
140*cdf0e10cSrcweir                 nUCS4Char = c;
141*cdf0e10cSrcweir 
142*cdf0e10cSrcweir                 if ( pStr+1 < pEndStr )
143*cdf0e10cSrcweir                 {
144*cdf0e10cSrcweir                     c = *(pStr+1);
145*cdf0e10cSrcweir                     if ( SAL_RTL_IS_LOW_SURROGATE(c) )
146*cdf0e10cSrcweir                     {
147*cdf0e10cSrcweir                         nUCS4Char = SAL_RTL_COMBINE_SURROGATES(nUCS4Char, c);
148*cdf0e10cSrcweir                         pStr++;
149*cdf0e10cSrcweir                     }
150*cdf0e10cSrcweir                 }
151*cdf0e10cSrcweir 
152*cdf0e10cSrcweir                 if ( nUCS4Char < 0x10000 )
153*cdf0e10cSrcweir                     n += 3;
154*cdf0e10cSrcweir                 else if ( nUCS4Char < 0x200000 )
155*cdf0e10cSrcweir                     n += 4;
156*cdf0e10cSrcweir                 else if ( nUCS4Char < 0x4000000 )
157*cdf0e10cSrcweir                     n += 5;
158*cdf0e10cSrcweir                 else
159*cdf0e10cSrcweir                     n += 6;
160*cdf0e10cSrcweir             }
161*cdf0e10cSrcweir         }
162*cdf0e10cSrcweir 
163*cdf0e10cSrcweir         pStr++;
164*cdf0e10cSrcweir     }
165*cdf0e10cSrcweir 
166*cdf0e10cSrcweir     return n;
167*cdf0e10cSrcweir }
168*cdf0e10cSrcweir 
169*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
170*cdf0e10cSrcweir 
171*cdf0e10cSrcweir sal_Bool SAL_CALL rtl_impl_convertUStringToString(rtl_String ** pTarget,
172*cdf0e10cSrcweir                                                   sal_Unicode const * pSource,
173*cdf0e10cSrcweir                                                   sal_Int32 nLength,
174*cdf0e10cSrcweir                                                   rtl_TextEncoding nEncoding,
175*cdf0e10cSrcweir                                                   sal_uInt32 nFlags,
176*cdf0e10cSrcweir                                                   sal_Bool bCheckErrors)
177*cdf0e10cSrcweir {
178*cdf0e10cSrcweir     OSL_ASSERT(pTarget != NULL
179*cdf0e10cSrcweir                && (pSource != NULL || nLength == 0)
180*cdf0e10cSrcweir                && nLength >= 0
181*cdf0e10cSrcweir                && rtl_isOctetTextEncoding(nEncoding));
182*cdf0e10cSrcweir 
183*cdf0e10cSrcweir     if ( !nLength )
184*cdf0e10cSrcweir         rtl_string_new( pTarget );
185*cdf0e10cSrcweir     else
186*cdf0e10cSrcweir     {
187*cdf0e10cSrcweir         rtl_String*                 pTemp;
188*cdf0e10cSrcweir         rtl_UnicodeToTextConverter  hConverter;
189*cdf0e10cSrcweir         sal_uInt32                  nInfo;
190*cdf0e10cSrcweir         sal_Size                    nSrcChars;
191*cdf0e10cSrcweir         sal_Size                    nDestBytes;
192*cdf0e10cSrcweir         sal_Size                    nNewLen;
193*cdf0e10cSrcweir         sal_Size                    nNotConvertedChars;
194*cdf0e10cSrcweir         sal_Size                    nMaxCharLen;
195*cdf0e10cSrcweir 
196*cdf0e10cSrcweir         /* Optimization for UTF-8 - we try to calculate the exact length */
197*cdf0e10cSrcweir         /* For all other encoding we try an good estimation */
198*cdf0e10cSrcweir         if ( nEncoding == RTL_TEXTENCODING_UTF8 )
199*cdf0e10cSrcweir         {
200*cdf0e10cSrcweir             nNewLen = rtl_ImplGetFastUTF8ByteLen( pSource, nLength );
201*cdf0e10cSrcweir             /* Includes the string only ASCII, then we could copy
202*cdf0e10cSrcweir                the buffer faster */
203*cdf0e10cSrcweir             if ( nNewLen == (sal_Size)nLength )
204*cdf0e10cSrcweir             {
205*cdf0e10cSrcweir                 IMPL_RTL_STRCODE* pBuffer;
206*cdf0e10cSrcweir                 if ( *pTarget )
207*cdf0e10cSrcweir                     IMPL_RTL_STRINGNAME( release )( *pTarget );
208*cdf0e10cSrcweir                 *pTarget = IMPL_RTL_STRINGNAME( ImplAlloc )( nLength );
209*cdf0e10cSrcweir                 OSL_ASSERT(*pTarget != NULL);
210*cdf0e10cSrcweir                 pBuffer = (*pTarget)->buffer;
211*cdf0e10cSrcweir                 do
212*cdf0e10cSrcweir                 {
213*cdf0e10cSrcweir                     /* Check ASCII range */
214*cdf0e10cSrcweir                     OSL_ENSURE( *pSource <= 127,
215*cdf0e10cSrcweir                                 "rtl_uString2String() - UTF8 test is encoding is wrong" );
216*cdf0e10cSrcweir 
217*cdf0e10cSrcweir                     *pBuffer = (IMPL_RTL_STRCODE)(unsigned char)*pSource;
218*cdf0e10cSrcweir                     pBuffer++;
219*cdf0e10cSrcweir                     pSource++;
220*cdf0e10cSrcweir                     nLength--;
221*cdf0e10cSrcweir                 }
222*cdf0e10cSrcweir                 while ( nLength );
223*cdf0e10cSrcweir                 return sal_True;
224*cdf0e10cSrcweir             }
225*cdf0e10cSrcweir 
226*cdf0e10cSrcweir             nMaxCharLen = 4;
227*cdf0e10cSrcweir         }
228*cdf0e10cSrcweir         else
229*cdf0e10cSrcweir         {
230*cdf0e10cSrcweir             rtl_TextEncodingInfo aTextEncInfo;
231*cdf0e10cSrcweir             aTextEncInfo.StructSize = sizeof( aTextEncInfo );
232*cdf0e10cSrcweir             if ( !rtl_getTextEncodingInfo( nEncoding, &aTextEncInfo ) )
233*cdf0e10cSrcweir             {
234*cdf0e10cSrcweir                 aTextEncInfo.AverageCharSize    = 1;
235*cdf0e10cSrcweir                 aTextEncInfo.MaximumCharSize    = 8;
236*cdf0e10cSrcweir             }
237*cdf0e10cSrcweir 
238*cdf0e10cSrcweir             nNewLen = nLength*aTextEncInfo.AverageCharSize;
239*cdf0e10cSrcweir             nMaxCharLen = aTextEncInfo.MaximumCharSize;
240*cdf0e10cSrcweir         }
241*cdf0e10cSrcweir 
242*cdf0e10cSrcweir         nFlags |= RTL_UNICODETOTEXT_FLAGS_FLUSH;
243*cdf0e10cSrcweir         hConverter = rtl_createUnicodeToTextConverter( nEncoding );
244*cdf0e10cSrcweir 
245*cdf0e10cSrcweir         for (;;)
246*cdf0e10cSrcweir         {
247*cdf0e10cSrcweir             pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen );
248*cdf0e10cSrcweir             OSL_ASSERT(pTemp != NULL);
249*cdf0e10cSrcweir             nDestBytes = rtl_convertUnicodeToText( hConverter, 0,
250*cdf0e10cSrcweir                                                    pSource, nLength,
251*cdf0e10cSrcweir                                                    pTemp->buffer, nNewLen,
252*cdf0e10cSrcweir                                                    nFlags,
253*cdf0e10cSrcweir                                                    &nInfo, &nSrcChars );
254*cdf0e10cSrcweir             if (bCheckErrors && (nInfo & RTL_UNICODETOTEXT_INFO_ERROR) != 0)
255*cdf0e10cSrcweir             {
256*cdf0e10cSrcweir                 rtl_freeMemory(pTemp);
257*cdf0e10cSrcweir                 rtl_destroyUnicodeToTextConverter(hConverter);
258*cdf0e10cSrcweir                 return sal_False;
259*cdf0e10cSrcweir             }
260*cdf0e10cSrcweir 
261*cdf0e10cSrcweir             if ((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0)
262*cdf0e10cSrcweir                 break;
263*cdf0e10cSrcweir 
264*cdf0e10cSrcweir             /* Buffer not big enough, try again with enough space */
265*cdf0e10cSrcweir             rtl_freeMemory( pTemp );
266*cdf0e10cSrcweir 
267*cdf0e10cSrcweir             /* Try with the max. count of characters with
268*cdf0e10cSrcweir                additional overhead for replacing functionality */
269*cdf0e10cSrcweir             nNotConvertedChars = nLength-nSrcChars;
270*cdf0e10cSrcweir             nNewLen = nDestBytes+(nNotConvertedChars*nMaxCharLen)+nNotConvertedChars+4;
271*cdf0e10cSrcweir         }
272*cdf0e10cSrcweir 
273*cdf0e10cSrcweir         /* Set the buffer to the correct size or is there to
274*cdf0e10cSrcweir            much overhead, reallocate to the correct size */
275*cdf0e10cSrcweir         if ( nNewLen > nDestBytes+8 )
276*cdf0e10cSrcweir         {
277*cdf0e10cSrcweir             rtl_String* pTemp2 = IMPL_RTL_STRINGNAME( ImplAlloc )( nDestBytes );
278*cdf0e10cSrcweir             OSL_ASSERT(pTemp2 != NULL);
279*cdf0e10cSrcweir             rtl_str_ImplCopy( pTemp2->buffer, pTemp->buffer, nDestBytes );
280*cdf0e10cSrcweir             rtl_freeMemory( pTemp );
281*cdf0e10cSrcweir             pTemp = pTemp2;
282*cdf0e10cSrcweir         }
283*cdf0e10cSrcweir         else
284*cdf0e10cSrcweir         {
285*cdf0e10cSrcweir             pTemp->length = nDestBytes;
286*cdf0e10cSrcweir             pTemp->buffer[nDestBytes] = 0;
287*cdf0e10cSrcweir         }
288*cdf0e10cSrcweir 
289*cdf0e10cSrcweir         rtl_destroyUnicodeToTextConverter( hConverter );
290*cdf0e10cSrcweir         if ( *pTarget )
291*cdf0e10cSrcweir             IMPL_RTL_STRINGNAME( release )( *pTarget );
292*cdf0e10cSrcweir         *pTarget = pTemp;
293*cdf0e10cSrcweir 
294*cdf0e10cSrcweir         /* Results the conversion in an empty buffer -
295*cdf0e10cSrcweir            create an empty string */
296*cdf0e10cSrcweir         if ( pTemp && !nDestBytes )
297*cdf0e10cSrcweir             rtl_string_new( pTarget );
298*cdf0e10cSrcweir     }
299*cdf0e10cSrcweir     return sal_True;
300*cdf0e10cSrcweir }
301*cdf0e10cSrcweir 
302*cdf0e10cSrcweir void SAL_CALL rtl_uString2String( rtl_String** ppThis,
303*cdf0e10cSrcweir                                   const sal_Unicode* pUStr,
304*cdf0e10cSrcweir                                   sal_Int32 nULen,
305*cdf0e10cSrcweir                                   rtl_TextEncoding eTextEncoding,
306*cdf0e10cSrcweir                                   sal_uInt32 nCvtFlags )
307*cdf0e10cSrcweir {
308*cdf0e10cSrcweir     rtl_impl_convertUStringToString(ppThis, pUStr, nULen, eTextEncoding,
309*cdf0e10cSrcweir                                     nCvtFlags, sal_False);
310*cdf0e10cSrcweir }
311*cdf0e10cSrcweir 
312*cdf0e10cSrcweir sal_Bool SAL_CALL rtl_convertUStringToString(rtl_String ** pTarget,
313*cdf0e10cSrcweir                                              sal_Unicode const * pSource,
314*cdf0e10cSrcweir                                              sal_Int32 nLength,
315*cdf0e10cSrcweir                                              rtl_TextEncoding nEncoding,
316*cdf0e10cSrcweir                                              sal_uInt32 nFlags)
317*cdf0e10cSrcweir {
318*cdf0e10cSrcweir     return rtl_impl_convertUStringToString(pTarget, pSource, nLength, nEncoding,
319*cdf0e10cSrcweir                                            nFlags, sal_True);
320*cdf0e10cSrcweir }
321