xref: /aoo41x/main/sal/rtl/source/string.c (revision cdf0e10c)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 #if defined(_MSC_VER) && (_MSC_VER >= 1400)
28 #pragma warning(disable:4738) // storing 32-bit float result in memory, possible loss of performance
29 #endif
30 
31 #include <rtl/memory.h>
32 #include <osl/interlck.h>
33 #include <rtl/alloc.h>
34 #include <osl/diagnose.h>
35 #include <rtl/tencinfo.h>
36 
37 #include "strimp.h"
38 #include "surrogates.h"
39 #include <rtl/string.h>
40 
41 #include "rtl/math.h"
42 #include "rtl/tencinfo.h"
43 
44 /* ======================================================================= */
45 
46 /* static data to be referenced by all empty strings
47  * the refCount is predefined to 1 and must never become 0 !
48  */
49 static rtl_String const aImplEmpty_rtl_String =
50 {
51     SAL_STRING_STATIC_FLAG|1,
52             /* sal_Int32    refCount;   */
53     0,      /* sal_Int32    length;     */
54     { 0 }   /* sal_Char     buffer[1];  */
55 };
56 
57 /* ======================================================================= */
58 
/* Configuration of the shared string template for 8-bit strings.
 * These macros are defined before the template source is included and
 * select the character type, the function name prefixes, the string data
 * type and the shared empty-string instance.
 */

/* Character type stored in the string buffer. */
#define IMPL_RTL_STRCODE            sal_Char
/* Value of a character as an unsigned code.  The argument is parenthesized
 * so that the cast applies to the whole expression, not just to its first
 * operand (e.g. IMPL_RTL_USTRCODE( a + b )). */
#define IMPL_RTL_USTRCODE( c )      ((unsigned char)(c))
/* Builds the name of a raw character-buffer function: rtl_str_<n>. */
#define IMPL_RTL_STRNAME( n )       rtl_str_ ## n

/* Builds the name of a string-object function: rtl_string_<n>. */
#define IMPL_RTL_STRINGNAME( n )    rtl_string_ ## n
/* String data structure used by the template code. */
#define IMPL_RTL_STRINGDATA         rtl_String
/* Static instance referenced by all empty strings. */
#define IMPL_RTL_EMPTYSTRING        aImplEmpty_rtl_String
66 
67 /* ======================================================================= */
68 
69 /* Include String/UString template code */
70 
71 #include "strtmpl.c"
72 
73 sal_Int32 SAL_CALL rtl_str_valueOfFloat(sal_Char * pStr, float f)
74 {
75     rtl_String * pResult = NULL;
76     sal_Int32 nLen;
77     rtl_math_doubleToString(
78         &pResult, 0, 0, f, rtl_math_StringFormat_G,
79         RTL_STR_MAX_VALUEOFFLOAT - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0, 0,
80         sal_True);
81     nLen = pResult->length;
82     OSL_ASSERT(nLen < RTL_STR_MAX_VALUEOFFLOAT);
83     rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Char));
84     rtl_string_release(pResult);
85     return nLen;
86 }
87 
88 sal_Int32 SAL_CALL rtl_str_valueOfDouble(sal_Char * pStr, double d)
89 {
90     rtl_String * pResult = NULL;
91     sal_Int32 nLen;
92     rtl_math_doubleToString(
93         &pResult, 0, 0, d, rtl_math_StringFormat_G,
94         RTL_STR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
95         0, sal_True);
96     nLen = pResult->length;
97     OSL_ASSERT(nLen < RTL_STR_MAX_VALUEOFDOUBLE);
98     rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Char));
99     rtl_string_release(pResult);
100     return nLen;
101 }
102 
103 float SAL_CALL rtl_str_toFloat(sal_Char const * pStr)
104 {
105     return (float) rtl_math_stringToDouble(pStr, pStr + rtl_str_getLength(pStr),
106                                            '.', 0, 0, 0);
107 }
108 
109 double SAL_CALL rtl_str_toDouble(sal_Char const * pStr)
110 {
111     return rtl_math_stringToDouble(pStr, pStr + rtl_str_getLength(pStr), '.', 0,
112                                    0, 0);
113 }
114 
115 /* ======================================================================= */
116 
117 static int rtl_ImplGetFastUTF8ByteLen( const sal_Unicode* pStr, sal_Int32 nLen )
118 {
119     int                 n;
120     sal_Unicode         c;
121     sal_uInt32          nUCS4Char;
122     const sal_Unicode*  pEndStr;
123 
124     n = 0;
125     pEndStr  = pStr+nLen;
126     while ( pStr < pEndStr )
127     {
128         c = *pStr;
129 
130         if ( c < 0x80 )
131             n++;
132         else if ( c < 0x800 )
133             n += 2;
134         else
135         {
136             if ( !SAL_RTL_IS_HIGH_SURROGATE(c) )
137                 n += 3;
138             else
139             {
140                 nUCS4Char = c;
141 
142                 if ( pStr+1 < pEndStr )
143                 {
144                     c = *(pStr+1);
145                     if ( SAL_RTL_IS_LOW_SURROGATE(c) )
146                     {
147                         nUCS4Char = SAL_RTL_COMBINE_SURROGATES(nUCS4Char, c);
148                         pStr++;
149                     }
150                 }
151 
152                 if ( nUCS4Char < 0x10000 )
153                     n += 3;
154                 else if ( nUCS4Char < 0x200000 )
155                     n += 4;
156                 else if ( nUCS4Char < 0x4000000 )
157                     n += 5;
158                 else
159                     n += 6;
160             }
161         }
162 
163         pStr++;
164     }
165 
166     return n;
167 }
168 
169 /* ----------------------------------------------------------------------- */
170 
171 sal_Bool SAL_CALL rtl_impl_convertUStringToString(rtl_String ** pTarget,
172                                                   sal_Unicode const * pSource,
173                                                   sal_Int32 nLength,
174                                                   rtl_TextEncoding nEncoding,
175                                                   sal_uInt32 nFlags,
176                                                   sal_Bool bCheckErrors)
177 {
178     OSL_ASSERT(pTarget != NULL
179                && (pSource != NULL || nLength == 0)
180                && nLength >= 0
181                && rtl_isOctetTextEncoding(nEncoding));
182 
183     if ( !nLength )
184         rtl_string_new( pTarget );
185     else
186     {
187         rtl_String*                 pTemp;
188         rtl_UnicodeToTextConverter  hConverter;
189         sal_uInt32                  nInfo;
190         sal_Size                    nSrcChars;
191         sal_Size                    nDestBytes;
192         sal_Size                    nNewLen;
193         sal_Size                    nNotConvertedChars;
194         sal_Size                    nMaxCharLen;
195 
196         /* Optimization for UTF-8 - we try to calculate the exact length */
197         /* For all other encoding we try an good estimation */
198         if ( nEncoding == RTL_TEXTENCODING_UTF8 )
199         {
200             nNewLen = rtl_ImplGetFastUTF8ByteLen( pSource, nLength );
201             /* Includes the string only ASCII, then we could copy
202                the buffer faster */
203             if ( nNewLen == (sal_Size)nLength )
204             {
205                 IMPL_RTL_STRCODE* pBuffer;
206                 if ( *pTarget )
207                     IMPL_RTL_STRINGNAME( release )( *pTarget );
208                 *pTarget = IMPL_RTL_STRINGNAME( ImplAlloc )( nLength );
209                 OSL_ASSERT(*pTarget != NULL);
210                 pBuffer = (*pTarget)->buffer;
211                 do
212                 {
213                     /* Check ASCII range */
214                     OSL_ENSURE( *pSource <= 127,
215                                 "rtl_uString2String() - UTF8 test is encoding is wrong" );
216 
217                     *pBuffer = (IMPL_RTL_STRCODE)(unsigned char)*pSource;
218                     pBuffer++;
219                     pSource++;
220                     nLength--;
221                 }
222                 while ( nLength );
223                 return sal_True;
224             }
225 
226             nMaxCharLen = 4;
227         }
228         else
229         {
230             rtl_TextEncodingInfo aTextEncInfo;
231             aTextEncInfo.StructSize = sizeof( aTextEncInfo );
232             if ( !rtl_getTextEncodingInfo( nEncoding, &aTextEncInfo ) )
233             {
234                 aTextEncInfo.AverageCharSize    = 1;
235                 aTextEncInfo.MaximumCharSize    = 8;
236             }
237 
238             nNewLen = nLength*aTextEncInfo.AverageCharSize;
239             nMaxCharLen = aTextEncInfo.MaximumCharSize;
240         }
241 
242         nFlags |= RTL_UNICODETOTEXT_FLAGS_FLUSH;
243         hConverter = rtl_createUnicodeToTextConverter( nEncoding );
244 
245         for (;;)
246         {
247             pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen );
248             OSL_ASSERT(pTemp != NULL);
249             nDestBytes = rtl_convertUnicodeToText( hConverter, 0,
250                                                    pSource, nLength,
251                                                    pTemp->buffer, nNewLen,
252                                                    nFlags,
253                                                    &nInfo, &nSrcChars );
254             if (bCheckErrors && (nInfo & RTL_UNICODETOTEXT_INFO_ERROR) != 0)
255             {
256                 rtl_freeMemory(pTemp);
257                 rtl_destroyUnicodeToTextConverter(hConverter);
258                 return sal_False;
259             }
260 
261             if ((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0)
262                 break;
263 
264             /* Buffer not big enough, try again with enough space */
265             rtl_freeMemory( pTemp );
266 
267             /* Try with the max. count of characters with
268                additional overhead for replacing functionality */
269             nNotConvertedChars = nLength-nSrcChars;
270             nNewLen = nDestBytes+(nNotConvertedChars*nMaxCharLen)+nNotConvertedChars+4;
271         }
272 
273         /* Set the buffer to the correct size or is there to
274            much overhead, reallocate to the correct size */
275         if ( nNewLen > nDestBytes+8 )
276         {
277             rtl_String* pTemp2 = IMPL_RTL_STRINGNAME( ImplAlloc )( nDestBytes );
278             OSL_ASSERT(pTemp2 != NULL);
279             rtl_str_ImplCopy( pTemp2->buffer, pTemp->buffer, nDestBytes );
280             rtl_freeMemory( pTemp );
281             pTemp = pTemp2;
282         }
283         else
284         {
285             pTemp->length = nDestBytes;
286             pTemp->buffer[nDestBytes] = 0;
287         }
288 
289         rtl_destroyUnicodeToTextConverter( hConverter );
290         if ( *pTarget )
291             IMPL_RTL_STRINGNAME( release )( *pTarget );
292         *pTarget = pTemp;
293 
294         /* Results the conversion in an empty buffer -
295            create an empty string */
296         if ( pTemp && !nDestBytes )
297             rtl_string_new( pTarget );
298     }
299     return sal_True;
300 }
301 
302 void SAL_CALL rtl_uString2String( rtl_String** ppThis,
303                                   const sal_Unicode* pUStr,
304                                   sal_Int32 nULen,
305                                   rtl_TextEncoding eTextEncoding,
306                                   sal_uInt32 nCvtFlags )
307 {
308     rtl_impl_convertUStringToString(ppThis, pUStr, nULen, eTextEncoding,
309                                     nCvtFlags, sal_False);
310 }
311 
312 sal_Bool SAL_CALL rtl_convertUStringToString(rtl_String ** pTarget,
313                                              sal_Unicode const * pSource,
314                                              sal_Int32 nLength,
315                                              rtl_TextEncoding nEncoding,
316                                              sal_uInt32 nFlags)
317 {
318     return rtl_impl_convertUStringToString(pTarget, pSource, nLength, nEncoding,
319                                            nFlags, sal_True);
320 }
321