xref: /trunk/main/sal/rtl/source/string.c (revision 647f063d)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 #if defined(_MSC_VER) && (_MSC_VER >= 1400)
24 #pragma warning(disable:4738) // storing 32-bit float result in memory, possible loss of performance
25 #endif
26 
27 #include <rtl/memory.h>
28 #include <osl/interlck.h>
29 #include <rtl/alloc.h>
30 #include <osl/diagnose.h>
31 #include <rtl/tencinfo.h>
32 
33 #include "strimp.h"
34 #include "surrogates.h"
35 #include <rtl/string.h>
36 
37 #include "rtl/math.h"
38 #include "rtl/tencinfo.h"
39 
40 /* ======================================================================= */
41 
/* Static data to be referenced by all empty strings.
 * The refCount is predefined to 1 (combined with SAL_STRING_STATIC_FLAG)
 * and must never become 0!
 */
static rtl_String const aImplEmpty_rtl_String =
{
    SAL_STRING_STATIC_FLAG|1,
            /* sal_Int32    refCount;   */
    0,      /* sal_Int32    length;     */
    { 0 }   /* sal_Char     buffer[1];  */
};
52 
53 /* ======================================================================= */
54 
/* Parameters consumed by the shared String/UString template code that is
 * included below, specialised here for the 8-bit string variant. */

/* Character type stored in the string buffer. */
#define IMPL_RTL_STRCODE            sal_Char
/* Yields the character as an unsigned value.  The argument is parenthesized
 * so that a compound expression is cast as a whole, not just its first
 * operand. */
#define IMPL_RTL_USTRCODE( c )      ((unsigned char)(c))
/* Builds the identifier rtl_str_<n>. */
#define IMPL_RTL_STRNAME( n )       rtl_str_ ## n

/* Builds the identifier rtl_string_<n>. */
#define IMPL_RTL_STRINGNAME( n )    rtl_string_ ## n
/* String data structure the template code operates on. */
#define IMPL_RTL_STRINGDATA         rtl_String
/* Static instance that represents the empty string. */
#define IMPL_RTL_EMPTYSTRING        aImplEmpty_rtl_String
62 
63 /* ======================================================================= */
64 
65 /* Include String/UString template code */
66 
67 #include "strtmpl.c"
68 
rtl_str_valueOfFloat(sal_Char * pStr,float f)69 sal_Int32 SAL_CALL rtl_str_valueOfFloat(sal_Char * pStr, float f)
70 {
71     rtl_String * pResult = NULL;
72     sal_Int32 nLen;
73     rtl_math_doubleToString(
74         &pResult, 0, 0, f, rtl_math_StringFormat_G,
75         RTL_STR_MAX_VALUEOFFLOAT - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0, 0,
76         sal_True);
77     nLen = pResult->length;
78     OSL_ASSERT(nLen < RTL_STR_MAX_VALUEOFFLOAT);
79     rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Char));
80     rtl_string_release(pResult);
81     return nLen;
82 }
83 
rtl_str_valueOfDouble(sal_Char * pStr,double d)84 sal_Int32 SAL_CALL rtl_str_valueOfDouble(sal_Char * pStr, double d)
85 {
86     rtl_String * pResult = NULL;
87     sal_Int32 nLen;
88     rtl_math_doubleToString(
89         &pResult, 0, 0, d, rtl_math_StringFormat_G,
90         RTL_STR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
91         0, sal_True);
92     nLen = pResult->length;
93     OSL_ASSERT(nLen < RTL_STR_MAX_VALUEOFDOUBLE);
94     rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Char));
95     rtl_string_release(pResult);
96     return nLen;
97 }
98 
rtl_str_toFloat(sal_Char const * pStr)99 float SAL_CALL rtl_str_toFloat(sal_Char const * pStr)
100 {
101     return (float) rtl_math_stringToDouble(pStr, pStr + rtl_str_getLength(pStr),
102                                            '.', 0, 0, 0);
103 }
104 
rtl_str_toDouble(sal_Char const * pStr)105 double SAL_CALL rtl_str_toDouble(sal_Char const * pStr)
106 {
107     return rtl_math_stringToDouble(pStr, pStr + rtl_str_getLength(pStr), '.', 0,
108                                    0, 0);
109 }
110 
111 /* ======================================================================= */
112 
/* Sums up a per-character byte count for the nLen code units at pStr:
 *   - units below 0x80 count 1, units below 0x800 count 2;
 *   - any other unit that is not a high surrogate counts 3;
 *   - a high surrogate followed (inside the range) by a low surrogate is
 *     combined with it, both units are consumed, and the count depends on
 *     the combined value (3 to 6);
 *   - a high surrogate without a following low surrogate is sized by its
 *     own value.
 * Returns the accumulated count.
 */
static int rtl_ImplGetFastUTF8ByteLen( const sal_Unicode* pStr, sal_Int32 nLen )
{
    const sal_Unicode*  pCur;
    const sal_Unicode*  pLimit = pStr+nLen;
    int                 nBytes = 0;

    for ( pCur = pStr; pCur < pLimit; pCur++ )
    {
        sal_Unicode cUnit = *pCur;
        sal_uInt32  nCodePoint;

        if ( cUnit < 0x80 )
        {
            nBytes += 1;
            continue;
        }
        if ( cUnit < 0x800 )
        {
            nBytes += 2;
            continue;
        }
        if ( !SAL_RTL_IS_HIGH_SURROGATE(cUnit) )
        {
            nBytes += 3;
            continue;
        }

        /* high surrogate: try to pair it with the following unit */
        nCodePoint = cUnit;
        if ( pCur+1 < pLimit )
        {
            sal_Unicode cNext = *(pCur+1);
            if ( SAL_RTL_IS_LOW_SURROGATE(cNext) )
            {
                nCodePoint = SAL_RTL_COMBINE_SURROGATES(nCodePoint, cNext);
                /* the low surrogate has been consumed as well */
                pCur++;
            }
        }

        if ( nCodePoint < 0x10000 )
            nBytes += 3;
        else if ( nCodePoint < 0x200000 )
            nBytes += 4;
        else if ( nCodePoint < 0x4000000 )
            nBytes += 5;
        else
            nBytes += 6;
    }

    return nBytes;
}
164 
165 /* ----------------------------------------------------------------------- */
166 
rtl_impl_convertUStringToString(rtl_String ** pTarget,sal_Unicode const * pSource,sal_Int32 nLength,rtl_TextEncoding nEncoding,sal_uInt32 nFlags,sal_Bool bCheckErrors)167 sal_Bool SAL_CALL rtl_impl_convertUStringToString(rtl_String ** pTarget,
168                                                   sal_Unicode const * pSource,
169                                                   sal_Int32 nLength,
170                                                   rtl_TextEncoding nEncoding,
171                                                   sal_uInt32 nFlags,
172                                                   sal_Bool bCheckErrors)
173 {
174     OSL_ASSERT(pTarget != NULL
175                && (pSource != NULL || nLength == 0)
176                && nLength >= 0
177                && rtl_isOctetTextEncoding(nEncoding));
178 
179     if ( !nLength )
180         rtl_string_new( pTarget );
181     else
182     {
183         rtl_String*                 pTemp;
184         rtl_UnicodeToTextConverter  hConverter;
185         sal_uInt32                  nInfo;
186         sal_Size                    nSrcChars;
187         sal_Size                    nDestBytes;
188         sal_Size                    nNewLen;
189         sal_Size                    nNotConvertedChars;
190         sal_Size                    nMaxCharLen;
191 
192         /* Optimization for UTF-8 - we try to calculate the exact length */
193         /* For all other encoding we try an good estimation */
194         if ( nEncoding == RTL_TEXTENCODING_UTF8 )
195         {
196             nNewLen = rtl_ImplGetFastUTF8ByteLen( pSource, nLength );
197             /* Includes the string only ASCII, then we could copy
198                the buffer faster */
199             if ( nNewLen == (sal_Size)nLength )
200             {
201                 IMPL_RTL_STRCODE* pBuffer;
202                 if ( *pTarget )
203                     IMPL_RTL_STRINGNAME( release )( *pTarget );
204                 *pTarget = IMPL_RTL_STRINGNAME( ImplAlloc )( nLength );
205                 OSL_ASSERT(*pTarget != NULL);
206                 pBuffer = (*pTarget)->buffer;
207                 do
208                 {
209                     /* Check ASCII range */
210                     OSL_ENSURE( *pSource <= 127,
211                                 "rtl_uString2String() - UTF8 test is encoding is wrong" );
212 
213                     *pBuffer = (IMPL_RTL_STRCODE)(unsigned char)*pSource;
214                     pBuffer++;
215                     pSource++;
216                     nLength--;
217                 }
218                 while ( nLength );
219                 return sal_True;
220             }
221 
222             nMaxCharLen = 4;
223         }
224         else
225         {
226             rtl_TextEncodingInfo aTextEncInfo;
227             aTextEncInfo.StructSize = sizeof( aTextEncInfo );
228             if ( !rtl_getTextEncodingInfo( nEncoding, &aTextEncInfo ) )
229             {
230                 aTextEncInfo.AverageCharSize    = 1;
231                 aTextEncInfo.MaximumCharSize    = 8;
232             }
233 
234             nNewLen = nLength*aTextEncInfo.AverageCharSize;
235             nMaxCharLen = aTextEncInfo.MaximumCharSize;
236         }
237 
238         nFlags |= RTL_UNICODETOTEXT_FLAGS_FLUSH;
239         hConverter = rtl_createUnicodeToTextConverter( nEncoding );
240 
241         for (;;)
242         {
243             pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen );
244             OSL_ASSERT(pTemp != NULL);
245             nDestBytes = rtl_convertUnicodeToText( hConverter, 0,
246                                                    pSource, nLength,
247                                                    pTemp->buffer, nNewLen,
248                                                    nFlags,
249                                                    &nInfo, &nSrcChars );
250             if (bCheckErrors && (nInfo & RTL_UNICODETOTEXT_INFO_ERROR) != 0)
251             {
252                 rtl_freeMemory(pTemp);
253                 rtl_destroyUnicodeToTextConverter(hConverter);
254                 return sal_False;
255             }
256 
257             if ((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0)
258                 break;
259 
260             /* Buffer not big enough, try again with enough space */
261             rtl_freeMemory( pTemp );
262 
263             /* Try with the max. count of characters with
264                additional overhead for replacing functionality */
265             nNotConvertedChars = nLength-nSrcChars;
266             nNewLen = nDestBytes+(nNotConvertedChars*nMaxCharLen)+nNotConvertedChars+4;
267         }
268 
269         /* Set the buffer to the correct size or is there to
270            much overhead, reallocate to the correct size */
271         if ( nNewLen > nDestBytes+8 )
272         {
273             rtl_String* pTemp2 = IMPL_RTL_STRINGNAME( ImplAlloc )( nDestBytes );
274             OSL_ASSERT(pTemp2 != NULL);
275             rtl_str_ImplCopy( pTemp2->buffer, pTemp->buffer, nDestBytes );
276             rtl_freeMemory( pTemp );
277             pTemp = pTemp2;
278         }
279         else
280         {
281             pTemp->length = nDestBytes;
282             pTemp->buffer[nDestBytes] = 0;
283         }
284 
285         rtl_destroyUnicodeToTextConverter( hConverter );
286         if ( *pTarget )
287             IMPL_RTL_STRINGNAME( release )( *pTarget );
288         *pTarget = pTemp;
289 
290         /* Results the conversion in an empty buffer -
291            create an empty string */
292         if ( pTemp && !nDestBytes )
293             rtl_string_new( pTarget );
294     }
295     return sal_True;
296 }
297 
rtl_uString2String(rtl_String ** ppThis,const sal_Unicode * pUStr,sal_Int32 nULen,rtl_TextEncoding eTextEncoding,sal_uInt32 nCvtFlags)298 void SAL_CALL rtl_uString2String( rtl_String** ppThis,
299                                   const sal_Unicode* pUStr,
300                                   sal_Int32 nULen,
301                                   rtl_TextEncoding eTextEncoding,
302                                   sal_uInt32 nCvtFlags )
303 {
304     rtl_impl_convertUStringToString(ppThis, pUStr, nULen, eTextEncoding,
305                                     nCvtFlags, sal_False);
306 }
307 
rtl_convertUStringToString(rtl_String ** pTarget,sal_Unicode const * pSource,sal_Int32 nLength,rtl_TextEncoding nEncoding,sal_uInt32 nFlags)308 sal_Bool SAL_CALL rtl_convertUStringToString(rtl_String ** pTarget,
309                                              sal_Unicode const * pSource,
310                                              sal_Int32 nLength,
311                                              rtl_TextEncoding nEncoding,
312                                              sal_uInt32 nFlags)
313 {
314     return rtl_impl_convertUStringToString(pTarget, pSource, nLength, nEncoding,
315                                            nFlags, sal_True);
316 }
317