xref: /trunk/main/sal/rtl/source/ustring.c (revision 647f063d)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 #if defined(_MSC_VER) && (_MSC_VER >= 1400)
24 #pragma warning(disable:4738) // storing 32-bit float result in memory, possible loss of performance
25 #endif
26 
27 #include <rtl/memory.h>
28 #include <osl/diagnose.h>
29 #include <osl/interlck.h>
30 #include <rtl/alloc.h>
31 #include <osl/mutex.h>
32 #include <osl/doublecheckedlocking.h>
33 #include <rtl/tencinfo.h>
34 
35 #include <string.h>
36 #include <sal/alloca.h>
37 
38 #include "hash.h"
39 #include "strimp.h"
40 #include "surrogates.h"
41 #include <rtl/ustring.h>
42 
43 #include "rtl/math.h"
44 #include "rtl/tencinfo.h"
45 
46 /* ======================================================================= */
47 
48 /* static data to be referenced by all empty strings
49  * the refCount is predefined to 1 and must never become 0 !
50  */
51 static rtl_uString const aImplEmpty_rtl_uString =
52 {
53     (sal_Int32) (SAL_STRING_INTERN_FLAG|SAL_STRING_STATIC_FLAG|1), /*sal_Int32    refCount; */
54     0,                                               /*sal_Int32    length;   */
55     { 0 }                                            /*sal_Unicode  buffer[1];*/
56 };
57 
58 /* ======================================================================= */
59 
60 #define IMPL_RTL_STRCODE            sal_Unicode
61 #define IMPL_RTL_USTRCODE( c )      (c)
62 #define IMPL_RTL_STRNAME( n )       rtl_ustr_ ## n
63 
64 #define IMPL_RTL_STRINGNAME( n )    rtl_uString_ ## n
65 #define IMPL_RTL_STRINGDATA         rtl_uString
66 #define IMPL_RTL_EMPTYSTRING        aImplEmpty_rtl_uString
67 #define IMPL_RTL_INTERN
68 static void internRelease (rtl_uString *pThis);
69 
70 /* ======================================================================= */
71 
72 /* Include String/UString template code */
73 
74 #include "strtmpl.c"
75 
/* Searches str (len code units) front to back for the ASCII string subStr
   (subLen characters).  Returns the index of the first match, or -1 when
   there is no match or subLen is not within 1..len. */
sal_Int32 rtl_ustr_indexOfAscii_WithLength(
    sal_Unicode const * str, sal_Int32 len,
    char const * subStr, sal_Int32 subLen)
{
    sal_Int32 pos;
    sal_Int32 lastStart;

    if (subLen <= 0 || subLen > len) {
        return -1;
    }

    /* last index at which subStr still fits into str */
    lastStart = len - subLen;
    for (pos = 0; pos <= lastStart; ++pos) {
        if (rtl_ustr_asciil_reverseEquals_WithLength(
                str + pos, subStr, subLen))
        {
            return pos;
        }
    }
    return -1;
}
92 
/* Searches str (len code units) back to front for the ASCII string subStr
   (subLen characters).  Returns the index of the last match, or -1 when
   there is no match or subLen is not within 1..len. */
sal_Int32 rtl_ustr_lastIndexOfAscii_WithLength(
    sal_Unicode const * str, sal_Int32 len,
    char const * subStr, sal_Int32 subLen)
{
    sal_Int32 pos;

    if (subLen <= 0 || subLen > len) {
        return -1;
    }

    /* start at the last index at which subStr still fits into str */
    pos = len - subLen;
    while (pos >= 0) {
        if (rtl_ustr_asciil_reverseEquals_WithLength(
                str + pos, subStr, subLen))
        {
            return pos;
        }
        --pos;
    }
    return -1;
}
109 
rtl_ustr_valueOfFloat(sal_Unicode * pStr,float f)110 sal_Int32 SAL_CALL rtl_ustr_valueOfFloat(sal_Unicode * pStr, float f)
111 {
112     rtl_uString * pResult = NULL;
113     sal_Int32 nLen;
114     rtl_math_doubleToUString(
115         &pResult, 0, 0, f, rtl_math_StringFormat_G,
116         RTL_USTR_MAX_VALUEOFFLOAT - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
117         0, sal_True);
118     nLen = pResult->length;
119     OSL_ASSERT(nLen < RTL_USTR_MAX_VALUEOFFLOAT);
120     rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Unicode));
121     rtl_uString_release(pResult);
122     return nLen;
123 }
124 
rtl_ustr_valueOfDouble(sal_Unicode * pStr,double d)125 sal_Int32 SAL_CALL rtl_ustr_valueOfDouble(sal_Unicode * pStr, double d)
126 {
127     rtl_uString * pResult = NULL;
128     sal_Int32 nLen;
129     rtl_math_doubleToUString(
130         &pResult, 0, 0, d, rtl_math_StringFormat_G,
131         RTL_USTR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
132         0, sal_True);
133     nLen = pResult->length;
134     OSL_ASSERT(nLen < RTL_USTR_MAX_VALUEOFDOUBLE);
135     rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Unicode));
136     rtl_uString_release(pResult);
137     return nLen;
138 }
139 
rtl_ustr_toFloat(sal_Unicode const * pStr)140 float SAL_CALL rtl_ustr_toFloat(sal_Unicode const * pStr)
141 {
142     return (float) rtl_math_uStringToDouble(pStr,
143                                             pStr + rtl_ustr_getLength(pStr),
144                                             '.', 0, 0, 0);
145 }
146 
rtl_ustr_toDouble(sal_Unicode const * pStr)147 double SAL_CALL rtl_ustr_toDouble(sal_Unicode const * pStr)
148 {
149     return rtl_math_uStringToDouble(pStr, pStr + rtl_ustr_getLength(pStr), '.',
150                                     0, 0, 0);
151 }
152 
153 /* ======================================================================= */
154 
rtl_ustr_ascii_compare(const sal_Unicode * pStr1,const sal_Char * pStr2)155 sal_Int32 SAL_CALL rtl_ustr_ascii_compare( const sal_Unicode* pStr1,
156                                            const sal_Char* pStr2 )
157 {
158     sal_Int32 nRet;
159     while ( ((nRet = ((sal_Int32)(*pStr1))-
160                      ((sal_Int32)((unsigned char)(*pStr2)))) == 0) &&
161             *pStr2 )
162     {
163         pStr1++;
164         pStr2++;
165     }
166 
167     return nRet;
168 }
169 
170 /* ----------------------------------------------------------------------- */
171 
rtl_ustr_ascii_compare_WithLength(const sal_Unicode * pStr1,sal_Int32 nStr1Len,const sal_Char * pStr2)172 sal_Int32 SAL_CALL rtl_ustr_ascii_compare_WithLength( const sal_Unicode* pStr1,
173                                                       sal_Int32 nStr1Len,
174                                                       const sal_Char* pStr2 )
175 {
176 	sal_Int32 nRet = 0;
177     while( ((nRet = (nStr1Len ? (sal_Int32)(*pStr1) : 0)-
178                     ((sal_Int32)((unsigned char)(*pStr2)))) == 0) &&
179            nStr1Len && *pStr2 )
180     {
181         pStr1++;
182         pStr2++;
183         nStr1Len--;
184     }
185 
186     return nRet;
187 }
188 
189 /* ----------------------------------------------------------------------- */
190 
rtl_ustr_ascii_shortenedCompare_WithLength(const sal_Unicode * pStr1,sal_Int32 nStr1Len,const sal_Char * pStr2,sal_Int32 nShortenedLength)191 sal_Int32 SAL_CALL rtl_ustr_ascii_shortenedCompare_WithLength( const sal_Unicode* pStr1,
192                                                                sal_Int32 nStr1Len,
193                                                                const sal_Char* pStr2,
194                                                                sal_Int32 nShortenedLength )
195 {
196     const sal_Unicode*  pStr1End = pStr1 + nStr1Len;
197     sal_Int32           nRet;
198     while ( (nShortenedLength > 0) &&
199             (pStr1 < pStr1End) && *pStr2 )
200     {
201         /* Check ASCII range */
202         OSL_ENSURE( (*pStr2 & 0x80) == 0, "Found ASCII char > 127");
203 
204         nRet = ((sal_Int32)*pStr1)-
205                ((sal_Int32)(unsigned char)*pStr2);
206         if ( nRet != 0 )
207             return nRet;
208 
209         nShortenedLength--;
210         pStr1++;
211         pStr2++;
212     }
213 
214     if ( nShortenedLength <= 0 )
215         return 0;
216 
217     if ( *pStr2 )
218     {
219         OSL_ENSURE( pStr1 == pStr1End, "pStr1 == pStr1End failed" );
220         // first is a substring of the second string => less (negative value)
221         nRet = -1;
222     }
223     else
224     {
225         // greater or equal
226         nRet = pStr1End - pStr1;
227     }
228 
229     return nRet;
230 }
231 
232 /* ----------------------------------------------------------------------- */
233 
rtl_ustr_asciil_reverseCompare_WithLength(const sal_Unicode * pStr1,sal_Int32 nStr1Len,const sal_Char * pStr2,sal_Int32 nStr2Len)234 sal_Int32 SAL_CALL rtl_ustr_asciil_reverseCompare_WithLength( const sal_Unicode* pStr1,
235                                                               sal_Int32 nStr1Len,
236                                                               const sal_Char* pStr2,
237                                                               sal_Int32 nStr2Len )
238 {
239     const sal_Unicode*  pStr1Run = pStr1+nStr1Len;
240     const sal_Char*     pStr2Run = pStr2+nStr2Len;
241     sal_Int32           nRet;
242     while ( (pStr1 < pStr1Run) && (pStr2 < pStr2Run) )
243     {
244         pStr1Run--;
245         pStr2Run--;
246         nRet = ((sal_Int32)*pStr1Run)-((sal_Int32)*pStr2Run);
247         if ( nRet )
248             return nRet;
249     }
250 
251     return nStr1Len - nStr2Len;
252 }
253 
254 /* ----------------------------------------------------------------------- */
255 
rtl_ustr_asciil_reverseEquals_WithLength(const sal_Unicode * pStr1,const sal_Char * pStr2,sal_Int32 nStrLen)256 sal_Bool SAL_CALL rtl_ustr_asciil_reverseEquals_WithLength( const sal_Unicode* pStr1,
257                                                               const sal_Char* pStr2,
258                                                               sal_Int32 nStrLen )
259 {
260     const sal_Unicode*  pStr1Run = pStr1+nStrLen;
261     const sal_Char*     pStr2Run = pStr2+nStrLen;
262     while ( pStr1 < pStr1Run )
263     {
264         pStr1Run--;
265         pStr2Run--;
266 		if( *pStr1Run != (sal_Unicode)*pStr2Run )
267 			return sal_False;
268     }
269 
270     return sal_True;
271 }
272 
273 /* ----------------------------------------------------------------------- */
274 
rtl_ustr_ascii_compareIgnoreAsciiCase(const sal_Unicode * pStr1,const sal_Char * pStr2)275 sal_Int32 SAL_CALL rtl_ustr_ascii_compareIgnoreAsciiCase( const sal_Unicode* pStr1,
276                                                           const sal_Char* pStr2 )
277 {
278     sal_Int32   nRet;
279     sal_Int32   c1;
280     sal_Int32   c2;
281     do
282     {
283         /* If character between 'A' and 'Z', than convert it to lowercase */
284         c1 = (sal_Int32)*pStr1;
285         c2 = (sal_Int32)((unsigned char)*pStr2);
286         if ( (c1 >= 65) && (c1 <= 90) )
287             c1 += 32;
288         if ( (c2 >= 65) && (c2 <= 90) )
289             c2 += 32;
290         nRet = c1-c2;
291         if ( nRet != 0 )
292             return nRet;
293 
294         pStr1++;
295         pStr2++;
296     }
297     while ( c2 );
298 
299     return 0;
300 }
301 
302 /* ----------------------------------------------------------------------- */
303 
rtl_ustr_ascii_compareIgnoreAsciiCase_WithLength(const sal_Unicode * pStr1,sal_Int32 nStr1Len,const sal_Char * pStr2)304 sal_Int32 SAL_CALL rtl_ustr_ascii_compareIgnoreAsciiCase_WithLength( const sal_Unicode* pStr1,
305                                                                      sal_Int32 nStr1Len,
306                                                                      const sal_Char* pStr2 )
307 {
308     sal_Int32   nRet;
309     sal_Int32   c1;
310     sal_Int32   c2;
311     do
312     {
313         if ( !nStr1Len )
314             return *pStr2 == '\0' ? 0 : -1;
315 
316         /* If character between 'A' and 'Z', than convert it to lowercase */
317         c1 = (sal_Int32)*pStr1;
318         c2 = (sal_Int32)((unsigned char)*pStr2);
319         if ( (c1 >= 65) && (c1 <= 90) )
320             c1 += 32;
321         if ( (c2 >= 65) && (c2 <= 90) )
322             c2 += 32;
323         nRet = c1-c2;
324         if ( nRet != 0 )
325             return nRet;
326 
327         pStr1++;
328         pStr2++;
329         nStr1Len--;
330     }
331     while( c2 );
332 
333     return 0;
334 }
335 
/* Compares first (firstLen code units) with second (secondLen bytes, taken
   as unsigned), folding 'A'..'Z' to lower case on both sides.
   Returns the difference of the first differing pair within the common
   length, otherwise firstLen - secondLen. */
sal_Int32 rtl_ustr_ascii_compareIgnoreAsciiCase_WithLengths(
    sal_Unicode const * first, sal_Int32 firstLen,
    char const * second, sal_Int32 secondLen)
{
    sal_Int32 common = firstLen < secondLen ? firstLen : secondLen;
    sal_Int32 pos = 0;

    while (pos < common) {
        sal_Int32 c1 = first[pos];
        sal_Int32 c2 = (unsigned char) second[pos];

        /* 65..90 is 'A'..'Z'; adding 32 gives the lower-case letter */
        if (c1 >= 65 && c1 <= 90) {
            c1 += 32;
        }
        if (c2 >= 65 && c2 <= 90) {
            c2 += 32;
        }
        if (c1 != c2) {
            return c1 - c2;
        }
        ++pos;
    }
    return firstLen - secondLen;
}
359 
360 /* ----------------------------------------------------------------------- */
361 
rtl_ustr_ascii_shortenedCompareIgnoreAsciiCase_WithLength(const sal_Unicode * pStr1,sal_Int32 nStr1Len,const sal_Char * pStr2,sal_Int32 nShortenedLength)362 sal_Int32 SAL_CALL rtl_ustr_ascii_shortenedCompareIgnoreAsciiCase_WithLength( const sal_Unicode* pStr1,
363                                                                               sal_Int32 nStr1Len,
364                                                                               const sal_Char* pStr2,
365                                                                               sal_Int32 nShortenedLength )
366 {
367     const sal_Unicode*  pStr1End = pStr1 + nStr1Len;
368     sal_Int32           nRet;
369     sal_Int32           c1;
370     sal_Int32           c2;
371     while ( (nShortenedLength > 0) &&
372             (pStr1 < pStr1End) && *pStr2 )
373     {
374         /* Check ASCII range */
375         OSL_ENSURE( (*pStr2 & 0x80) == 0, "Found ASCII char > 127");
376 
377         /* If character between 'A' and 'Z', than convert it to lowercase */
378         c1 = (sal_Int32)*pStr1;
379         c2 = (sal_Int32)((unsigned char)*pStr2);
380         if ( (c1 >= 65) && (c1 <= 90) )
381             c1 += 32;
382         if ( (c2 >= 65) && (c2 <= 90) )
383             c2 += 32;
384         nRet = c1-c2;
385         if ( nRet != 0 )
386             return nRet;
387 
388         nShortenedLength--;
389         pStr1++;
390         pStr2++;
391     }
392 
393     if ( nShortenedLength <= 0 )
394         return 0;
395 
396     if ( *pStr2 )
397     {
398         OSL_ENSURE( pStr1 == pStr1End, "pStr1 == pStr1End failed" );
399         // first is a substring of the second string => less (negative value)
400         nRet = -1;
401     }
402     else
403     {
404         // greater or equal
405         nRet = pStr1End - pStr1;
406     }
407 
408     return nRet;
409 }
410 
411 /* ----------------------------------------------------------------------- */
412 
rtl_uString_newFromAscii(rtl_uString ** ppThis,const sal_Char * pCharStr)413 void SAL_CALL rtl_uString_newFromAscii( rtl_uString** ppThis,
414                                         const sal_Char* pCharStr )
415 {
416     sal_Int32 nLen;
417 
418     if ( pCharStr )
419     {
420         const sal_Char* pTempStr = pCharStr;
421         while( *pTempStr )
422             pTempStr++;
423         nLen = pTempStr-pCharStr;
424     }
425     else
426         nLen = 0;
427 
428     if ( !nLen )
429     {
430         IMPL_RTL_STRINGNAME( new )( ppThis );
431         return;
432     }
433 
434     if ( *ppThis )
435         IMPL_RTL_STRINGNAME( release )( *ppThis );
436 
437     *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen );
438     OSL_ASSERT(*ppThis != NULL);
439     if ( (*ppThis) )
440     {
441         IMPL_RTL_STRCODE* pBuffer = (*ppThis)->buffer;
442         do
443         {
444             /* Check ASCII range */
445             OSL_ENSURE( ((unsigned char)*pCharStr) <= 127,
446                         "rtl_uString_newFromAscii() - Found ASCII char > 127" );
447 
448             *pBuffer = *pCharStr;
449             pBuffer++;
450             pCharStr++;
451         }
452         while ( *pCharStr );
453     }
454 }
455 
rtl_uString_newFromCodePoints(rtl_uString ** newString,sal_uInt32 const * codePoints,sal_Int32 codePointCount)456 void SAL_CALL rtl_uString_newFromCodePoints(
457     rtl_uString ** newString, sal_uInt32 const * codePoints,
458     sal_Int32 codePointCount)
459 {
460     sal_Int32 n;
461     sal_Int32 i;
462     sal_Unicode * p;
463     OSL_ASSERT(
464         newString != NULL &&
465         (codePoints != NULL || codePointCount == 0) &&
466         codePointCount >= 0);
467     if (codePointCount == 0) {
468         rtl_uString_new(newString);
469         return;
470     }
471     if (*newString != NULL) {
472         rtl_uString_release(*newString);
473     }
474     n = codePointCount;
475     for (i = 0; i < codePointCount; ++i) {
476         OSL_ASSERT(codePoints[i] <= 0x10FFFF);
477         if (codePoints[i] >= 0x10000) {
478             ++n;
479         }
480     }
481     /* Builds on the assumption that sal_Int32 uses 32 bit two's complement
482        representation with wrap around (the necessary number of UTF-16 code
483        units will be no larger than 2 * SAL_MAX_INT32, represented as
484        sal_Int32 -2): */
485     if (n < 0) {
486         *newString = NULL;
487         return;
488     }
489     *newString = rtl_uString_ImplAlloc(n);
490     if (*newString == NULL) {
491         return;
492     }
493     p = (*newString)->buffer;
494     for (i = 0; i < codePointCount; ++i) {
495         sal_uInt32 c = codePoints[i];
496         if (c < 0x10000) {
497             *p++ = (sal_Unicode) c;
498         } else {
499             c -= 0x10000;
500             *p++ = (sal_Unicode) ((c >> 10) | SAL_RTL_FIRST_HIGH_SURROGATE);
501             *p++ = (sal_Unicode) ((c & 0x3FF) | SAL_RTL_FIRST_LOW_SURROGATE);
502         }
503     }
504 }
505 
506 /* ======================================================================= */
507 
/* Estimates how many characters the nLen bytes at pStr contain when read
   as UTF-8: the lead byte decides how many bytes are skipped (1 to 6), and
   every skipped sequence counts as one.  Bytes that match no lead byte
   pattern are skipped singly and counted as one each.  The sequences are
   not validated, and a sequence always counts as one regardless of how many
   UTF-16 code units it would need. */
static int rtl_ImplGetFastUTF8UnicodeLen( const sal_Char* pStr, sal_Int32 nLen )
{
    const sal_Char* const pStop = pStr+nLen;
    int                   nCount = 0;

    for ( ; pStr < pStop; nCount++ )
    {
        const sal_uChar cLead = (sal_uChar)*pStr;
        int             nSkip;

        if ( !(cLead & 0x80) )
            nSkip = 1;
        else if ( (cLead & 0xE0) == 0xC0 )
            nSkip = 2;
        else if ( (cLead & 0xF0) == 0xE0 )
            nSkip = 3;
        else if ( (cLead & 0xF8) == 0xF0 )
            nSkip = 4;
        else if ( (cLead & 0xFC) == 0xF8 )
            nSkip = 5;
        else if ( (cLead & 0xFE) == 0xFC )
            nSkip = 6;
        else
            nSkip = 1;

        pStr += nSkip;
    }

    return nCount;
}
540 
541 /* ----------------------------------------------------------------------- */
542 
/* Converts the nLen bytes at pStr from eTextEncoding to UTF-16 and stores
 * the result as a new string in *ppThis.
 *
 * ppThis     in/out; a previous string is released.  On allocation failure
 *            *ppThis is NULL on return.
 * nCvtFlags  passed to rtl_convertTextToUnicode() (with FLUSH added).
 * pInfo      optional; receives 0 for the direct-copy paths, the converter's
 *            info flags for converted text, or
 *            RTL_TEXTTOUNICODE_INFO_ERROR |
 *            RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL on allocation failure.
 *
 * nLen == 0 yields the result of rtl_uString_new().  US-ASCII input, and
 * UTF-8 input that consists of 7-bit bytes only, is copied byte by byte
 * without using a converter.
 */
static void rtl_string2UString_status( rtl_uString** ppThis,
                                       const sal_Char* pStr,
                                       sal_Int32 nLen,
                                       rtl_TextEncoding eTextEncoding,
                                       sal_uInt32 nCvtFlags,
                                       sal_uInt32 *pInfo )
{
    OSL_ENSURE(rtl_isOctetTextEncoding(eTextEncoding),
               "rtl_string2UString_status() - Wrong TextEncoding" );

    if ( !nLen )
    {
        rtl_uString_new( ppThis );
        if (pInfo != NULL) {
            *pInfo = 0;
        }
    }
    else
    {
        if ( *ppThis )
        {
            IMPL_RTL_STRINGNAME( release )( *ppThis );
            /* do not leave the caller with a pointer to the released
               string if one of the failure paths below returns early */
            *ppThis = NULL;
        }

        /* Optimization for US-ASCII */
        if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US )
        {
            IMPL_RTL_STRCODE* pBuffer;
            *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen );
            if (*ppThis == NULL) {
                if (pInfo != NULL) {
                    *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
                        RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
                }
                return;
            }
            pBuffer = (*ppThis)->buffer;
            do
            {
                /* Check ASCII range */
                OSL_ENSURE( ((unsigned char)*pStr) <= 127,
                            "rtl_string2UString_status() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" );

                *pBuffer = *pStr;
                pBuffer++;
                pStr++;
                nLen--;
            }
            while ( nLen );
            if (pInfo != NULL) {
                *pInfo = 0;
            }
        }
        else
        {
            rtl_uString*                pTemp;
            rtl_uString*                pTemp2 = NULL;
            rtl_TextToUnicodeConverter  hConverter;
            sal_uInt32                  nInfo;
            sal_Size                    nSrcBytes;
            sal_Size                    nDestChars;
            sal_Size                    nNewLen;

            /* Optimization for UTF-8 - we try to calculate the exact length */
            /* For all other encodings we try the maximum - and reallocate
               the buffer if needed */
            if ( eTextEncoding == RTL_TEXTENCODING_UTF8 )
            {
                nNewLen = rtl_ImplGetFastUTF8UnicodeLen( pStr, nLen );
                /* If the string contains only ASCII, the buffer can be
                   copied directly.  Equal counts alone do not prove that:
                   stray continuation bytes and 0xFE/0xFF are counted one by
                   one as well, so the bytes are checked explicitly and
                   anything else is left to the converter. */
                if ( nNewLen == (sal_Size)nLen )
                {
                    sal_Int32 nAsciiLen = 0;
                    while ( (nAsciiLen < nLen) &&
                            ((((unsigned char)pStr[nAsciiLen]) & 0x80) == 0) )
                        nAsciiLen++;

                    if ( nAsciiLen == nLen )
                    {
                        IMPL_RTL_STRCODE* pBuffer;
                        *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen );
                        if (*ppThis == NULL)
                        {
                            if (pInfo != NULL) {
                                *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
                                    RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
                            }
                            return;
                        }
                        pBuffer = (*ppThis)->buffer;
                        do
                        {
                            *pBuffer = *pStr;
                            pBuffer++;
                            pStr++;
                            nLen--;
                        }
                        while ( nLen );
                        if (pInfo != NULL) {
                            *pInfo = 0;
                        }
                        return;
                    }
                }
            }
            else
                nNewLen = nLen;

            nCvtFlags |= RTL_TEXTTOUNICODE_FLAGS_FLUSH;
            hConverter = rtl_createTextToUnicodeConverter( eTextEncoding );

            pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen );
            if (pTemp == NULL) {
                /* the converter must not outlive this call */
                rtl_destroyTextToUnicodeConverter( hConverter );
                if (pInfo != NULL) {
                    *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
                        RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
                }
                return;
            }
            nDestChars = rtl_convertTextToUnicode( hConverter, 0,
                                                   pStr, nLen,
                                                   pTemp->buffer, nNewLen,
                                                   nCvtFlags,
                                                   &nInfo, &nSrcBytes );

            /* Buffer not big enough, try again with enough space */
            /* Shouldn't be the case, but if we get a text encoding which
               could result in more unicode characters we have this
               code here. Could be the case for apple encodings */
            while ( nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL )
            {
                rtl_freeMemory( pTemp );
                nNewLen += 8;
                pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen );
                if (pTemp == NULL) {
                    /* the converter must not outlive this call */
                    rtl_destroyTextToUnicodeConverter( hConverter );
                    if (pInfo != NULL) {
                        *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
                            RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
                    }
                    return;
                }
                nDestChars = rtl_convertTextToUnicode( hConverter, 0,
                                                       pStr, nLen,
                                                       pTemp->buffer, nNewLen,
                                                       nCvtFlags,
                                                       &nInfo, &nSrcBytes );
            }

            if (pInfo)
                *pInfo = nInfo;

            /* Set the buffer to the correct size or if there is too
               much overhead, reallocate to the correct size */
            if ( nNewLen > nDestChars+8 )
            {
                pTemp2 = IMPL_RTL_STRINGNAME( ImplAlloc )( nDestChars );
            }
            if (pTemp2 != NULL)
            {
                rtl_str_ImplCopy(pTemp2->buffer, pTemp->buffer, nDestChars);
                rtl_freeMemory(pTemp);
                pTemp = pTemp2;
            }
            else
            {
                /* keep the larger buffer, just shorten the string */
                pTemp->length = nDestChars;
                pTemp->buffer[nDestChars] = 0;
            }

            rtl_destroyTextToUnicodeConverter( hConverter );
            *ppThis = pTemp;

            /* If the conversion results in an empty buffer -
               create an empty string */
            if ( pTemp && !nDestChars )
                rtl_uString_new( ppThis );
        }
    }
}
717 
rtl_string2UString(rtl_uString ** ppThis,const sal_Char * pStr,sal_Int32 nLen,rtl_TextEncoding eTextEncoding,sal_uInt32 nCvtFlags)718 void SAL_CALL rtl_string2UString( rtl_uString** ppThis,
719                                   const sal_Char* pStr,
720                                   sal_Int32 nLen,
721                                   rtl_TextEncoding eTextEncoding,
722                                   sal_uInt32 nCvtFlags )
723 {
724     rtl_string2UString_status( ppThis, pStr, nLen, eTextEncoding,
725                                nCvtFlags, NULL );
726 }
727 
728 /* ----------------------------------------------------------------------- */
729 
/* Passed to rtl_ustring_intern_internal(): with CAN_RETURN the string
   handed in is freed there when the pool returns a different string;
   with CANNOT_RETURN it is never freed. */
typedef enum {
    CANNOT_RETURN = 0,
    CAN_RETURN    = 1
} StrLifecycle;
734 
735 static oslMutex
getInternMutex()736 getInternMutex()
737 {
738     static oslMutex pPoolGuard = NULL;
739     if( !pPoolGuard )
740     {
741         oslMutex pGlobalGuard;
742         pGlobalGuard = *osl_getGlobalMutex();
743         osl_acquireMutex( pGlobalGuard );
744         if( !pPoolGuard )
745         {
746             oslMutex p = osl_createMutex();
747             OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER();
748             pPoolGuard = p;
749         }
750         osl_releaseMutex( pGlobalGuard );
751     }
752     else
753     {
754         OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER();
755     }
756 
757     return pPoolGuard;
758 }
759 
760 /* returns true if we found a dup in the pool */
rtl_ustring_intern_internal(rtl_uString ** newStr,rtl_uString * str,StrLifecycle can_return)761 static void rtl_ustring_intern_internal( rtl_uString ** newStr,
762                                          rtl_uString  * str,
763                                          StrLifecycle   can_return )
764 {
765     oslMutex pPoolMutex;
766 
767     pPoolMutex = getInternMutex();
768 
769     osl_acquireMutex( pPoolMutex );
770 
771     *newStr = rtl_str_hash_intern (str, can_return);
772 
773     osl_releaseMutex( pPoolMutex );
774 
775     if( can_return && *newStr != str )
776     { /* we dupped, then found a match */
777         rtl_freeMemory( str );
778     }
779 }
780 
rtl_uString_intern(rtl_uString ** newStr,rtl_uString * str)781 void SAL_CALL rtl_uString_intern( rtl_uString ** newStr,
782                                   rtl_uString  * str)
783 {
784     if (SAL_STRING_IS_INTERN(str))
785     {
786         IMPL_RTL_AQUIRE( str );
787         *newStr = str;
788     }
789     else
790     {
791         rtl_uString *pOrg = *newStr;
792         *newStr = NULL;
793         rtl_ustring_intern_internal( newStr, str, CANNOT_RETURN );
794         if (pOrg)
795             rtl_uString_release (pOrg);
796     }
797 }
798 
rtl_uString_internConvert(rtl_uString ** newStr,const sal_Char * str,sal_Int32 len,rtl_TextEncoding eTextEncoding,sal_uInt32 convertFlags,sal_uInt32 * pInfo)799 void SAL_CALL rtl_uString_internConvert( rtl_uString   ** newStr,
800                                          const sal_Char * str,
801                                          sal_Int32        len,
802                                          rtl_TextEncoding eTextEncoding,
803                                          sal_uInt32       convertFlags,
804                                          sal_uInt32     * pInfo )
805 {
806     rtl_uString *scratch;
807 
808     if (*newStr)
809     {
810         rtl_uString_release (*newStr);
811         *newStr = NULL;
812     }
813 
814     if ( len < 256 )
815     { // try various optimisations
816         if ( len < 0 )
817             len = strlen( str );
818         if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US )
819         {
820             int i;
821             rtl_uString *pScratch;
822             pScratch = alloca( sizeof( rtl_uString )
823                                + len * sizeof (IMPL_RTL_STRCODE ) );
824             for (i = 0; i < len; i++)
825             {
826                 /* Check ASCII range */
827                 OSL_ENSURE( ((unsigned char)str[i]) <= 127,
828                             "rtl_ustring_internConvert() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" );
829                 pScratch->buffer[i] = str[i];
830             }
831             pScratch->length = len;
832             rtl_ustring_intern_internal( newStr, pScratch, CANNOT_RETURN );
833             return;
834         }
835         /* FIXME: we want a nice UTF-8 / alloca shortcut here */
836     }
837 
838     scratch = NULL;
839     rtl_string2UString_status( &scratch, str, len, eTextEncoding, convertFlags,
840                                pInfo );
841     if (!scratch) {
842         return;
843     }
844     rtl_ustring_intern_internal( newStr, scratch, CAN_RETURN );
845 }
846 
847 static void
internRelease(rtl_uString * pThis)848 internRelease (rtl_uString *pThis)
849 {
850     oslMutex pPoolMutex;
851 
852     rtl_uString *pFree = NULL;
853     if ( SAL_STRING_REFCOUNT(
854              osl_decrementInterlockedCount( &(pThis->refCount) ) ) == 0)
855     {
856         pPoolMutex = getInternMutex();
857         osl_acquireMutex( pPoolMutex );
858 
859         rtl_str_hash_remove (pThis);
860 
861         /* May have been separately acquired */
862         if ( SAL_STRING_REFCOUNT(
863                  osl_incrementInterlockedCount( &(pThis->refCount) ) ) == 1 )
864         {
865             /* we got the last ref */
866             pFree = pThis;
867         }
868         else /* very unusual */
869         {
870             internRelease (pThis);
871         }
872 
873         osl_releaseMutex( pPoolMutex );
874     }
875     if (pFree)
876         rtl_freeMemory (pFree);
877 }
878 
rtl_uString_iterateCodePoints(rtl_uString const * string,sal_Int32 * indexUtf16,sal_Int32 incrementCodePoints)879 sal_uInt32 SAL_CALL rtl_uString_iterateCodePoints(
880     rtl_uString const * string, sal_Int32 * indexUtf16,
881     sal_Int32 incrementCodePoints)
882 {
883     sal_Int32 n;
884     sal_Unicode cu;
885     sal_uInt32 cp;
886     OSL_ASSERT(string != NULL && indexUtf16 != NULL);
887     n = *indexUtf16;
888     OSL_ASSERT(n >= 0 && n <= string->length);
889     while (incrementCodePoints < 0) {
890         OSL_ASSERT(n > 0);
891         cu = string->buffer[--n];
892         if (SAL_RTL_IS_LOW_SURROGATE(cu) && n != 0 &&
893             SAL_RTL_IS_HIGH_SURROGATE(string->buffer[n - 1]))
894         {
895             --n;
896         }
897         ++incrementCodePoints;
898     }
899     OSL_ASSERT(n >= 0 && n < string->length);
900     cu = string->buffer[n];
901     if (SAL_RTL_IS_HIGH_SURROGATE(cu) && string->length - n >= 2 &&
902         SAL_RTL_IS_LOW_SURROGATE(string->buffer[n + 1]))
903     {
904         cp = SAL_RTL_COMBINE_SURROGATES(cu, string->buffer[n + 1]);
905     } else {
906         cp = cu;
907     }
908     while (incrementCodePoints > 0) {
909         OSL_ASSERT(n < string->length);
910         cu = string->buffer[n++];
911         if (SAL_RTL_IS_HIGH_SURROGATE(cu) && n != string->length &&
912             SAL_RTL_IS_LOW_SURROGATE(string->buffer[n]))
913         {
914             ++n;
915         }
916         --incrementCodePoints;
917     }
918     OSL_ASSERT(n >= 0 && n <= string->length);
919     *indexUtf16 = n;
920     return cp;
921 }
922 
rtl_convertStringToUString(rtl_uString ** target,char const * source,sal_Int32 length,rtl_TextEncoding encoding,sal_uInt32 flags)923 sal_Bool rtl_convertStringToUString(
924     rtl_uString ** target, char const * source, sal_Int32 length,
925     rtl_TextEncoding encoding, sal_uInt32 flags) SAL_THROW_EXTERN_C()
926 {
927     sal_uInt32 info;
928     rtl_string2UString_status(target, source, length, encoding, flags, &info);
929     return (sal_Bool) ((info & RTL_TEXTTOUNICODE_INFO_ERROR) == 0);
930 }
931