xref: /aoo41x/main/tools/source/string/strascii.cxx (revision cdf0e10c)
1 #/*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // no include "precompiled_tools.hxx" because this is included in other cxx files.
29 
30 // =======================================================================
31 
32 #ifdef DBG_UTIL
33 
34 static sal_Bool ImplDbgCheckAsciiStr( const sal_Char* pAsciiStr, sal_Int32 nLen )
35 {
36     while ( nLen && *pAsciiStr )
37     {
38         if ( ((unsigned char)*pAsciiStr) > 127 )
39             return sal_False;
40         ++pAsciiStr,
41         --nLen;
42     }
43 
44     return sal_True;
45 }
46 
47 #endif
48 
49 // =======================================================================
50 
51 static void ImplCopyAsciiStr( sal_Unicode* pDest, const sal_Char* pSrc,
52                               sal_Int32 nLen )
53 {
54     DBG_ASSERT( ImplDbgCheckAsciiStr( pSrc, nLen ),
55                 "UniString::CopyAsciiStr() - pAsciiStr include characters > 127" );
56 
57     while ( nLen )
58     {
59         *pDest = (unsigned char)*pSrc;
60         ++pDest,
61         ++pSrc,
62         --nLen;
63     }
64 }
65 
66 // =======================================================================
67 
68 static sal_Int32 ImplStringCompareAscii( const sal_Unicode* pStr1, const sal_Char* pStr2 )
69 {
70     sal_Int32 nRet;
71     while ( ((nRet = ((sal_Int32)*pStr1)-((sal_Int32)((unsigned char)*pStr2))) == 0) &&
72             *pStr2 )
73     {
74         ++pStr1,
75         ++pStr2;
76     }
77 
78     return nRet;
79 }
80 
81 // -----------------------------------------------------------------------
82 
83 static sal_Int32 ImplStringCompareAscii( const sal_Unicode* pStr1, const sal_Char* pStr2,
84                                          xub_StrLen nCount )
85 {
86     sal_Int32 nRet = 0;
87     while ( nCount &&
88             ((nRet = ((sal_Int32)*pStr1)-((sal_Int32)((unsigned char)*pStr2))) == 0) &&
89             *pStr2 )
90     {
91         ++pStr1,
92         ++pStr2,
93         --nCount;
94     }
95 
96     return nRet;
97 }
98 
99 // -----------------------------------------------------------------------
100 
101 static sal_Int32 ImplStringCompareWithoutZeroAscii( const sal_Unicode* pStr1, const sal_Char* pStr2,
102                                                     xub_StrLen nCount )
103 {
104     sal_Int32 nRet = 0;
105     while ( nCount &&
106             ((nRet = ((sal_Int32)*pStr1)-((sal_Int32)((unsigned char)*pStr2))) == 0) )
107     {
108         ++pStr1,
109         ++pStr2,
110         --nCount;
111     }
112 
113     return nRet;
114 }
115 
116 // -----------------------------------------------------------------------
117 
118 static sal_Int32 ImplStringICompareAscii( const sal_Unicode* pStr1, const sal_Char* pStr2 )
119 {
120     sal_Int32   nRet;
121     sal_Unicode c1;
122     sal_Char    c2;
123     do
124     {
125         // Ist das Zeichen zwischen 'A' und 'Z' dann umwandeln
126         c1 = *pStr1;
127         c2 = *pStr2;
128         if ( (c1 >= 65) && (c1 <= 90) )
129             c1 += 32;
130         if ( (c2 >= 65) && (c2 <= 90) )
131             c2 += 32;
132         nRet = ((sal_Int32)c1)-((sal_Int32)((unsigned char)c2));
133         if ( nRet != 0 )
134             break;
135 
136         ++pStr1,
137         ++pStr2;
138     }
139     while ( c2 );
140 
141     return nRet;
142 }
143 
144 // -----------------------------------------------------------------------
145 
146 static sal_Int32 ImplStringICompareAscii( const sal_Unicode* pStr1, const sal_Char* pStr2,
147                                           xub_StrLen nCount )
148 {
149     sal_Int32   nRet = 0;
150     sal_Unicode c1;
151     sal_Char    c2;
152     do
153     {
154         if ( !nCount )
155             break;
156 
157         // Ist das Zeichen zwischen 'A' und 'Z' dann umwandeln
158         c1 = *pStr1;
159         c2 = *pStr2;
160         if ( (c1 >= 65) && (c1 <= 90) )
161             c1 += 32;
162         if ( (c2 >= 65) && (c2 <= 90) )
163             c2 += 32;
164         nRet = ((sal_Int32)c1)-((sal_Int32)((unsigned char)c2));
165         if ( nRet != 0 )
166             break;
167 
168         ++pStr1,
169         ++pStr2,
170         --nCount;
171     }
172     while ( c2 );
173 
174     return nRet;
175 }
176 
177 // =======================================================================
178 
179 UniString UniString::CreateFromAscii( const sal_Char* pAsciiStr )
180 {
181     DBG_ASSERT( pAsciiStr, "UniString::CreateFromAscii() - pAsciiStr is NULL" );
182 
183     // Stringlaenge ermitteln
184     xub_StrLen nLen = ImplStringLen( pAsciiStr );
185 
186     UniString aTempStr;
187     if ( nLen )
188     {
189         ImplCopyAsciiStr( aTempStr.AllocBuffer( nLen ), pAsciiStr, nLen );
190     }
191     return aTempStr;
192 }
193 
194 // -----------------------------------------------------------------------
195 
196 UniString UniString::CreateFromAscii( const sal_Char* pAsciiStr, xub_StrLen nLen )
197 {
198     DBG_ASSERT( pAsciiStr, "UniString::CreateFromAscii() - pAsciiStr is NULL" );
199 
200     // Stringlaenge ermitteln
201     if ( nLen == STRING_LEN )
202         nLen = ImplStringLen( pAsciiStr );
203 
204     UniString aTempStr;
205 
206     if ( nLen )
207     {
208         ImplCopyAsciiStr( aTempStr.AllocBuffer( nLen ), pAsciiStr, nLen );
209     }
210     return aTempStr;
211 }
212 
213 // -----------------------------------------------------------------------
214 
215 UniString& UniString::AssignAscii( const sal_Char* pAsciiStr )
216 {
217     DBG_CHKTHIS( UniString, DbgCheckUniString );
218     DBG_ASSERT( pAsciiStr, "UniString::AssignAscii() - pAsciiStr is NULL" );
219 
220     // Stringlaenge ermitteln
221     xub_StrLen nLen = ImplStringLen( pAsciiStr );
222 
223     if ( !nLen )
224     {
225 		STRING_NEW((STRING_TYPE **)&mpData);
226     }
227     else
228     {
229         // Wenn String genauso lang ist, wie der String, dann direkt kopieren
230         if ( (nLen == mpData->mnLen) && (mpData->mnRefCount == 1) )
231             ImplCopyAsciiStr( mpData->maStr, pAsciiStr, nLen );
232         else
233         {
234             // Alte Daten loeschen
235             STRING_RELEASE((STRING_TYPE *)mpData);
236 
237             // Daten initialisieren und String kopieren
238             mpData = ImplAllocData( nLen );
239             ImplCopyAsciiStr( mpData->maStr, pAsciiStr, nLen );
240         }
241     }
242 
243     return *this;
244 }
245 
246 // -----------------------------------------------------------------------
247 
248 UniString& UniString::AssignAscii( const sal_Char* pAsciiStr, xub_StrLen nLen )
249 {
250     DBG_CHKTHIS( UniString, DbgCheckUniString );
251     DBG_ASSERT( pAsciiStr, "UniString::AssignAscii() - pAsciiStr is NULL" );
252 
253     if ( nLen == STRING_LEN )
254         nLen = ImplStringLen( pAsciiStr );
255 
256 #ifdef DBG_UTIL
257     if ( DbgIsAssert() )
258     {
259         for ( xub_StrLen i = 0; i < nLen; ++i )
260         {
261             if ( !pAsciiStr[i] )
262             {
263                 DBG_ERROR( "UniString::AssignAscii() : nLen is wrong" );
264             }
265         }
266     }
267 #endif
268 
269     if ( !nLen )
270     {
271 		STRING_NEW((STRING_TYPE **)&mpData);
272     }
273     else
274     {
275         // Wenn String genauso lang ist, wie der String, dann direkt kopieren
276         if ( (nLen == mpData->mnLen) && (mpData->mnRefCount == 1) )
277             ImplCopyAsciiStr( mpData->maStr, pAsciiStr, nLen );
278         else
279         {
280             // Alte Daten loeschen
281             STRING_RELEASE((STRING_TYPE *)mpData);
282 
283             // Daten initialisieren und String kopieren
284             mpData = ImplAllocData( nLen );
285             ImplCopyAsciiStr( mpData->maStr, pAsciiStr, nLen );
286         }
287     }
288 
289     return *this;
290 }
291 
292 // -----------------------------------------------------------------------
293 
294 UniString& UniString::AppendAscii( const sal_Char* pAsciiStr )
295 {
296     DBG_CHKTHIS( UniString, DbgCheckUniString );
297     DBG_ASSERT( pAsciiStr, "UniString::AppendAscii() - pAsciiStr is NULL" );
298 
299     // Stringlaenge ermitteln
300     sal_Int32 nCopyLen = ImplStringLen( pAsciiStr );
301 
302     // Ueberlauf abfangen
303     nCopyLen = ImplGetCopyLen( mpData->mnLen, nCopyLen );
304 
305     // Ist es kein leerer String
306     if ( nCopyLen )
307     {
308         // Neue Datenstruktur und neuen String erzeugen
309         UniStringData* pNewData = ImplAllocData( mpData->mnLen+nCopyLen );
310 
311         // String kopieren
312         memcpy( pNewData->maStr, mpData->maStr, mpData->mnLen*sizeof( sal_Unicode ) );
313         ImplCopyAsciiStr( pNewData->maStr+mpData->mnLen, pAsciiStr, nCopyLen );
314 
315         // Alte Daten loeschen und Neue zuweisen
316         STRING_RELEASE((STRING_TYPE *)mpData);
317         mpData = pNewData;
318     }
319 
320     return *this;
321 }
322 
323 // -----------------------------------------------------------------------
324 
325 UniString& UniString::AppendAscii( const sal_Char* pAsciiStr, xub_StrLen nLen )
326 {
327     DBG_CHKTHIS( UniString, DbgCheckUniString );
328     DBG_ASSERT( pAsciiStr, "UniString::AppendAscii() - pAsciiStr is NULL" );
329 
330     if ( nLen == STRING_LEN )
331         nLen = ImplStringLen( pAsciiStr );
332 
333 #ifdef DBG_UTIL
334     if ( DbgIsAssert() )
335     {
336         for ( xub_StrLen i = 0; i < nLen; ++i )
337         {
338             if ( !pAsciiStr[i] )
339             {
340                 DBG_ERROR( "UniString::AppendAscii() : nLen is wrong" );
341             }
342         }
343     }
344 #endif
345 
346     // Ueberlauf abfangen
347     sal_Int32 nCopyLen = ImplGetCopyLen( mpData->mnLen, nLen );
348 
349     // Ist es kein leerer String
350     if ( nCopyLen )
351     {
352         // Neue Datenstruktur und neuen String erzeugen
353         UniStringData* pNewData = ImplAllocData( mpData->mnLen+nCopyLen );
354 
355         // String kopieren
356         memcpy( pNewData->maStr, mpData->maStr, mpData->mnLen*sizeof( sal_Unicode ) );
357         ImplCopyAsciiStr( pNewData->maStr+mpData->mnLen, pAsciiStr, nCopyLen );
358 
359         // Alte Daten loeschen und Neue zuweisen
360         STRING_RELEASE((STRING_TYPE *)mpData);
361         mpData = pNewData;
362     }
363 
364     return *this;
365 }
366 
367 // -----------------------------------------------------------------------
368 
369 UniString& UniString::InsertAscii( const char* pAsciiStr, xub_StrLen nIndex )
370 {
371     DBG_CHKTHIS( UniString, DbgCheckUniString );
372     DBG_ASSERT( pAsciiStr, "UniString::InsertAscii() - pAsciiStr is NULL" );
373 
374     // Stringlaenge ermitteln
375     sal_Int32 nCopyLen = ImplStringLen( pAsciiStr );
376 
377     // Ueberlauf abfangen
378     nCopyLen = ImplGetCopyLen( mpData->mnLen, nCopyLen );
379 
380     // Ist der einzufuegende String ein Leerstring
381     if ( !nCopyLen )
382         return *this;
383 
384     // Index groesser als Laenge
385     if ( nIndex > mpData->mnLen )
386         nIndex = static_cast< xub_StrLen >(mpData->mnLen);
387 
388     // Neue Laenge ermitteln und neuen String anlegen
389     UniStringData* pNewData = ImplAllocData( mpData->mnLen+nCopyLen );
390 
391     // String kopieren
392     memcpy( pNewData->maStr, mpData->maStr, nIndex*sizeof( sal_Unicode ) );
393     ImplCopyAsciiStr( pNewData->maStr+nIndex, pAsciiStr, nCopyLen );
394     memcpy( pNewData->maStr+nIndex+nCopyLen, mpData->maStr+nIndex,
395             (mpData->mnLen-nIndex)*sizeof( sal_Unicode ) );
396 
397     // Alte Daten loeschen und Neue zuweisen
398     STRING_RELEASE((STRING_TYPE *)mpData);
399     mpData = pNewData;
400 
401     return *this;
402 }
403 
404 // -----------------------------------------------------------------------
405 
406 UniString& UniString::ReplaceAscii( xub_StrLen nIndex, xub_StrLen nCount,
407                                     const sal_Char* pAsciiStr, xub_StrLen nStrLen )
408 {
409     DBG_CHKTHIS( UniString, DbgCheckUniString );
410     DBG_ASSERT( pAsciiStr, "UniString::ReplaceAscii() - pAsciiStr is NULL" );
411 
412     // Wenn Index groessergleich Laenge ist, dann ist es ein Append
413     if ( nIndex >= mpData->mnLen )
414     {
415         AppendAscii( pAsciiStr, nStrLen );
416         return *this;
417     }
418 
419     // Ist es eine Zuweisung
420     if ( (nIndex == 0) && (nCount >= mpData->mnLen) )
421     {
422         AssignAscii( pAsciiStr, nStrLen );
423         return *this;
424     }
425 
426     // Reicht ein Erase
427     if ( nStrLen == STRING_LEN )
428         nStrLen = ImplStringLen( pAsciiStr );
429     if ( !nStrLen )
430         return Erase( nIndex, nCount );
431 
432     // nCount darf nicht ueber das Stringende hinnausgehen
433     if ( nCount > mpData->mnLen - nIndex )
434         nCount = static_cast< xub_StrLen >(mpData->mnLen-nIndex);
435 
436     // Reicht eine zeichenweise Zuweisung
437     if ( nCount == nStrLen )
438     {
439         ImplCopyData();
440         ImplCopyAsciiStr( mpData->maStr+nIndex, pAsciiStr, nStrLen );
441         return *this;
442     }
443 
444     // Ueberlauf abfangen
445     sal_Int32 n = ImplGetCopyLen( mpData->mnLen-nCount, nStrLen );
446 
447     // Neue Daten anlegen
448     STRINGDATA* pNewData = ImplAllocData( mpData->mnLen-nCount+n );
449 
450     // String kopieren
451     memcpy( pNewData->maStr, mpData->maStr, nIndex*sizeof( STRCODE ) );
452     ImplCopyAsciiStr( pNewData->maStr+nIndex, pAsciiStr, n );
453     memcpy( pNewData->maStr+nIndex+n, mpData->maStr+nIndex+nCount,
454             (mpData->mnLen-nIndex-nCount+1)*sizeof( STRCODE ) );
455 
456     // Alte Daten loeschen und Neue zuweisen
457     STRING_RELEASE((STRING_TYPE *)mpData);
458     mpData = pNewData;
459 
460     return *this;
461 }
462 
463 // -----------------------------------------------------------------------
464 
465 StringCompare UniString::CompareToAscii( const sal_Char* pAsciiStr,
466                                          xub_StrLen nLen ) const
467 {
468     DBG_CHKTHIS( UniString, DbgCheckUniString );
469     DBG_ASSERT( ImplDbgCheckAsciiStr( pAsciiStr, nLen ),
470                 "UniString::CompareToAscii() - pAsciiStr include characters > 127" );
471 
472     // String vergleichen
473     sal_Int32 nCompare = ImplStringCompareAscii( mpData->maStr, pAsciiStr, nLen );
474 
475     // Rueckgabewert anpassen
476     if ( nCompare == 0 )
477         return COMPARE_EQUAL;
478     else if ( nCompare < 0 )
479         return COMPARE_LESS;
480     else
481         return COMPARE_GREATER;
482 }
483 
484 // -----------------------------------------------------------------------
485 
486 StringCompare UniString::CompareIgnoreCaseToAscii( const sal_Char* pAsciiStr,
487                                                    xub_StrLen nLen ) const
488 {
489     DBG_CHKTHIS( UniString, DbgCheckUniString );
490     DBG_ASSERT( ImplDbgCheckAsciiStr( pAsciiStr, nLen ),
491                 "UniString::CompareIgnoreCaseToAscii() - pAsciiStr include characters > 127" );
492 
493     // String vergleichen
494     sal_Int32 nCompare = ImplStringICompareAscii( mpData->maStr, pAsciiStr, nLen );
495 
496     // Rueckgabewert anpassen
497     if ( nCompare == 0 )
498         return COMPARE_EQUAL;
499     else if ( nCompare < 0 )
500         return COMPARE_LESS;
501     else
502         return COMPARE_GREATER;
503 }
504 
505 // -----------------------------------------------------------------------
506 
507 sal_Bool UniString::EqualsAscii( const sal_Char* pAsciiStr ) const
508 {
509     DBG_CHKTHIS( UniString, DbgCheckUniString );
510     DBG_ASSERT( ImplDbgCheckAsciiStr( pAsciiStr, STRING_LEN ),
511                 "UniString::EqualsAscii() - pAsciiStr include characters > 127" );
512 
513     return (ImplStringCompareAscii( mpData->maStr, pAsciiStr ) == 0);
514 }
515 
516 // -----------------------------------------------------------------------
517 
518 sal_Bool UniString::EqualsIgnoreCaseAscii( const sal_Char* pAsciiStr ) const
519 {
520     DBG_CHKTHIS( UniString, DbgCheckUniString );
521     DBG_ASSERT( ImplDbgCheckAsciiStr( pAsciiStr, STRING_LEN ),
522                 "UniString::EqualsIgnoreCaseAscii() - pAsciiStr include characters > 127" );
523 
524     return (ImplStringICompareAscii( mpData->maStr, pAsciiStr ) == 0);
525 }
526 
527 // -----------------------------------------------------------------------
528 
529 sal_Bool UniString::EqualsAscii( const sal_Char* pAsciiStr,
530                              xub_StrLen nIndex, xub_StrLen nLen ) const
531 {
532     DBG_CHKTHIS( UniString, DbgCheckUniString );
533     DBG_ASSERT( ImplDbgCheckAsciiStr( pAsciiStr, nLen ),
534                 "UniString::EqualsAscii() - pAsciiStr include characters > 127" );
535 
536     // Are there enough codes for comparing?
537     if ( nIndex > mpData->mnLen )
538         return (*pAsciiStr == 0);
539 
540     return (ImplStringCompareAscii( mpData->maStr+nIndex, pAsciiStr, nLen ) == 0);
541 }
542 
543 // -----------------------------------------------------------------------
544 
545 sal_Bool UniString::EqualsIgnoreCaseAscii( const sal_Char* pAsciiStr,
546                                        xub_StrLen nIndex, xub_StrLen nLen ) const
547 {
548     DBG_CHKTHIS( UniString, DbgCheckUniString );
549     DBG_ASSERT( ImplDbgCheckAsciiStr( pAsciiStr, nLen ),
550                 "UniString::EqualsIgnoreCaseAscii() - pAsciiStr include characters > 127" );
551 
552     // Are there enough codes for comparing?
553     if ( nIndex > mpData->mnLen )
554         return (*pAsciiStr == 0);
555 
556     return (ImplStringICompareAscii( mpData->maStr+nIndex, pAsciiStr, nLen ) == 0);
557 }
558 
559 // -----------------------------------------------------------------------
560 
561 xub_StrLen UniString::SearchAscii( const sal_Char* pAsciiStr, xub_StrLen nIndex ) const
562 {
563     DBG_CHKTHIS( UniString, DbgCheckUniString );
564     DBG_ASSERT( ImplDbgCheckAsciiStr( pAsciiStr, STRING_LEN ),
565                 "UniString::SearchAscii() - pAsciiStr include characters > 127" );
566 
567     sal_Int32 nLen = mpData->mnLen;
568     xub_StrLen nStrLen  = ImplStringLen( pAsciiStr );
569 
570     // Falls die Laenge des uebergebenen Strings 0 ist oder der Index
571     // hinter dem String liegt, dann wurde der String nicht gefunden
572     if ( !nStrLen || (nIndex >= nLen) )
573         return STRING_NOTFOUND;
574 
575     const sal_Unicode* pStr = mpData->maStr;
576     pStr += nIndex;
577 
578     if ( nStrLen == 1 )
579     {
580         sal_Unicode cSearch = (unsigned char)*pAsciiStr;
581         while ( nIndex < nLen )
582         {
583             if ( *pStr == cSearch )
584                 return nIndex;
585             ++pStr,
586             ++nIndex;
587         }
588     }
589     else
590     {
591         // Nur innerhalb des Strings suchen
592         while ( nLen - nIndex >= nStrLen )
593         {
594             // Stimmt der String ueberein
595             if ( ImplStringCompareWithoutZeroAscii( pStr, pAsciiStr, nStrLen ) == 0 )
596                 return nIndex;
597             ++pStr,
598             ++nIndex;
599         }
600     }
601 
602     return STRING_NOTFOUND;
603 }
604 
605 // -----------------------------------------------------------------------
606 
607 xub_StrLen UniString::SearchAndReplaceAscii( const sal_Char* pAsciiStr, const UniString& rRepStr,
608                                              xub_StrLen nIndex )
609 {
610     DBG_CHKTHIS( UniString, DbgCheckUniString );
611     DBG_ASSERT( ImplDbgCheckAsciiStr( pAsciiStr, STRING_LEN ),
612                 "UniString::SearchAndReplaceAscii() - pAsciiStr include characters > 127" );
613 
614     xub_StrLen nSPos = SearchAscii( pAsciiStr, nIndex );
615     if ( nSPos != STRING_NOTFOUND )
616         Replace( nSPos, ImplStringLen( pAsciiStr ), rRepStr );
617 
618     return nSPos;
619 }
620 
621 // -----------------------------------------------------------------------
622 
623 void UniString::SearchAndReplaceAllAscii( const sal_Char* pAsciiStr, const UniString& rRepStr )
624 {
625     DBG_CHKTHIS( UniString, DbgCheckUniString );
626     DBG_ASSERT( ImplDbgCheckAsciiStr( pAsciiStr, STRING_LEN ),
627                 "UniString::SearchAndReplaceAllAscii() - pAsciiStr include characters > 127" );
628 
629     xub_StrLen nCharLen = ImplStringLen( pAsciiStr );
630     xub_StrLen nSPos = SearchAscii( pAsciiStr, 0 );
631     while ( nSPos != STRING_NOTFOUND )
632     {
633         Replace( nSPos, nCharLen, rRepStr );
634         nSPos = nSPos + rRepStr.Len();
635         nSPos = SearchAscii( pAsciiStr, nSPos );
636     }
637 }
638