xref: /trunk/main/tools/source/string/strcvt.cxx (revision cdf0e10c4e3984b49a9502b011690b615761d4a3)
1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir // no include "precompiled_tools.hxx" because this is included in other cxx files.
29*cdf0e10cSrcweir 
30*cdf0e10cSrcweir // -----------------------------------------------------------------------
31*cdf0e10cSrcweir 
32*cdf0e10cSrcweir void ByteString::ImplUpdateStringFromUniString(
33*cdf0e10cSrcweir     const sal_Unicode* pUniStr, sal_Size nUniLen,
34*cdf0e10cSrcweir     rtl_TextEncoding eTextEncoding, sal_uInt32 nCvtFlags )
35*cdf0e10cSrcweir {
36*cdf0e10cSrcweir     ByteStringData* pNewStringData = NULL;
37*cdf0e10cSrcweir     rtl_uString2String( (rtl_String **)(&pNewStringData),
38*cdf0e10cSrcweir                         pUniStr, nUniLen,
39*cdf0e10cSrcweir                         eTextEncoding, nCvtFlags );
40*cdf0e10cSrcweir     STRING_RELEASE((STRING_TYPE *)mpData);
41*cdf0e10cSrcweir     mpData = pNewStringData;
42*cdf0e10cSrcweir }
43*cdf0e10cSrcweir 
44*cdf0e10cSrcweir // =======================================================================
45*cdf0e10cSrcweir 
46*cdf0e10cSrcweir ByteString::ByteString( const UniString& rUniStr, rtl_TextEncoding eTextEncoding, sal_uInt32 nCvtFlags )
47*cdf0e10cSrcweir {
48*cdf0e10cSrcweir     DBG_CTOR( ByteString, DbgCheckByteString );
49*cdf0e10cSrcweir     DBG_CHKOBJ( &rUniStr, UniString, DbgCheckUniString );
50*cdf0e10cSrcweir 
51*cdf0e10cSrcweir     mpData = NULL;
52*cdf0e10cSrcweir     rtl_uString2String( (rtl_String **)(&mpData),
53*cdf0e10cSrcweir                         rUniStr.mpData->maStr, rUniStr.mpData->mnLen,
54*cdf0e10cSrcweir                         eTextEncoding, nCvtFlags );
55*cdf0e10cSrcweir }
56*cdf0e10cSrcweir 
57*cdf0e10cSrcweir // -----------------------------------------------------------------------
58*cdf0e10cSrcweir 
59*cdf0e10cSrcweir ByteString::ByteString( const UniString& rUniStr, xub_StrLen nPos, xub_StrLen nLen,
60*cdf0e10cSrcweir                         rtl_TextEncoding eTextEncoding, sal_uInt32 nCvtFlags )
61*cdf0e10cSrcweir {
62*cdf0e10cSrcweir     DBG_CTOR( ByteString, DbgCheckByteString );
63*cdf0e10cSrcweir     DBG_CHKOBJ( &rUniStr, UniString, DbgCheckUniString );
64*cdf0e10cSrcweir 
65*cdf0e10cSrcweir     // Stringlaenge ermitteln
66*cdf0e10cSrcweir     if ( nPos > rUniStr.mpData->mnLen )
67*cdf0e10cSrcweir         nLen = 0;
68*cdf0e10cSrcweir     else
69*cdf0e10cSrcweir     {
70*cdf0e10cSrcweir         // Laenge korrigieren, wenn noetig
71*cdf0e10cSrcweir         sal_Int32 nMaxLen = rUniStr.mpData->mnLen-nPos;
72*cdf0e10cSrcweir         if ( nLen > nMaxLen )
73*cdf0e10cSrcweir             nLen = static_cast< xub_StrLen >(nMaxLen);
74*cdf0e10cSrcweir     }
75*cdf0e10cSrcweir 
76*cdf0e10cSrcweir     mpData = NULL;
77*cdf0e10cSrcweir     rtl_uString2String( (rtl_String **)(&mpData),
78*cdf0e10cSrcweir                         rUniStr.mpData->maStr+nPos, nLen,
79*cdf0e10cSrcweir                         eTextEncoding, nCvtFlags );
80*cdf0e10cSrcweir }
81*cdf0e10cSrcweir 
82*cdf0e10cSrcweir // -----------------------------------------------------------------------
83*cdf0e10cSrcweir 
84*cdf0e10cSrcweir ByteString::ByteString( const sal_Unicode* pUniStr,
85*cdf0e10cSrcweir                         rtl_TextEncoding eTextEncoding, sal_uInt32 nCvtFlags )
86*cdf0e10cSrcweir {
87*cdf0e10cSrcweir     DBG_CTOR( ByteString, DbgCheckByteString );
88*cdf0e10cSrcweir     DBG_ASSERT( pUniStr, "ByteString::ByteString() - pUniStr is NULL" );
89*cdf0e10cSrcweir 
90*cdf0e10cSrcweir     mpData = NULL;
91*cdf0e10cSrcweir     rtl_uString2String( (rtl_String **)(&mpData),
92*cdf0e10cSrcweir                         pUniStr, ImplStringLen( pUniStr ),
93*cdf0e10cSrcweir                         eTextEncoding, nCvtFlags );
94*cdf0e10cSrcweir }
95*cdf0e10cSrcweir 
96*cdf0e10cSrcweir // -----------------------------------------------------------------------
97*cdf0e10cSrcweir 
98*cdf0e10cSrcweir ByteString::ByteString( const sal_Unicode* pUniStr, xub_StrLen nLen,
99*cdf0e10cSrcweir                         rtl_TextEncoding eTextEncoding, sal_uInt32 nCvtFlags )
100*cdf0e10cSrcweir {
101*cdf0e10cSrcweir     DBG_CTOR( ByteString, DbgCheckByteString );
102*cdf0e10cSrcweir     DBG_ASSERT( pUniStr, "ByteString::ByteString() - pUniStr is NULL" );
103*cdf0e10cSrcweir 
104*cdf0e10cSrcweir     if ( nLen == STRING_LEN )
105*cdf0e10cSrcweir         nLen = ImplStringLen( pUniStr );
106*cdf0e10cSrcweir 
107*cdf0e10cSrcweir     mpData = NULL;
108*cdf0e10cSrcweir     rtl_uString2String( (rtl_String **)(&mpData),
109*cdf0e10cSrcweir                         pUniStr, nLen,
110*cdf0e10cSrcweir                         eTextEncoding, nCvtFlags );
111*cdf0e10cSrcweir }
112*cdf0e10cSrcweir 
113*cdf0e10cSrcweir // =======================================================================
114*cdf0e10cSrcweir 
115*cdf0e10cSrcweir static sal_uChar aImplByteTab[256] =
116*cdf0e10cSrcweir {
117*cdf0e10cSrcweir     0,   1,   2,   3,   4,   5,   6,   7,
118*cdf0e10cSrcweir     8,   9,  10,  11,  12,  13,  14,  15,
119*cdf0e10cSrcweir    16,  17,  18,  19,  20,  21,  22,  23,
120*cdf0e10cSrcweir    24,  25,  26,  27,  28,  29,  30,  31,
121*cdf0e10cSrcweir    32,  33,  34,  35,  36,  37,  38,  39,
122*cdf0e10cSrcweir    40,  41,  42,  43,  44,  45,  46,  47,
123*cdf0e10cSrcweir    48,  49,  50,  51,  52,  53,  54,  55,
124*cdf0e10cSrcweir    56,  57,  58,  59,  60,  61,  62,  63,
125*cdf0e10cSrcweir    64,  65,  66,  67,  68,  69,  70,  71,
126*cdf0e10cSrcweir    72,  73,  74,  75,  76,  77,  78,  79,
127*cdf0e10cSrcweir    80,  81,  82,  83,  84,  85,  86,  87,
128*cdf0e10cSrcweir    88,  89,  90,  91,  92,  93,  94,  95,
129*cdf0e10cSrcweir    96,  97,  98,  99, 100, 101, 102, 103,
130*cdf0e10cSrcweir   104, 105, 106, 107, 108, 109, 110, 111,
131*cdf0e10cSrcweir   112, 113, 114, 115, 116, 117, 118, 119,
132*cdf0e10cSrcweir   120, 121, 122, 123, 124, 125, 126, 127,
133*cdf0e10cSrcweir   128, 129, 130, 131, 132, 133, 134, 135,
134*cdf0e10cSrcweir   136, 137, 138, 139, 140, 141, 142, 143,
135*cdf0e10cSrcweir   144, 145, 146, 147, 148, 149, 150, 151,
136*cdf0e10cSrcweir   152, 153, 154, 155, 156, 157, 158, 159,
137*cdf0e10cSrcweir   160, 161, 162, 163, 164, 165, 166, 167,
138*cdf0e10cSrcweir   168, 169, 170, 171, 172, 173, 174, 175,
139*cdf0e10cSrcweir   176, 177, 178, 179, 180, 181, 182, 183,
140*cdf0e10cSrcweir   184, 185, 186, 187, 188, 189, 190, 191,
141*cdf0e10cSrcweir   192, 193, 194, 195, 196, 197, 198, 199,
142*cdf0e10cSrcweir   200, 201, 202, 203, 204, 205, 206, 207,
143*cdf0e10cSrcweir   208, 209, 210, 211, 212, 213, 214, 215,
144*cdf0e10cSrcweir   216, 217, 218, 219, 220, 221, 222, 223,
145*cdf0e10cSrcweir   224, 225, 226, 227, 228, 229, 230, 231,
146*cdf0e10cSrcweir   232, 233, 234, 235, 236, 237, 238, 239,
147*cdf0e10cSrcweir   240, 241, 242, 243, 244, 245, 246, 247,
148*cdf0e10cSrcweir   248, 249, 250, 251, 252, 253, 254, 255
149*cdf0e10cSrcweir };
150*cdf0e10cSrcweir 
151*cdf0e10cSrcweir // =======================================================================
152*cdf0e10cSrcweir 
153*cdf0e10cSrcweir struct Impl1ByteUnicodeTabData
154*cdf0e10cSrcweir {
155*cdf0e10cSrcweir     rtl_TextEncoding            meTextEncoding;
156*cdf0e10cSrcweir     sal_Unicode                 maUniTab[256];
157*cdf0e10cSrcweir     Impl1ByteUnicodeTabData*    mpNext;
158*cdf0e10cSrcweir };
159*cdf0e10cSrcweir 
160*cdf0e10cSrcweir // -----------------------------------------------------------------------
161*cdf0e10cSrcweir 
162*cdf0e10cSrcweir struct Impl1ByteConvertTabData
163*cdf0e10cSrcweir {
164*cdf0e10cSrcweir     rtl_TextEncoding            meSrcTextEncoding;
165*cdf0e10cSrcweir     rtl_TextEncoding            meDestTextEncoding;
166*cdf0e10cSrcweir     sal_uChar                   maConvertTab[256];
167*cdf0e10cSrcweir     sal_uChar                   maRepConvertTab[256];
168*cdf0e10cSrcweir     Impl1ByteConvertTabData*    mpNext;
169*cdf0e10cSrcweir };
170*cdf0e10cSrcweir 
171*cdf0e10cSrcweir // =======================================================================
172*cdf0e10cSrcweir 
173*cdf0e10cSrcweir sal_Unicode* ImplGet1ByteUnicodeTab( rtl_TextEncoding eTextEncoding )
174*cdf0e10cSrcweir {
175*cdf0e10cSrcweir #ifndef BOOTSTRAP
176*cdf0e10cSrcweir     TOOLSINDATA*                pToolsData = ImplGetToolsInData();
177*cdf0e10cSrcweir #else
178*cdf0e10cSrcweir     TOOLSINDATA*                pToolsData = 0x0;
179*cdf0e10cSrcweir #endif
180*cdf0e10cSrcweir     Impl1ByteUnicodeTabData*    pTab = pToolsData->mpFirstUniTabData;
181*cdf0e10cSrcweir 
182*cdf0e10cSrcweir     while ( pTab )
183*cdf0e10cSrcweir     {
184*cdf0e10cSrcweir         if ( pTab->meTextEncoding == eTextEncoding )
185*cdf0e10cSrcweir             return pTab->maUniTab;
186*cdf0e10cSrcweir         pTab = pTab->mpNext;
187*cdf0e10cSrcweir     }
188*cdf0e10cSrcweir 
189*cdf0e10cSrcweir     // get TextEncodingInfo
190*cdf0e10cSrcweir     rtl_TextEncodingInfo aTextEncInfo;
191*cdf0e10cSrcweir     aTextEncInfo.StructSize = sizeof( aTextEncInfo );
192*cdf0e10cSrcweir     rtl_getTextEncodingInfo( eTextEncoding, &aTextEncInfo );
193*cdf0e10cSrcweir 
194*cdf0e10cSrcweir     if ( aTextEncInfo.MaximumCharSize == 1 )
195*cdf0e10cSrcweir     {
196*cdf0e10cSrcweir         pTab = new Impl1ByteUnicodeTabData;
197*cdf0e10cSrcweir         pTab->meTextEncoding = eTextEncoding;
198*cdf0e10cSrcweir         pTab->mpNext = pToolsData->mpFirstUniTabData;
199*cdf0e10cSrcweir 
200*cdf0e10cSrcweir         rtl_TextToUnicodeConverter  hConverter;
201*cdf0e10cSrcweir         sal_uInt32                  nInfo;
202*cdf0e10cSrcweir         sal_Size                    nSrcBytes;
203*cdf0e10cSrcweir         sal_Size                    nDestChars;
204*cdf0e10cSrcweir         hConverter = rtl_createTextToUnicodeConverter( eTextEncoding );
205*cdf0e10cSrcweir         nDestChars = rtl_convertTextToUnicode( hConverter, 0,
206*cdf0e10cSrcweir                                                (const sal_Char*)aImplByteTab, 256,
207*cdf0e10cSrcweir                                                pTab->maUniTab, 256,
208*cdf0e10cSrcweir                                                RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE |
209*cdf0e10cSrcweir                                                RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
210*cdf0e10cSrcweir                                                RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT,
211*cdf0e10cSrcweir                                                &nInfo, &nSrcBytes );
212*cdf0e10cSrcweir         rtl_destroyTextToUnicodeConverter( hConverter );
213*cdf0e10cSrcweir 
214*cdf0e10cSrcweir         if ( (nSrcBytes != 256) || (nDestChars != 256) )
215*cdf0e10cSrcweir             delete pTab;
216*cdf0e10cSrcweir         else
217*cdf0e10cSrcweir         {
218*cdf0e10cSrcweir             pToolsData->mpFirstUniTabData = pTab;
219*cdf0e10cSrcweir             return pTab->maUniTab;
220*cdf0e10cSrcweir         }
221*cdf0e10cSrcweir     }
222*cdf0e10cSrcweir 
223*cdf0e10cSrcweir     return NULL;
224*cdf0e10cSrcweir }
225*cdf0e10cSrcweir 
226*cdf0e10cSrcweir // -----------------------------------------------------------------------
227*cdf0e10cSrcweir 
228*cdf0e10cSrcweir static sal_uChar* ImplGet1ByteConvertTab( rtl_TextEncoding eSrcTextEncoding,
229*cdf0e10cSrcweir                                           rtl_TextEncoding eDestTextEncoding,
230*cdf0e10cSrcweir                                           sal_Bool bReplace )
231*cdf0e10cSrcweir {
232*cdf0e10cSrcweir #ifndef BOOTSTRAP
233*cdf0e10cSrcweir     TOOLSINDATA*                pToolsData = ImplGetToolsInData();
234*cdf0e10cSrcweir #else
235*cdf0e10cSrcweir     TOOLSINDATA*                pToolsData = 0x0;
236*cdf0e10cSrcweir #endif
237*cdf0e10cSrcweir     Impl1ByteConvertTabData*    pTab = pToolsData->mpFirstConvertTabData;
238*cdf0e10cSrcweir 
239*cdf0e10cSrcweir     while ( pTab )
240*cdf0e10cSrcweir     {
241*cdf0e10cSrcweir         if ( (pTab->meSrcTextEncoding == eSrcTextEncoding) &&
242*cdf0e10cSrcweir              (pTab->meDestTextEncoding == eDestTextEncoding) )
243*cdf0e10cSrcweir         {
244*cdf0e10cSrcweir             if ( bReplace )
245*cdf0e10cSrcweir                 return pTab->maRepConvertTab;
246*cdf0e10cSrcweir             else
247*cdf0e10cSrcweir                 return pTab->maConvertTab;
248*cdf0e10cSrcweir         }
249*cdf0e10cSrcweir         pTab = pTab->mpNext;
250*cdf0e10cSrcweir     }
251*cdf0e10cSrcweir 
252*cdf0e10cSrcweir     // get TextEncodingInfo
253*cdf0e10cSrcweir     rtl_TextEncodingInfo aTextEncInfo1;
254*cdf0e10cSrcweir     aTextEncInfo1.StructSize = sizeof( aTextEncInfo1 );
255*cdf0e10cSrcweir     rtl_getTextEncodingInfo( eSrcTextEncoding, &aTextEncInfo1 );
256*cdf0e10cSrcweir     rtl_TextEncodingInfo aTextEncInfo2;
257*cdf0e10cSrcweir     aTextEncInfo2.StructSize = sizeof( aTextEncInfo2 );
258*cdf0e10cSrcweir     rtl_getTextEncodingInfo( eDestTextEncoding, &aTextEncInfo2 );
259*cdf0e10cSrcweir 
260*cdf0e10cSrcweir     if ( (aTextEncInfo1.MaximumCharSize == 1) &&
261*cdf0e10cSrcweir          (aTextEncInfo2.MaximumCharSize == 1) )
262*cdf0e10cSrcweir     {
263*cdf0e10cSrcweir         pTab = new Impl1ByteConvertTabData;
264*cdf0e10cSrcweir         pTab->meSrcTextEncoding = eSrcTextEncoding;
265*cdf0e10cSrcweir         pTab->meDestTextEncoding = eDestTextEncoding;
266*cdf0e10cSrcweir         pTab->mpNext = pToolsData->mpFirstConvertTabData;
267*cdf0e10cSrcweir 
268*cdf0e10cSrcweir         rtl_TextToUnicodeConverter  hConverter;
269*cdf0e10cSrcweir         rtl_UnicodeToTextConverter  hConverter2;
270*cdf0e10cSrcweir         sal_uInt32                  nInfo;
271*cdf0e10cSrcweir         sal_Size                    nSrcBytes;
272*cdf0e10cSrcweir         sal_Size                    nDestChars;
273*cdf0e10cSrcweir         sal_Size                    nSrcChars;
274*cdf0e10cSrcweir         sal_Size                    nDestBytes;
275*cdf0e10cSrcweir         sal_Unicode                 aTempBuf[256];
276*cdf0e10cSrcweir         hConverter = rtl_createTextToUnicodeConverter( eSrcTextEncoding );
277*cdf0e10cSrcweir         nDestChars = rtl_convertTextToUnicode( hConverter, 0,
278*cdf0e10cSrcweir                                                (const sal_Char*)aImplByteTab, 256,
279*cdf0e10cSrcweir                                                aTempBuf, 256,
280*cdf0e10cSrcweir                                                RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
281*cdf0e10cSrcweir                                                RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
282*cdf0e10cSrcweir                                                RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT,
283*cdf0e10cSrcweir                                                &nInfo, &nSrcBytes );
284*cdf0e10cSrcweir         rtl_destroyTextToUnicodeConverter( hConverter );
285*cdf0e10cSrcweir         if ( (nSrcBytes != 256) || (nDestChars != 256) )
286*cdf0e10cSrcweir             delete pTab;
287*cdf0e10cSrcweir         else
288*cdf0e10cSrcweir         {
289*cdf0e10cSrcweir             hConverter2 = rtl_createUnicodeToTextConverter( eDestTextEncoding );
290*cdf0e10cSrcweir             nDestBytes = rtl_convertUnicodeToText( hConverter2, 0,
291*cdf0e10cSrcweir                                                    aTempBuf, 256,
292*cdf0e10cSrcweir                                                    (sal_Char*)pTab->maConvertTab, 256,
293*cdf0e10cSrcweir                                                    RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0 |
294*cdf0e10cSrcweir                                                    RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT,
295*cdf0e10cSrcweir                                                    &nInfo, &nSrcChars );
296*cdf0e10cSrcweir             if ( (nDestBytes == 256) || (nSrcChars == 256) )
297*cdf0e10cSrcweir             {
298*cdf0e10cSrcweir                 nDestBytes = rtl_convertUnicodeToText( hConverter2, 0,
299*cdf0e10cSrcweir                                                        aTempBuf, 256,
300*cdf0e10cSrcweir                                                        (sal_Char*)pTab->maRepConvertTab, 256,
301*cdf0e10cSrcweir                                                        RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT |
302*cdf0e10cSrcweir                                                        RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT |
303*cdf0e10cSrcweir                                                        RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE,
304*cdf0e10cSrcweir                                                        &nInfo, &nSrcChars );
305*cdf0e10cSrcweir             }
306*cdf0e10cSrcweir             rtl_destroyUnicodeToTextConverter( hConverter2 );
307*cdf0e10cSrcweir             if ( (nDestBytes != 256) || (nSrcChars != 256) )
308*cdf0e10cSrcweir                 delete pTab;
309*cdf0e10cSrcweir             else
310*cdf0e10cSrcweir             {
311*cdf0e10cSrcweir                 pToolsData->mpFirstConvertTabData = pTab;
312*cdf0e10cSrcweir                 if ( bReplace )
313*cdf0e10cSrcweir                     return pTab->maRepConvertTab;
314*cdf0e10cSrcweir                 else
315*cdf0e10cSrcweir                     return pTab->maConvertTab;
316*cdf0e10cSrcweir             }
317*cdf0e10cSrcweir         }
318*cdf0e10cSrcweir     }
319*cdf0e10cSrcweir 
320*cdf0e10cSrcweir     return NULL;
321*cdf0e10cSrcweir }
322*cdf0e10cSrcweir 
323*cdf0e10cSrcweir // =======================================================================
324*cdf0e10cSrcweir 
325*cdf0e10cSrcweir void ImplDeleteCharTabData()
326*cdf0e10cSrcweir {
327*cdf0e10cSrcweir #ifndef BOOTSTRAP
328*cdf0e10cSrcweir     TOOLSINDATA*                pToolsData = ImplGetToolsInData();
329*cdf0e10cSrcweir #else
330*cdf0e10cSrcweir     TOOLSINDATA*                pToolsData = 0x0;
331*cdf0e10cSrcweir #endif
332*cdf0e10cSrcweir     Impl1ByteUnicodeTabData*    pTempUniTab;
333*cdf0e10cSrcweir     Impl1ByteUnicodeTabData*    pUniTab = pToolsData->mpFirstUniTabData;
334*cdf0e10cSrcweir     while ( pUniTab )
335*cdf0e10cSrcweir     {
336*cdf0e10cSrcweir         pTempUniTab = pUniTab->mpNext;
337*cdf0e10cSrcweir         delete pUniTab;
338*cdf0e10cSrcweir         pUniTab = pTempUniTab;
339*cdf0e10cSrcweir     }
340*cdf0e10cSrcweir     pToolsData->mpFirstUniTabData = NULL;
341*cdf0e10cSrcweir 
342*cdf0e10cSrcweir     Impl1ByteConvertTabData*    pTempConvertTab;
343*cdf0e10cSrcweir     Impl1ByteConvertTabData*    pConvertTab = pToolsData->mpFirstConvertTabData;
344*cdf0e10cSrcweir     while ( pConvertTab )
345*cdf0e10cSrcweir     {
346*cdf0e10cSrcweir         pTempConvertTab = pConvertTab->mpNext;
347*cdf0e10cSrcweir         delete pConvertTab;
348*cdf0e10cSrcweir         pConvertTab = pTempConvertTab;
349*cdf0e10cSrcweir     }
350*cdf0e10cSrcweir     pToolsData->mpFirstConvertTabData = NULL;
351*cdf0e10cSrcweir }
352*cdf0e10cSrcweir 
353*cdf0e10cSrcweir // =======================================================================
354*cdf0e10cSrcweir 
355*cdf0e10cSrcweir void ByteString::ImplStringConvert(
356*cdf0e10cSrcweir     rtl_TextEncoding eSource, rtl_TextEncoding eTarget, sal_Bool bReplace )
357*cdf0e10cSrcweir {
358*cdf0e10cSrcweir     sal_uChar* pConvertTab = ImplGet1ByteConvertTab( eSource, eTarget, bReplace );
359*cdf0e10cSrcweir     if ( pConvertTab )
360*cdf0e10cSrcweir     {
361*cdf0e10cSrcweir         char* pStr = mpData->maStr;
362*cdf0e10cSrcweir         while ( *pStr )
363*cdf0e10cSrcweir         {
364*cdf0e10cSrcweir             sal_uChar c = (sal_uChar)*pStr;
365*cdf0e10cSrcweir             sal_uChar cConv = pConvertTab[c];
366*cdf0e10cSrcweir             if ( c != cConv )
367*cdf0e10cSrcweir             {
368*cdf0e10cSrcweir                 pStr = ImplCopyStringData( pStr );
369*cdf0e10cSrcweir                 *pStr = (char)cConv;
370*cdf0e10cSrcweir             }
371*cdf0e10cSrcweir 
372*cdf0e10cSrcweir             pStr++;
373*cdf0e10cSrcweir         }
374*cdf0e10cSrcweir     }
375*cdf0e10cSrcweir     else
376*cdf0e10cSrcweir     {
377*cdf0e10cSrcweir         rtl_UnicodeToTextConverter  hSrcConverter = rtl_createTextToUnicodeConverter( eSource );
378*cdf0e10cSrcweir         sal_uInt32                  nInfo;
379*cdf0e10cSrcweir         sal_Size                    nSrcBytes;
380*cdf0e10cSrcweir         sal_Size                    nDestChars;
381*cdf0e10cSrcweir         sal_Size                    nTempLen;
382*cdf0e10cSrcweir         sal_Unicode*                pTempBuf;
383*cdf0e10cSrcweir         nTempLen = mpData->mnLen;
384*cdf0e10cSrcweir         pTempBuf = new sal_Unicode[nTempLen];
385*cdf0e10cSrcweir         nDestChars = rtl_convertTextToUnicode( hSrcConverter, 0,
386*cdf0e10cSrcweir                                                mpData->maStr, mpData->mnLen,
387*cdf0e10cSrcweir                                                pTempBuf, nTempLen,
388*cdf0e10cSrcweir                                                RTL_TEXTTOUNICODE_FLAGS_FLUSH |
389*cdf0e10cSrcweir                                                RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE |
390*cdf0e10cSrcweir                                                RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
391*cdf0e10cSrcweir                                                RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT,
392*cdf0e10cSrcweir                                                &nInfo, &nSrcBytes );
393*cdf0e10cSrcweir         rtl_destroyTextToUnicodeConverter( hSrcConverter );
394*cdf0e10cSrcweir         // Hier werten wir bReplace nicht aus, da fuer MultiByte-Textencodings
395*cdf0e10cSrcweir         // sowieso keine Ersatzdarstellung moeglich ist. Da sich der String
396*cdf0e10cSrcweir         // sowieso in der Laenge aendern kann, nehmen wir auch sonst keine
397*cdf0e10cSrcweir         // Ruecksicht darauf, das die Laenge erhalten bleibt.
398*cdf0e10cSrcweir         ImplUpdateStringFromUniString( pTempBuf, nDestChars, eTarget,
399*cdf0e10cSrcweir                                        RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT |
400*cdf0e10cSrcweir                                        RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT |
401*cdf0e10cSrcweir                                        RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE |
402*cdf0e10cSrcweir                                        RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR |
403*cdf0e10cSrcweir                                        RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 |
404*cdf0e10cSrcweir                                        RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE |
405*cdf0e10cSrcweir                                        RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE );
406*cdf0e10cSrcweir         delete [] pTempBuf;
407*cdf0e10cSrcweir     }
408*cdf0e10cSrcweir }
409*cdf0e10cSrcweir 
410*cdf0e10cSrcweir // =======================================================================
411*cdf0e10cSrcweir 
412*cdf0e10cSrcweir ByteString& ByteString::Convert( rtl_TextEncoding eSource, rtl_TextEncoding eTarget, sal_Bool bReplace )
413*cdf0e10cSrcweir {
414*cdf0e10cSrcweir     DBG_CHKTHIS( ByteString, DbgCheckByteString );
415*cdf0e10cSrcweir 
416*cdf0e10cSrcweir     // rtl_TextEncoding Dontknow kann nicht konvertiert werden
417*cdf0e10cSrcweir     if ( (eSource == RTL_TEXTENCODING_DONTKNOW) || (eTarget == RTL_TEXTENCODING_DONTKNOW) )
418*cdf0e10cSrcweir         return *this;
419*cdf0e10cSrcweir 
420*cdf0e10cSrcweir     // Wenn Source und Target gleich sind, muss nicht konvertiert werden
421*cdf0e10cSrcweir     if ( eSource == eTarget )
422*cdf0e10cSrcweir         return *this;
423*cdf0e10cSrcweir 
424*cdf0e10cSrcweir     // rtl_TextEncoding Symbol nur nach Unicode oder von Unicode wandeln, ansonsten
425*cdf0e10cSrcweir     // wollen wir die Zeichencodes beibehalten
426*cdf0e10cSrcweir     if ( (eSource == RTL_TEXTENCODING_SYMBOL) &&
427*cdf0e10cSrcweir          (eTarget != RTL_TEXTENCODING_UTF7) &&
428*cdf0e10cSrcweir          (eTarget != RTL_TEXTENCODING_UTF8) )
429*cdf0e10cSrcweir         return *this;
430*cdf0e10cSrcweir     if ( (eTarget == RTL_TEXTENCODING_SYMBOL) &&
431*cdf0e10cSrcweir          (eSource != RTL_TEXTENCODING_UTF7) &&
432*cdf0e10cSrcweir          (eSource != RTL_TEXTENCODING_UTF8) )
433*cdf0e10cSrcweir         return *this;
434*cdf0e10cSrcweir 
435*cdf0e10cSrcweir     // Zeichensatz umwandeln
436*cdf0e10cSrcweir     ImplStringConvert( eSource, eTarget, bReplace );
437*cdf0e10cSrcweir 
438*cdf0e10cSrcweir     return *this;
439*cdf0e10cSrcweir }
440*cdf0e10cSrcweir 
441*cdf0e10cSrcweir // =======================================================================
442*cdf0e10cSrcweir 
443*cdf0e10cSrcweir char ByteString::Convert( char c,
444*cdf0e10cSrcweir                           rtl_TextEncoding eSource, rtl_TextEncoding eTarget,
445*cdf0e10cSrcweir                           sal_Bool bReplace )
446*cdf0e10cSrcweir {
447*cdf0e10cSrcweir     // TextEncoding Dontknow kann nicht konvertiert werden
448*cdf0e10cSrcweir     if ( (eSource == RTL_TEXTENCODING_DONTKNOW) || (eTarget == RTL_TEXTENCODING_DONTKNOW) )
449*cdf0e10cSrcweir         return '\0';
450*cdf0e10cSrcweir 
451*cdf0e10cSrcweir     // Wenn Source und Target gleich sind, muss nicht konvertiert werden
452*cdf0e10cSrcweir     if ( eSource == eTarget )
453*cdf0e10cSrcweir         return c;
454*cdf0e10cSrcweir 
455*cdf0e10cSrcweir     // TextEncoding Symbol nur nach Unicode oder von Unicode wandeln, ansonsten
456*cdf0e10cSrcweir     // wollen wir die Zeichencodes beibehalten
457*cdf0e10cSrcweir     if ( (eSource == RTL_TEXTENCODING_SYMBOL) &&
458*cdf0e10cSrcweir          (eTarget != RTL_TEXTENCODING_UTF7) &&
459*cdf0e10cSrcweir          (eTarget != RTL_TEXTENCODING_UTF8) )
460*cdf0e10cSrcweir         return '\0';
461*cdf0e10cSrcweir     if ( (eTarget == RTL_TEXTENCODING_SYMBOL) &&
462*cdf0e10cSrcweir          (eSource != RTL_TEXTENCODING_UTF7) &&
463*cdf0e10cSrcweir          (eSource != RTL_TEXTENCODING_UTF8) )
464*cdf0e10cSrcweir         return '\0';
465*cdf0e10cSrcweir 
466*cdf0e10cSrcweir     sal_uChar* pConvertTab = ImplGet1ByteConvertTab( eSource, eTarget, bReplace );
467*cdf0e10cSrcweir     if ( pConvertTab )
468*cdf0e10cSrcweir         return (char)pConvertTab[(sal_uChar)c];
469*cdf0e10cSrcweir     else
470*cdf0e10cSrcweir         return '\0';
471*cdf0e10cSrcweir }
472*cdf0e10cSrcweir 
473*cdf0e10cSrcweir // =======================================================================
474*cdf0e10cSrcweir 
475*cdf0e10cSrcweir sal_Unicode ByteString::ConvertToUnicode( char c, rtl_TextEncoding eTextEncoding )
476*cdf0e10cSrcweir {
477*cdf0e10cSrcweir     sal_Size nLen = 1;
478*cdf0e10cSrcweir     return ConvertToUnicode( &c, &nLen, eTextEncoding );
479*cdf0e10cSrcweir }
480*cdf0e10cSrcweir 
481*cdf0e10cSrcweir // -----------------------------------------------------------------------
482*cdf0e10cSrcweir 
483*cdf0e10cSrcweir char ByteString::ConvertFromUnicode( sal_Unicode c, rtl_TextEncoding eTextEncoding, sal_Bool bReplace )
484*cdf0e10cSrcweir {
485*cdf0e10cSrcweir     sal_Size    nLen;
486*cdf0e10cSrcweir     char        aBuf[30];
487*cdf0e10cSrcweir     nLen = ConvertFromUnicode( c, aBuf, sizeof( aBuf ), eTextEncoding, bReplace );
488*cdf0e10cSrcweir     if ( nLen == 1 )
489*cdf0e10cSrcweir         return aBuf[0];
490*cdf0e10cSrcweir     else
491*cdf0e10cSrcweir         return 0;
492*cdf0e10cSrcweir }
493*cdf0e10cSrcweir 
494*cdf0e10cSrcweir // -----------------------------------------------------------------------
495*cdf0e10cSrcweir 
496*cdf0e10cSrcweir sal_Unicode ByteString::ConvertToUnicode( const char* pChar, sal_Size* pLen, rtl_TextEncoding eTextEncoding )
497*cdf0e10cSrcweir {
498*cdf0e10cSrcweir     // TextEncoding Dontknow wird nicht konvertiert
499*cdf0e10cSrcweir     if ( eTextEncoding == RTL_TEXTENCODING_DONTKNOW )
500*cdf0e10cSrcweir         return 0;
501*cdf0e10cSrcweir 
502*cdf0e10cSrcweir     rtl_TextToUnicodeConverter  hConverter;
503*cdf0e10cSrcweir     sal_uInt32                  nInfo;
504*cdf0e10cSrcweir     sal_Size                    nSrcBytes;
505*cdf0e10cSrcweir     sal_Size                    nDestChars;
506*cdf0e10cSrcweir     sal_Unicode                 nConvChar;
507*cdf0e10cSrcweir     hConverter = rtl_createTextToUnicodeConverter( eTextEncoding );
508*cdf0e10cSrcweir     nDestChars = rtl_convertTextToUnicode( hConverter, 0,
509*cdf0e10cSrcweir                                            (const sal_Char*)pChar, *pLen,
510*cdf0e10cSrcweir                                            &nConvChar, 1,
511*cdf0e10cSrcweir                                            RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
512*cdf0e10cSrcweir                                            RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
513*cdf0e10cSrcweir                                            RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT |
514*cdf0e10cSrcweir                                            RTL_TEXTTOUNICODE_FLAGS_FLUSH,
515*cdf0e10cSrcweir                                            &nInfo, &nSrcBytes );
516*cdf0e10cSrcweir     rtl_destroyTextToUnicodeConverter( hConverter );
517*cdf0e10cSrcweir 
518*cdf0e10cSrcweir     if ( nDestChars == 1 )
519*cdf0e10cSrcweir     {
520*cdf0e10cSrcweir         *pLen = nSrcBytes;
521*cdf0e10cSrcweir         return nConvChar;
522*cdf0e10cSrcweir     }
523*cdf0e10cSrcweir     else
524*cdf0e10cSrcweir     {
525*cdf0e10cSrcweir         *pLen = 0;
526*cdf0e10cSrcweir         return 0;
527*cdf0e10cSrcweir     }
528*cdf0e10cSrcweir }
529*cdf0e10cSrcweir 
530*cdf0e10cSrcweir // -----------------------------------------------------------------------
531*cdf0e10cSrcweir 
532*cdf0e10cSrcweir sal_Size ByteString::ConvertFromUnicode( sal_Unicode c, char* pBuf, sal_Size nBufLen, rtl_TextEncoding eTextEncoding,
533*cdf0e10cSrcweir                                          sal_Bool bReplace )
534*cdf0e10cSrcweir {
535*cdf0e10cSrcweir     // TextEncoding Dontknow wird nicht konvertiert
536*cdf0e10cSrcweir     if ( eTextEncoding == RTL_TEXTENCODING_DONTKNOW )
537*cdf0e10cSrcweir         return '\0';
538*cdf0e10cSrcweir 
539*cdf0e10cSrcweir     rtl_UnicodeToTextConverter  hConverter;
540*cdf0e10cSrcweir     sal_uInt32                  nInfo;
541*cdf0e10cSrcweir     sal_Size                    nSrcChars;
542*cdf0e10cSrcweir     sal_Size                    nDestBytes;
543*cdf0e10cSrcweir     sal_Unicode                 cUni = c;
544*cdf0e10cSrcweir     sal_uInt32                  nFlags = RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE |
545*cdf0e10cSrcweir                                          RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE |
546*cdf0e10cSrcweir                                          RTL_UNICODETOTEXT_FLAGS_FLUSH;
547*cdf0e10cSrcweir     if ( bReplace )
548*cdf0e10cSrcweir     {
549*cdf0e10cSrcweir         nFlags |= RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT |
550*cdf0e10cSrcweir                   RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT;
551*cdf0e10cSrcweir         nFlags |= RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE;
552*cdf0e10cSrcweir         if ( nBufLen > 1 )
553*cdf0e10cSrcweir             nFlags |= RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR;
554*cdf0e10cSrcweir     }
555*cdf0e10cSrcweir     else
556*cdf0e10cSrcweir     {
557*cdf0e10cSrcweir         nFlags |= RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0 |
558*cdf0e10cSrcweir                   RTL_UNICODETOTEXT_FLAGS_INVALID_0;
559*cdf0e10cSrcweir     }
560*cdf0e10cSrcweir 
561*cdf0e10cSrcweir     hConverter = rtl_createUnicodeToTextConverter( eTextEncoding );
562*cdf0e10cSrcweir     nDestBytes = rtl_convertUnicodeToText( hConverter, 0,
563*cdf0e10cSrcweir                                            &cUni, 1,
564*cdf0e10cSrcweir                                            (sal_Char*)pBuf, nBufLen,
565*cdf0e10cSrcweir                                            nFlags,
566*cdf0e10cSrcweir                                            &nInfo, &nSrcChars );
567*cdf0e10cSrcweir     rtl_destroyUnicodeToTextConverter( hConverter );
568*cdf0e10cSrcweir     return nDestBytes;
569*cdf0e10cSrcweir }
570*cdf0e10cSrcweir 
571*cdf0e10cSrcweir // =======================================================================
572*cdf0e10cSrcweir 
573*cdf0e10cSrcweir ByteString::ByteString( const rtl::OString& rStr )
574*cdf0e10cSrcweir     : mpData(NULL)
575*cdf0e10cSrcweir {
576*cdf0e10cSrcweir     DBG_CTOR( ByteString, DbgCheckByteString );
577*cdf0e10cSrcweir 
578*cdf0e10cSrcweir     OSL_ENSURE(rStr.pData->length < STRING_MAXLEN,
579*cdf0e10cSrcweir                "Overflowing rtl::OString -> ByteString cut to zero length");
580*cdf0e10cSrcweir 
581*cdf0e10cSrcweir     if (rStr.pData->length < STRING_MAXLEN)
582*cdf0e10cSrcweir     {
583*cdf0e10cSrcweir         mpData = reinterpret_cast< ByteStringData * >(const_cast< rtl::OString & >(rStr).pData);
584*cdf0e10cSrcweir         STRING_ACQUIRE((STRING_TYPE *)mpData);
585*cdf0e10cSrcweir     }
586*cdf0e10cSrcweir     else
587*cdf0e10cSrcweir     {
588*cdf0e10cSrcweir         STRING_NEW((STRING_TYPE **)&mpData);
589*cdf0e10cSrcweir     }
590*cdf0e10cSrcweir }
591*cdf0e10cSrcweir 
592*cdf0e10cSrcweir // -----------------------------------------------------------------------
593*cdf0e10cSrcweir 
594*cdf0e10cSrcweir ByteString& ByteString::Assign( const rtl::OString& rStr )
595*cdf0e10cSrcweir {
596*cdf0e10cSrcweir     DBG_CHKTHIS( ByteString, DbgCheckByteString );
597*cdf0e10cSrcweir 
598*cdf0e10cSrcweir     OSL_ENSURE(rStr.pData->length < STRING_MAXLEN,
599*cdf0e10cSrcweir                "Overflowing rtl::OString -> ByteString cut to zero length");
600*cdf0e10cSrcweir 
601*cdf0e10cSrcweir     if (rStr.pData->length < STRING_MAXLEN)
602*cdf0e10cSrcweir     {
603*cdf0e10cSrcweir         STRING_RELEASE((STRING_TYPE *)mpData);
604*cdf0e10cSrcweir         mpData = reinterpret_cast< ByteStringData * >(const_cast< rtl::OString & >(rStr).pData);
605*cdf0e10cSrcweir         STRING_ACQUIRE((STRING_TYPE *)mpData);
606*cdf0e10cSrcweir     }
607*cdf0e10cSrcweir     else
608*cdf0e10cSrcweir     {
609*cdf0e10cSrcweir         STRING_NEW((STRING_TYPE **)&mpData);
610*cdf0e10cSrcweir     }
611*cdf0e10cSrcweir 
612*cdf0e10cSrcweir     return *this;
613*cdf0e10cSrcweir }
614