xref: /trunk/main/tools/source/string/strcvt.cxx (revision cf6516809c57e1bb0a940545cca99cdad54d4ce2)
1*89b56da7SAndrew Rist /**************************************************************
2cdf0e10cSrcweir  *
3*89b56da7SAndrew Rist  * Licensed to the Apache Software Foundation (ASF) under one
4*89b56da7SAndrew Rist  * or more contributor license agreements.  See the NOTICE file
5*89b56da7SAndrew Rist  * distributed with this work for additional information
6*89b56da7SAndrew Rist  * regarding copyright ownership.  The ASF licenses this file
7*89b56da7SAndrew Rist  * to you under the Apache License, Version 2.0 (the
8*89b56da7SAndrew Rist  * "License"); you may not use this file except in compliance
9*89b56da7SAndrew Rist  * with the License.  You may obtain a copy of the License at
10cdf0e10cSrcweir  *
11*89b56da7SAndrew Rist  *   http://www.apache.org/licenses/LICENSE-2.0
12cdf0e10cSrcweir  *
13*89b56da7SAndrew Rist  * Unless required by applicable law or agreed to in writing,
14*89b56da7SAndrew Rist  * software distributed under the License is distributed on an
15*89b56da7SAndrew Rist  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*89b56da7SAndrew Rist  * KIND, either express or implied.  See the License for the
17*89b56da7SAndrew Rist  * specific language governing permissions and limitations
18*89b56da7SAndrew Rist  * under the License.
19cdf0e10cSrcweir  *
20*89b56da7SAndrew Rist  *************************************************************/
21*89b56da7SAndrew Rist 
22*89b56da7SAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir // no include "precompiled_tools.hxx" because this is included in other cxx files.
25cdf0e10cSrcweir 
26cdf0e10cSrcweir // -----------------------------------------------------------------------
27cdf0e10cSrcweir 
ImplUpdateStringFromUniString(const sal_Unicode * pUniStr,sal_Size nUniLen,rtl_TextEncoding eTextEncoding,sal_uInt32 nCvtFlags)28cdf0e10cSrcweir void ByteString::ImplUpdateStringFromUniString(
29cdf0e10cSrcweir     const sal_Unicode* pUniStr, sal_Size nUniLen,
30cdf0e10cSrcweir     rtl_TextEncoding eTextEncoding, sal_uInt32 nCvtFlags )
31cdf0e10cSrcweir {
32cdf0e10cSrcweir     ByteStringData* pNewStringData = NULL;
33cdf0e10cSrcweir     rtl_uString2String( (rtl_String **)(&pNewStringData),
34cdf0e10cSrcweir                         pUniStr, nUniLen,
35cdf0e10cSrcweir                         eTextEncoding, nCvtFlags );
36cdf0e10cSrcweir     STRING_RELEASE((STRING_TYPE *)mpData);
37cdf0e10cSrcweir     mpData = pNewStringData;
38cdf0e10cSrcweir }
39cdf0e10cSrcweir 
40cdf0e10cSrcweir // =======================================================================
41cdf0e10cSrcweir 
ByteString(const UniString & rUniStr,rtl_TextEncoding eTextEncoding,sal_uInt32 nCvtFlags)42cdf0e10cSrcweir ByteString::ByteString( const UniString& rUniStr, rtl_TextEncoding eTextEncoding, sal_uInt32 nCvtFlags )
43cdf0e10cSrcweir {
44cdf0e10cSrcweir     DBG_CTOR( ByteString, DbgCheckByteString );
45cdf0e10cSrcweir     DBG_CHKOBJ( &rUniStr, UniString, DbgCheckUniString );
46cdf0e10cSrcweir 
47cdf0e10cSrcweir     mpData = NULL;
48cdf0e10cSrcweir     rtl_uString2String( (rtl_String **)(&mpData),
49cdf0e10cSrcweir                         rUniStr.mpData->maStr, rUniStr.mpData->mnLen,
50cdf0e10cSrcweir                         eTextEncoding, nCvtFlags );
51cdf0e10cSrcweir }
52cdf0e10cSrcweir 
53cdf0e10cSrcweir // -----------------------------------------------------------------------
54cdf0e10cSrcweir 
ByteString(const UniString & rUniStr,xub_StrLen nPos,xub_StrLen nLen,rtl_TextEncoding eTextEncoding,sal_uInt32 nCvtFlags)55cdf0e10cSrcweir ByteString::ByteString( const UniString& rUniStr, xub_StrLen nPos, xub_StrLen nLen,
56cdf0e10cSrcweir                         rtl_TextEncoding eTextEncoding, sal_uInt32 nCvtFlags )
57cdf0e10cSrcweir {
58cdf0e10cSrcweir     DBG_CTOR( ByteString, DbgCheckByteString );
59cdf0e10cSrcweir     DBG_CHKOBJ( &rUniStr, UniString, DbgCheckUniString );
60cdf0e10cSrcweir 
61cdf0e10cSrcweir     // Stringlaenge ermitteln
62cdf0e10cSrcweir     if ( nPos > rUniStr.mpData->mnLen )
63cdf0e10cSrcweir         nLen = 0;
64cdf0e10cSrcweir     else
65cdf0e10cSrcweir     {
66cdf0e10cSrcweir         // Laenge korrigieren, wenn noetig
67cdf0e10cSrcweir         sal_Int32 nMaxLen = rUniStr.mpData->mnLen-nPos;
68cdf0e10cSrcweir         if ( nLen > nMaxLen )
69cdf0e10cSrcweir             nLen = static_cast< xub_StrLen >(nMaxLen);
70cdf0e10cSrcweir     }
71cdf0e10cSrcweir 
72cdf0e10cSrcweir     mpData = NULL;
73cdf0e10cSrcweir     rtl_uString2String( (rtl_String **)(&mpData),
74cdf0e10cSrcweir                         rUniStr.mpData->maStr+nPos, nLen,
75cdf0e10cSrcweir                         eTextEncoding, nCvtFlags );
76cdf0e10cSrcweir }
77cdf0e10cSrcweir 
78cdf0e10cSrcweir // -----------------------------------------------------------------------
79cdf0e10cSrcweir 
ByteString(const sal_Unicode * pUniStr,rtl_TextEncoding eTextEncoding,sal_uInt32 nCvtFlags)80cdf0e10cSrcweir ByteString::ByteString( const sal_Unicode* pUniStr,
81cdf0e10cSrcweir                         rtl_TextEncoding eTextEncoding, sal_uInt32 nCvtFlags )
82cdf0e10cSrcweir {
83cdf0e10cSrcweir     DBG_CTOR( ByteString, DbgCheckByteString );
84cdf0e10cSrcweir     DBG_ASSERT( pUniStr, "ByteString::ByteString() - pUniStr is NULL" );
85cdf0e10cSrcweir 
86cdf0e10cSrcweir     mpData = NULL;
87cdf0e10cSrcweir     rtl_uString2String( (rtl_String **)(&mpData),
88cdf0e10cSrcweir                         pUniStr, ImplStringLen( pUniStr ),
89cdf0e10cSrcweir                         eTextEncoding, nCvtFlags );
90cdf0e10cSrcweir }
91cdf0e10cSrcweir 
92cdf0e10cSrcweir // -----------------------------------------------------------------------
93cdf0e10cSrcweir 
ByteString(const sal_Unicode * pUniStr,xub_StrLen nLen,rtl_TextEncoding eTextEncoding,sal_uInt32 nCvtFlags)94cdf0e10cSrcweir ByteString::ByteString( const sal_Unicode* pUniStr, xub_StrLen nLen,
95cdf0e10cSrcweir                         rtl_TextEncoding eTextEncoding, sal_uInt32 nCvtFlags )
96cdf0e10cSrcweir {
97cdf0e10cSrcweir     DBG_CTOR( ByteString, DbgCheckByteString );
98cdf0e10cSrcweir     DBG_ASSERT( pUniStr, "ByteString::ByteString() - pUniStr is NULL" );
99cdf0e10cSrcweir 
100cdf0e10cSrcweir     if ( nLen == STRING_LEN )
101cdf0e10cSrcweir         nLen = ImplStringLen( pUniStr );
102cdf0e10cSrcweir 
103cdf0e10cSrcweir     mpData = NULL;
104cdf0e10cSrcweir     rtl_uString2String( (rtl_String **)(&mpData),
105cdf0e10cSrcweir                         pUniStr, nLen,
106cdf0e10cSrcweir                         eTextEncoding, nCvtFlags );
107cdf0e10cSrcweir }
108cdf0e10cSrcweir 
109cdf0e10cSrcweir // =======================================================================
110cdf0e10cSrcweir 
111cdf0e10cSrcweir static sal_uChar aImplByteTab[256] =
112cdf0e10cSrcweir {
113cdf0e10cSrcweir     0,   1,   2,   3,   4,   5,   6,   7,
114cdf0e10cSrcweir     8,   9,  10,  11,  12,  13,  14,  15,
115cdf0e10cSrcweir    16,  17,  18,  19,  20,  21,  22,  23,
116cdf0e10cSrcweir    24,  25,  26,  27,  28,  29,  30,  31,
117cdf0e10cSrcweir    32,  33,  34,  35,  36,  37,  38,  39,
118cdf0e10cSrcweir    40,  41,  42,  43,  44,  45,  46,  47,
119cdf0e10cSrcweir    48,  49,  50,  51,  52,  53,  54,  55,
120cdf0e10cSrcweir    56,  57,  58,  59,  60,  61,  62,  63,
121cdf0e10cSrcweir    64,  65,  66,  67,  68,  69,  70,  71,
122cdf0e10cSrcweir    72,  73,  74,  75,  76,  77,  78,  79,
123cdf0e10cSrcweir    80,  81,  82,  83,  84,  85,  86,  87,
124cdf0e10cSrcweir    88,  89,  90,  91,  92,  93,  94,  95,
125cdf0e10cSrcweir    96,  97,  98,  99, 100, 101, 102, 103,
126cdf0e10cSrcweir   104, 105, 106, 107, 108, 109, 110, 111,
127cdf0e10cSrcweir   112, 113, 114, 115, 116, 117, 118, 119,
128cdf0e10cSrcweir   120, 121, 122, 123, 124, 125, 126, 127,
129cdf0e10cSrcweir   128, 129, 130, 131, 132, 133, 134, 135,
130cdf0e10cSrcweir   136, 137, 138, 139, 140, 141, 142, 143,
131cdf0e10cSrcweir   144, 145, 146, 147, 148, 149, 150, 151,
132cdf0e10cSrcweir   152, 153, 154, 155, 156, 157, 158, 159,
133cdf0e10cSrcweir   160, 161, 162, 163, 164, 165, 166, 167,
134cdf0e10cSrcweir   168, 169, 170, 171, 172, 173, 174, 175,
135cdf0e10cSrcweir   176, 177, 178, 179, 180, 181, 182, 183,
136cdf0e10cSrcweir   184, 185, 186, 187, 188, 189, 190, 191,
137cdf0e10cSrcweir   192, 193, 194, 195, 196, 197, 198, 199,
138cdf0e10cSrcweir   200, 201, 202, 203, 204, 205, 206, 207,
139cdf0e10cSrcweir   208, 209, 210, 211, 212, 213, 214, 215,
140cdf0e10cSrcweir   216, 217, 218, 219, 220, 221, 222, 223,
141cdf0e10cSrcweir   224, 225, 226, 227, 228, 229, 230, 231,
142cdf0e10cSrcweir   232, 233, 234, 235, 236, 237, 238, 239,
143cdf0e10cSrcweir   240, 241, 242, 243, 244, 245, 246, 247,
144cdf0e10cSrcweir   248, 249, 250, 251, 252, 253, 254, 255
145cdf0e10cSrcweir };
146cdf0e10cSrcweir 
147cdf0e10cSrcweir // =======================================================================
148cdf0e10cSrcweir 
149cdf0e10cSrcweir struct Impl1ByteUnicodeTabData
150cdf0e10cSrcweir {
151cdf0e10cSrcweir     rtl_TextEncoding            meTextEncoding;
152cdf0e10cSrcweir     sal_Unicode                 maUniTab[256];
153cdf0e10cSrcweir     Impl1ByteUnicodeTabData*    mpNext;
154cdf0e10cSrcweir };
155cdf0e10cSrcweir 
156cdf0e10cSrcweir // -----------------------------------------------------------------------
157cdf0e10cSrcweir 
158cdf0e10cSrcweir struct Impl1ByteConvertTabData
159cdf0e10cSrcweir {
160cdf0e10cSrcweir     rtl_TextEncoding            meSrcTextEncoding;
161cdf0e10cSrcweir     rtl_TextEncoding            meDestTextEncoding;
162cdf0e10cSrcweir     sal_uChar                   maConvertTab[256];
163cdf0e10cSrcweir     sal_uChar                   maRepConvertTab[256];
164cdf0e10cSrcweir     Impl1ByteConvertTabData*    mpNext;
165cdf0e10cSrcweir };
166cdf0e10cSrcweir 
167cdf0e10cSrcweir // =======================================================================
168cdf0e10cSrcweir 
ImplGet1ByteUnicodeTab(rtl_TextEncoding eTextEncoding)169cdf0e10cSrcweir sal_Unicode* ImplGet1ByteUnicodeTab( rtl_TextEncoding eTextEncoding )
170cdf0e10cSrcweir {
171cdf0e10cSrcweir #ifndef BOOTSTRAP
172cdf0e10cSrcweir     TOOLSINDATA*                pToolsData = ImplGetToolsInData();
173cdf0e10cSrcweir #else
174cdf0e10cSrcweir     TOOLSINDATA*                pToolsData = 0x0;
175cdf0e10cSrcweir #endif
176cdf0e10cSrcweir     Impl1ByteUnicodeTabData*    pTab = pToolsData->mpFirstUniTabData;
177cdf0e10cSrcweir 
178cdf0e10cSrcweir     while ( pTab )
179cdf0e10cSrcweir     {
180cdf0e10cSrcweir         if ( pTab->meTextEncoding == eTextEncoding )
181cdf0e10cSrcweir             return pTab->maUniTab;
182cdf0e10cSrcweir         pTab = pTab->mpNext;
183cdf0e10cSrcweir     }
184cdf0e10cSrcweir 
185cdf0e10cSrcweir     // get TextEncodingInfo
186cdf0e10cSrcweir     rtl_TextEncodingInfo aTextEncInfo;
187cdf0e10cSrcweir     aTextEncInfo.StructSize = sizeof( aTextEncInfo );
188cdf0e10cSrcweir     rtl_getTextEncodingInfo( eTextEncoding, &aTextEncInfo );
189cdf0e10cSrcweir 
190cdf0e10cSrcweir     if ( aTextEncInfo.MaximumCharSize == 1 )
191cdf0e10cSrcweir     {
192cdf0e10cSrcweir         pTab = new Impl1ByteUnicodeTabData;
193cdf0e10cSrcweir         pTab->meTextEncoding = eTextEncoding;
194cdf0e10cSrcweir         pTab->mpNext = pToolsData->mpFirstUniTabData;
195cdf0e10cSrcweir 
196cdf0e10cSrcweir         rtl_TextToUnicodeConverter  hConverter;
197cdf0e10cSrcweir         sal_uInt32                  nInfo;
198cdf0e10cSrcweir         sal_Size                    nSrcBytes;
199cdf0e10cSrcweir         sal_Size                    nDestChars;
200cdf0e10cSrcweir         hConverter = rtl_createTextToUnicodeConverter( eTextEncoding );
201cdf0e10cSrcweir         nDestChars = rtl_convertTextToUnicode( hConverter, 0,
202cdf0e10cSrcweir                                                (const sal_Char*)aImplByteTab, 256,
203cdf0e10cSrcweir                                                pTab->maUniTab, 256,
204cdf0e10cSrcweir                                                RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE |
205cdf0e10cSrcweir                                                RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
206cdf0e10cSrcweir                                                RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT,
207cdf0e10cSrcweir                                                &nInfo, &nSrcBytes );
208cdf0e10cSrcweir         rtl_destroyTextToUnicodeConverter( hConverter );
209cdf0e10cSrcweir 
210cdf0e10cSrcweir         if ( (nSrcBytes != 256) || (nDestChars != 256) )
211cdf0e10cSrcweir             delete pTab;
212cdf0e10cSrcweir         else
213cdf0e10cSrcweir         {
214cdf0e10cSrcweir             pToolsData->mpFirstUniTabData = pTab;
215cdf0e10cSrcweir             return pTab->maUniTab;
216cdf0e10cSrcweir         }
217cdf0e10cSrcweir     }
218cdf0e10cSrcweir 
219cdf0e10cSrcweir     return NULL;
220cdf0e10cSrcweir }
221cdf0e10cSrcweir 
222cdf0e10cSrcweir // -----------------------------------------------------------------------
223cdf0e10cSrcweir 
ImplGet1ByteConvertTab(rtl_TextEncoding eSrcTextEncoding,rtl_TextEncoding eDestTextEncoding,sal_Bool bReplace)224cdf0e10cSrcweir static sal_uChar* ImplGet1ByteConvertTab( rtl_TextEncoding eSrcTextEncoding,
225cdf0e10cSrcweir                                           rtl_TextEncoding eDestTextEncoding,
226cdf0e10cSrcweir                                           sal_Bool bReplace )
227cdf0e10cSrcweir {
228cdf0e10cSrcweir #ifndef BOOTSTRAP
229cdf0e10cSrcweir     TOOLSINDATA*                pToolsData = ImplGetToolsInData();
230cdf0e10cSrcweir #else
231cdf0e10cSrcweir     TOOLSINDATA*                pToolsData = 0x0;
232cdf0e10cSrcweir #endif
233cdf0e10cSrcweir     Impl1ByteConvertTabData*    pTab = pToolsData->mpFirstConvertTabData;
234cdf0e10cSrcweir 
235cdf0e10cSrcweir     while ( pTab )
236cdf0e10cSrcweir     {
237cdf0e10cSrcweir         if ( (pTab->meSrcTextEncoding == eSrcTextEncoding) &&
238cdf0e10cSrcweir              (pTab->meDestTextEncoding == eDestTextEncoding) )
239cdf0e10cSrcweir         {
240cdf0e10cSrcweir             if ( bReplace )
241cdf0e10cSrcweir                 return pTab->maRepConvertTab;
242cdf0e10cSrcweir             else
243cdf0e10cSrcweir                 return pTab->maConvertTab;
244cdf0e10cSrcweir         }
245cdf0e10cSrcweir         pTab = pTab->mpNext;
246cdf0e10cSrcweir     }
247cdf0e10cSrcweir 
248cdf0e10cSrcweir     // get TextEncodingInfo
249cdf0e10cSrcweir     rtl_TextEncodingInfo aTextEncInfo1;
250cdf0e10cSrcweir     aTextEncInfo1.StructSize = sizeof( aTextEncInfo1 );
251cdf0e10cSrcweir     rtl_getTextEncodingInfo( eSrcTextEncoding, &aTextEncInfo1 );
252cdf0e10cSrcweir     rtl_TextEncodingInfo aTextEncInfo2;
253cdf0e10cSrcweir     aTextEncInfo2.StructSize = sizeof( aTextEncInfo2 );
254cdf0e10cSrcweir     rtl_getTextEncodingInfo( eDestTextEncoding, &aTextEncInfo2 );
255cdf0e10cSrcweir 
256cdf0e10cSrcweir     if ( (aTextEncInfo1.MaximumCharSize == 1) &&
257cdf0e10cSrcweir          (aTextEncInfo2.MaximumCharSize == 1) )
258cdf0e10cSrcweir     {
259cdf0e10cSrcweir         pTab = new Impl1ByteConvertTabData;
260cdf0e10cSrcweir         pTab->meSrcTextEncoding = eSrcTextEncoding;
261cdf0e10cSrcweir         pTab->meDestTextEncoding = eDestTextEncoding;
262cdf0e10cSrcweir         pTab->mpNext = pToolsData->mpFirstConvertTabData;
263cdf0e10cSrcweir 
264cdf0e10cSrcweir         rtl_TextToUnicodeConverter  hConverter;
265cdf0e10cSrcweir         rtl_UnicodeToTextConverter  hConverter2;
266cdf0e10cSrcweir         sal_uInt32                  nInfo;
267cdf0e10cSrcweir         sal_Size                    nSrcBytes;
268cdf0e10cSrcweir         sal_Size                    nDestChars;
269cdf0e10cSrcweir         sal_Size                    nSrcChars;
270cdf0e10cSrcweir         sal_Size                    nDestBytes;
271cdf0e10cSrcweir         sal_Unicode                 aTempBuf[256];
272cdf0e10cSrcweir         hConverter = rtl_createTextToUnicodeConverter( eSrcTextEncoding );
273cdf0e10cSrcweir         nDestChars = rtl_convertTextToUnicode( hConverter, 0,
274cdf0e10cSrcweir                                                (const sal_Char*)aImplByteTab, 256,
275cdf0e10cSrcweir                                                aTempBuf, 256,
276cdf0e10cSrcweir                                                RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
277cdf0e10cSrcweir                                                RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
278cdf0e10cSrcweir                                                RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT,
279cdf0e10cSrcweir                                                &nInfo, &nSrcBytes );
280cdf0e10cSrcweir         rtl_destroyTextToUnicodeConverter( hConverter );
281cdf0e10cSrcweir         if ( (nSrcBytes != 256) || (nDestChars != 256) )
282cdf0e10cSrcweir             delete pTab;
283cdf0e10cSrcweir         else
284cdf0e10cSrcweir         {
285cdf0e10cSrcweir             hConverter2 = rtl_createUnicodeToTextConverter( eDestTextEncoding );
286cdf0e10cSrcweir             nDestBytes = rtl_convertUnicodeToText( hConverter2, 0,
287cdf0e10cSrcweir                                                    aTempBuf, 256,
288cdf0e10cSrcweir                                                    (sal_Char*)pTab->maConvertTab, 256,
289cdf0e10cSrcweir                                                    RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0 |
290cdf0e10cSrcweir                                                    RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT,
291cdf0e10cSrcweir                                                    &nInfo, &nSrcChars );
292cdf0e10cSrcweir             if ( (nDestBytes == 256) || (nSrcChars == 256) )
293cdf0e10cSrcweir             {
294cdf0e10cSrcweir                 nDestBytes = rtl_convertUnicodeToText( hConverter2, 0,
295cdf0e10cSrcweir                                                        aTempBuf, 256,
296cdf0e10cSrcweir                                                        (sal_Char*)pTab->maRepConvertTab, 256,
297cdf0e10cSrcweir                                                        RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT |
298cdf0e10cSrcweir                                                        RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT |
299cdf0e10cSrcweir                                                        RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE,
300cdf0e10cSrcweir                                                        &nInfo, &nSrcChars );
301cdf0e10cSrcweir             }
302cdf0e10cSrcweir             rtl_destroyUnicodeToTextConverter( hConverter2 );
303cdf0e10cSrcweir             if ( (nDestBytes != 256) || (nSrcChars != 256) )
304cdf0e10cSrcweir                 delete pTab;
305cdf0e10cSrcweir             else
306cdf0e10cSrcweir             {
307cdf0e10cSrcweir                 pToolsData->mpFirstConvertTabData = pTab;
308cdf0e10cSrcweir                 if ( bReplace )
309cdf0e10cSrcweir                     return pTab->maRepConvertTab;
310cdf0e10cSrcweir                 else
311cdf0e10cSrcweir                     return pTab->maConvertTab;
312cdf0e10cSrcweir             }
313cdf0e10cSrcweir         }
314cdf0e10cSrcweir     }
315cdf0e10cSrcweir 
316cdf0e10cSrcweir     return NULL;
317cdf0e10cSrcweir }
318cdf0e10cSrcweir 
319cdf0e10cSrcweir // =======================================================================
320cdf0e10cSrcweir 
ImplDeleteCharTabData()321cdf0e10cSrcweir void ImplDeleteCharTabData()
322cdf0e10cSrcweir {
323cdf0e10cSrcweir #ifndef BOOTSTRAP
324cdf0e10cSrcweir     TOOLSINDATA*                pToolsData = ImplGetToolsInData();
325cdf0e10cSrcweir #else
326cdf0e10cSrcweir     TOOLSINDATA*                pToolsData = 0x0;
327cdf0e10cSrcweir #endif
328cdf0e10cSrcweir     Impl1ByteUnicodeTabData*    pTempUniTab;
329cdf0e10cSrcweir     Impl1ByteUnicodeTabData*    pUniTab = pToolsData->mpFirstUniTabData;
330cdf0e10cSrcweir     while ( pUniTab )
331cdf0e10cSrcweir     {
332cdf0e10cSrcweir         pTempUniTab = pUniTab->mpNext;
333cdf0e10cSrcweir         delete pUniTab;
334cdf0e10cSrcweir         pUniTab = pTempUniTab;
335cdf0e10cSrcweir     }
336cdf0e10cSrcweir     pToolsData->mpFirstUniTabData = NULL;
337cdf0e10cSrcweir 
338cdf0e10cSrcweir     Impl1ByteConvertTabData*    pTempConvertTab;
339cdf0e10cSrcweir     Impl1ByteConvertTabData*    pConvertTab = pToolsData->mpFirstConvertTabData;
340cdf0e10cSrcweir     while ( pConvertTab )
341cdf0e10cSrcweir     {
342cdf0e10cSrcweir         pTempConvertTab = pConvertTab->mpNext;
343cdf0e10cSrcweir         delete pConvertTab;
344cdf0e10cSrcweir         pConvertTab = pTempConvertTab;
345cdf0e10cSrcweir     }
346cdf0e10cSrcweir     pToolsData->mpFirstConvertTabData = NULL;
347cdf0e10cSrcweir }
348cdf0e10cSrcweir 
349cdf0e10cSrcweir // =======================================================================
350cdf0e10cSrcweir 
ImplStringConvert(rtl_TextEncoding eSource,rtl_TextEncoding eTarget,sal_Bool bReplace)351cdf0e10cSrcweir void ByteString::ImplStringConvert(
352cdf0e10cSrcweir     rtl_TextEncoding eSource, rtl_TextEncoding eTarget, sal_Bool bReplace )
353cdf0e10cSrcweir {
354cdf0e10cSrcweir     sal_uChar* pConvertTab = ImplGet1ByteConvertTab( eSource, eTarget, bReplace );
355cdf0e10cSrcweir     if ( pConvertTab )
356cdf0e10cSrcweir     {
357cdf0e10cSrcweir         char* pStr = mpData->maStr;
358cdf0e10cSrcweir         while ( *pStr )
359cdf0e10cSrcweir         {
360cdf0e10cSrcweir             sal_uChar c = (sal_uChar)*pStr;
361cdf0e10cSrcweir             sal_uChar cConv = pConvertTab[c];
362cdf0e10cSrcweir             if ( c != cConv )
363cdf0e10cSrcweir             {
364cdf0e10cSrcweir                 pStr = ImplCopyStringData( pStr );
365cdf0e10cSrcweir                 *pStr = (char)cConv;
366cdf0e10cSrcweir             }
367cdf0e10cSrcweir 
368cdf0e10cSrcweir             pStr++;
369cdf0e10cSrcweir         }
370cdf0e10cSrcweir     }
371cdf0e10cSrcweir     else
372cdf0e10cSrcweir     {
373cdf0e10cSrcweir         rtl_UnicodeToTextConverter  hSrcConverter = rtl_createTextToUnicodeConverter( eSource );
374cdf0e10cSrcweir         sal_uInt32                  nInfo;
375cdf0e10cSrcweir         sal_Size                    nSrcBytes;
376cdf0e10cSrcweir         sal_Size                    nDestChars;
377cdf0e10cSrcweir         sal_Size                    nTempLen;
378cdf0e10cSrcweir         sal_Unicode*                pTempBuf;
379cdf0e10cSrcweir         nTempLen = mpData->mnLen;
380cdf0e10cSrcweir         pTempBuf = new sal_Unicode[nTempLen];
381cdf0e10cSrcweir         nDestChars = rtl_convertTextToUnicode( hSrcConverter, 0,
382cdf0e10cSrcweir                                                mpData->maStr, mpData->mnLen,
383cdf0e10cSrcweir                                                pTempBuf, nTempLen,
384cdf0e10cSrcweir                                                RTL_TEXTTOUNICODE_FLAGS_FLUSH |
385cdf0e10cSrcweir                                                RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE |
386cdf0e10cSrcweir                                                RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
387cdf0e10cSrcweir                                                RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT,
388cdf0e10cSrcweir                                                &nInfo, &nSrcBytes );
389cdf0e10cSrcweir         rtl_destroyTextToUnicodeConverter( hSrcConverter );
390cdf0e10cSrcweir         // Hier werten wir bReplace nicht aus, da fuer MultiByte-Textencodings
391cdf0e10cSrcweir         // sowieso keine Ersatzdarstellung moeglich ist. Da sich der String
392cdf0e10cSrcweir         // sowieso in der Laenge aendern kann, nehmen wir auch sonst keine
393cdf0e10cSrcweir         // Ruecksicht darauf, das die Laenge erhalten bleibt.
394cdf0e10cSrcweir         ImplUpdateStringFromUniString( pTempBuf, nDestChars, eTarget,
395cdf0e10cSrcweir                                        RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT |
396cdf0e10cSrcweir                                        RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT |
397cdf0e10cSrcweir                                        RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE |
398cdf0e10cSrcweir                                        RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR |
399cdf0e10cSrcweir                                        RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 |
400cdf0e10cSrcweir                                        RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE |
401cdf0e10cSrcweir                                        RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE );
402cdf0e10cSrcweir         delete [] pTempBuf;
403cdf0e10cSrcweir     }
404cdf0e10cSrcweir }
405cdf0e10cSrcweir 
406cdf0e10cSrcweir // =======================================================================
407cdf0e10cSrcweir 
Convert(rtl_TextEncoding eSource,rtl_TextEncoding eTarget,sal_Bool bReplace)408cdf0e10cSrcweir ByteString& ByteString::Convert( rtl_TextEncoding eSource, rtl_TextEncoding eTarget, sal_Bool bReplace )
409cdf0e10cSrcweir {
410cdf0e10cSrcweir     DBG_CHKTHIS( ByteString, DbgCheckByteString );
411cdf0e10cSrcweir 
412cdf0e10cSrcweir     // rtl_TextEncoding Dontknow kann nicht konvertiert werden
413cdf0e10cSrcweir     if ( (eSource == RTL_TEXTENCODING_DONTKNOW) || (eTarget == RTL_TEXTENCODING_DONTKNOW) )
414cdf0e10cSrcweir         return *this;
415cdf0e10cSrcweir 
416cdf0e10cSrcweir     // Wenn Source und Target gleich sind, muss nicht konvertiert werden
417cdf0e10cSrcweir     if ( eSource == eTarget )
418cdf0e10cSrcweir         return *this;
419cdf0e10cSrcweir 
420cdf0e10cSrcweir     // rtl_TextEncoding Symbol nur nach Unicode oder von Unicode wandeln, ansonsten
421cdf0e10cSrcweir     // wollen wir die Zeichencodes beibehalten
422cdf0e10cSrcweir     if ( (eSource == RTL_TEXTENCODING_SYMBOL) &&
423cdf0e10cSrcweir          (eTarget != RTL_TEXTENCODING_UTF7) &&
424cdf0e10cSrcweir          (eTarget != RTL_TEXTENCODING_UTF8) )
425cdf0e10cSrcweir         return *this;
426cdf0e10cSrcweir     if ( (eTarget == RTL_TEXTENCODING_SYMBOL) &&
427cdf0e10cSrcweir          (eSource != RTL_TEXTENCODING_UTF7) &&
428cdf0e10cSrcweir          (eSource != RTL_TEXTENCODING_UTF8) )
429cdf0e10cSrcweir         return *this;
430cdf0e10cSrcweir 
431cdf0e10cSrcweir     // Zeichensatz umwandeln
432cdf0e10cSrcweir     ImplStringConvert( eSource, eTarget, bReplace );
433cdf0e10cSrcweir 
434cdf0e10cSrcweir     return *this;
435cdf0e10cSrcweir }
436cdf0e10cSrcweir 
437cdf0e10cSrcweir // =======================================================================
438cdf0e10cSrcweir 
Convert(char c,rtl_TextEncoding eSource,rtl_TextEncoding eTarget,sal_Bool bReplace)439cdf0e10cSrcweir char ByteString::Convert( char c,
440cdf0e10cSrcweir                           rtl_TextEncoding eSource, rtl_TextEncoding eTarget,
441cdf0e10cSrcweir                           sal_Bool bReplace )
442cdf0e10cSrcweir {
443cdf0e10cSrcweir     // TextEncoding Dontknow kann nicht konvertiert werden
444cdf0e10cSrcweir     if ( (eSource == RTL_TEXTENCODING_DONTKNOW) || (eTarget == RTL_TEXTENCODING_DONTKNOW) )
445cdf0e10cSrcweir         return '\0';
446cdf0e10cSrcweir 
447cdf0e10cSrcweir     // Wenn Source und Target gleich sind, muss nicht konvertiert werden
448cdf0e10cSrcweir     if ( eSource == eTarget )
449cdf0e10cSrcweir         return c;
450cdf0e10cSrcweir 
451cdf0e10cSrcweir     // TextEncoding Symbol nur nach Unicode oder von Unicode wandeln, ansonsten
452cdf0e10cSrcweir     // wollen wir die Zeichencodes beibehalten
453cdf0e10cSrcweir     if ( (eSource == RTL_TEXTENCODING_SYMBOL) &&
454cdf0e10cSrcweir          (eTarget != RTL_TEXTENCODING_UTF7) &&
455cdf0e10cSrcweir          (eTarget != RTL_TEXTENCODING_UTF8) )
456cdf0e10cSrcweir         return '\0';
457cdf0e10cSrcweir     if ( (eTarget == RTL_TEXTENCODING_SYMBOL) &&
458cdf0e10cSrcweir          (eSource != RTL_TEXTENCODING_UTF7) &&
459cdf0e10cSrcweir          (eSource != RTL_TEXTENCODING_UTF8) )
460cdf0e10cSrcweir         return '\0';
461cdf0e10cSrcweir 
462cdf0e10cSrcweir     sal_uChar* pConvertTab = ImplGet1ByteConvertTab( eSource, eTarget, bReplace );
463cdf0e10cSrcweir     if ( pConvertTab )
464cdf0e10cSrcweir         return (char)pConvertTab[(sal_uChar)c];
465cdf0e10cSrcweir     else
466cdf0e10cSrcweir         return '\0';
467cdf0e10cSrcweir }
468cdf0e10cSrcweir 
469cdf0e10cSrcweir // =======================================================================
470cdf0e10cSrcweir 
ConvertToUnicode(char c,rtl_TextEncoding eTextEncoding)471cdf0e10cSrcweir sal_Unicode ByteString::ConvertToUnicode( char c, rtl_TextEncoding eTextEncoding )
472cdf0e10cSrcweir {
473cdf0e10cSrcweir     sal_Size nLen = 1;
474cdf0e10cSrcweir     return ConvertToUnicode( &c, &nLen, eTextEncoding );
475cdf0e10cSrcweir }
476cdf0e10cSrcweir 
477cdf0e10cSrcweir // -----------------------------------------------------------------------
478cdf0e10cSrcweir 
ConvertFromUnicode(sal_Unicode c,rtl_TextEncoding eTextEncoding,sal_Bool bReplace)479cdf0e10cSrcweir char ByteString::ConvertFromUnicode( sal_Unicode c, rtl_TextEncoding eTextEncoding, sal_Bool bReplace )
480cdf0e10cSrcweir {
481cdf0e10cSrcweir     sal_Size    nLen;
482cdf0e10cSrcweir     char        aBuf[30];
483cdf0e10cSrcweir     nLen = ConvertFromUnicode( c, aBuf, sizeof( aBuf ), eTextEncoding, bReplace );
484cdf0e10cSrcweir     if ( nLen == 1 )
485cdf0e10cSrcweir         return aBuf[0];
486cdf0e10cSrcweir     else
487cdf0e10cSrcweir         return 0;
488cdf0e10cSrcweir }
489cdf0e10cSrcweir 
490cdf0e10cSrcweir // -----------------------------------------------------------------------
491cdf0e10cSrcweir 
ConvertToUnicode(const char * pChar,sal_Size * pLen,rtl_TextEncoding eTextEncoding)492cdf0e10cSrcweir sal_Unicode ByteString::ConvertToUnicode( const char* pChar, sal_Size* pLen, rtl_TextEncoding eTextEncoding )
493cdf0e10cSrcweir {
494cdf0e10cSrcweir     // TextEncoding Dontknow wird nicht konvertiert
495cdf0e10cSrcweir     if ( eTextEncoding == RTL_TEXTENCODING_DONTKNOW )
496cdf0e10cSrcweir         return 0;
497cdf0e10cSrcweir 
498cdf0e10cSrcweir     rtl_TextToUnicodeConverter  hConverter;
499cdf0e10cSrcweir     sal_uInt32                  nInfo;
500cdf0e10cSrcweir     sal_Size                    nSrcBytes;
501cdf0e10cSrcweir     sal_Size                    nDestChars;
502cdf0e10cSrcweir     sal_Unicode                 nConvChar;
503cdf0e10cSrcweir     hConverter = rtl_createTextToUnicodeConverter( eTextEncoding );
504cdf0e10cSrcweir     nDestChars = rtl_convertTextToUnicode( hConverter, 0,
505cdf0e10cSrcweir                                            (const sal_Char*)pChar, *pLen,
506cdf0e10cSrcweir                                            &nConvChar, 1,
507cdf0e10cSrcweir                                            RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
508cdf0e10cSrcweir                                            RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
509cdf0e10cSrcweir                                            RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT |
510cdf0e10cSrcweir                                            RTL_TEXTTOUNICODE_FLAGS_FLUSH,
511cdf0e10cSrcweir                                            &nInfo, &nSrcBytes );
512cdf0e10cSrcweir     rtl_destroyTextToUnicodeConverter( hConverter );
513cdf0e10cSrcweir 
514cdf0e10cSrcweir     if ( nDestChars == 1 )
515cdf0e10cSrcweir     {
516cdf0e10cSrcweir         *pLen = nSrcBytes;
517cdf0e10cSrcweir         return nConvChar;
518cdf0e10cSrcweir     }
519cdf0e10cSrcweir     else
520cdf0e10cSrcweir     {
521cdf0e10cSrcweir         *pLen = 0;
522cdf0e10cSrcweir         return 0;
523cdf0e10cSrcweir     }
524cdf0e10cSrcweir }
525cdf0e10cSrcweir 
526cdf0e10cSrcweir // -----------------------------------------------------------------------
527cdf0e10cSrcweir 
ConvertFromUnicode(sal_Unicode c,char * pBuf,sal_Size nBufLen,rtl_TextEncoding eTextEncoding,sal_Bool bReplace)528cdf0e10cSrcweir sal_Size ByteString::ConvertFromUnicode( sal_Unicode c, char* pBuf, sal_Size nBufLen, rtl_TextEncoding eTextEncoding,
529cdf0e10cSrcweir                                          sal_Bool bReplace )
530cdf0e10cSrcweir {
531cdf0e10cSrcweir     // TextEncoding Dontknow wird nicht konvertiert
532cdf0e10cSrcweir     if ( eTextEncoding == RTL_TEXTENCODING_DONTKNOW )
533cdf0e10cSrcweir         return '\0';
534cdf0e10cSrcweir 
535cdf0e10cSrcweir     rtl_UnicodeToTextConverter  hConverter;
536cdf0e10cSrcweir     sal_uInt32                  nInfo;
537cdf0e10cSrcweir     sal_Size                    nSrcChars;
538cdf0e10cSrcweir     sal_Size                    nDestBytes;
539cdf0e10cSrcweir     sal_Unicode                 cUni = c;
540cdf0e10cSrcweir     sal_uInt32                  nFlags = RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE |
541cdf0e10cSrcweir                                          RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE |
542cdf0e10cSrcweir                                          RTL_UNICODETOTEXT_FLAGS_FLUSH;
543cdf0e10cSrcweir     if ( bReplace )
544cdf0e10cSrcweir     {
545cdf0e10cSrcweir         nFlags |= RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT |
546cdf0e10cSrcweir                   RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT;
547cdf0e10cSrcweir         nFlags |= RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE;
548cdf0e10cSrcweir         if ( nBufLen > 1 )
549cdf0e10cSrcweir             nFlags |= RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR;
550cdf0e10cSrcweir     }
551cdf0e10cSrcweir     else
552cdf0e10cSrcweir     {
553cdf0e10cSrcweir         nFlags |= RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0 |
554cdf0e10cSrcweir                   RTL_UNICODETOTEXT_FLAGS_INVALID_0;
555cdf0e10cSrcweir     }
556cdf0e10cSrcweir 
557cdf0e10cSrcweir     hConverter = rtl_createUnicodeToTextConverter( eTextEncoding );
558cdf0e10cSrcweir     nDestBytes = rtl_convertUnicodeToText( hConverter, 0,
559cdf0e10cSrcweir                                            &cUni, 1,
560cdf0e10cSrcweir                                            (sal_Char*)pBuf, nBufLen,
561cdf0e10cSrcweir                                            nFlags,
562cdf0e10cSrcweir                                            &nInfo, &nSrcChars );
563cdf0e10cSrcweir     rtl_destroyUnicodeToTextConverter( hConverter );
564cdf0e10cSrcweir     return nDestBytes;
565cdf0e10cSrcweir }
566cdf0e10cSrcweir 
567cdf0e10cSrcweir // =======================================================================
568cdf0e10cSrcweir 
ByteString(const rtl::OString & rStr)569cdf0e10cSrcweir ByteString::ByteString( const rtl::OString& rStr )
570cdf0e10cSrcweir     : mpData(NULL)
571cdf0e10cSrcweir {
572cdf0e10cSrcweir     DBG_CTOR( ByteString, DbgCheckByteString );
573cdf0e10cSrcweir 
574cdf0e10cSrcweir     OSL_ENSURE(rStr.pData->length < STRING_MAXLEN,
575cdf0e10cSrcweir                "Overflowing rtl::OString -> ByteString cut to zero length");
576cdf0e10cSrcweir 
577cdf0e10cSrcweir     if (rStr.pData->length < STRING_MAXLEN)
578cdf0e10cSrcweir     {
579cdf0e10cSrcweir         mpData = reinterpret_cast< ByteStringData * >(const_cast< rtl::OString & >(rStr).pData);
580cdf0e10cSrcweir         STRING_ACQUIRE((STRING_TYPE *)mpData);
581cdf0e10cSrcweir     }
582cdf0e10cSrcweir     else
583cdf0e10cSrcweir     {
584cdf0e10cSrcweir         STRING_NEW((STRING_TYPE **)&mpData);
585cdf0e10cSrcweir     }
586cdf0e10cSrcweir }
587cdf0e10cSrcweir 
588cdf0e10cSrcweir // -----------------------------------------------------------------------
589cdf0e10cSrcweir 
Assign(const rtl::OString & rStr)590cdf0e10cSrcweir ByteString& ByteString::Assign( const rtl::OString& rStr )
591cdf0e10cSrcweir {
592cdf0e10cSrcweir     DBG_CHKTHIS( ByteString, DbgCheckByteString );
593cdf0e10cSrcweir 
594cdf0e10cSrcweir     OSL_ENSURE(rStr.pData->length < STRING_MAXLEN,
595cdf0e10cSrcweir                "Overflowing rtl::OString -> ByteString cut to zero length");
596cdf0e10cSrcweir 
597cdf0e10cSrcweir     if (rStr.pData->length < STRING_MAXLEN)
598cdf0e10cSrcweir     {
599cdf0e10cSrcweir         STRING_RELEASE((STRING_TYPE *)mpData);
600cdf0e10cSrcweir         mpData = reinterpret_cast< ByteStringData * >(const_cast< rtl::OString & >(rStr).pData);
601cdf0e10cSrcweir         STRING_ACQUIRE((STRING_TYPE *)mpData);
602cdf0e10cSrcweir     }
603cdf0e10cSrcweir     else
604cdf0e10cSrcweir     {
605cdf0e10cSrcweir         STRING_NEW((STRING_TYPE **)&mpData);
606cdf0e10cSrcweir     }
607cdf0e10cSrcweir 
608cdf0e10cSrcweir     return *this;
609cdf0e10cSrcweir }
610