xref: /aoo41x/main/tools/source/string/strcvt.cxx (revision 89b56da7)
/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/
21*89b56da7SAndrew Rist 
22*89b56da7SAndrew Rist 
23cdf0e10cSrcweir 
// No include of "precompiled_tools.hxx" here, because this file is itself included in other .cxx files.
25cdf0e10cSrcweir 
26cdf0e10cSrcweir // -----------------------------------------------------------------------
27cdf0e10cSrcweir 
ImplUpdateStringFromUniString(const sal_Unicode * pUniStr,sal_Size nUniLen,rtl_TextEncoding eTextEncoding,sal_uInt32 nCvtFlags)28cdf0e10cSrcweir void ByteString::ImplUpdateStringFromUniString(
29cdf0e10cSrcweir     const sal_Unicode* pUniStr, sal_Size nUniLen,
30cdf0e10cSrcweir     rtl_TextEncoding eTextEncoding, sal_uInt32 nCvtFlags )
31cdf0e10cSrcweir {
32cdf0e10cSrcweir 	ByteStringData* pNewStringData = NULL;
33cdf0e10cSrcweir 	rtl_uString2String( (rtl_String **)(&pNewStringData),
34cdf0e10cSrcweir 						pUniStr, nUniLen,
35cdf0e10cSrcweir 						eTextEncoding, nCvtFlags );
36cdf0e10cSrcweir 	STRING_RELEASE((STRING_TYPE *)mpData);
37cdf0e10cSrcweir 	mpData = pNewStringData;
38cdf0e10cSrcweir }
39cdf0e10cSrcweir 
40cdf0e10cSrcweir // =======================================================================
41cdf0e10cSrcweir 
ByteString(const UniString & rUniStr,rtl_TextEncoding eTextEncoding,sal_uInt32 nCvtFlags)42cdf0e10cSrcweir ByteString::ByteString( const UniString& rUniStr, rtl_TextEncoding eTextEncoding, sal_uInt32 nCvtFlags )
43cdf0e10cSrcweir {
44cdf0e10cSrcweir 	DBG_CTOR( ByteString, DbgCheckByteString );
45cdf0e10cSrcweir 	DBG_CHKOBJ( &rUniStr, UniString, DbgCheckUniString );
46cdf0e10cSrcweir 
47cdf0e10cSrcweir 	mpData = NULL;
48cdf0e10cSrcweir 	rtl_uString2String( (rtl_String **)(&mpData),
49cdf0e10cSrcweir 						rUniStr.mpData->maStr, rUniStr.mpData->mnLen,
50cdf0e10cSrcweir 						eTextEncoding, nCvtFlags );
51cdf0e10cSrcweir }
52cdf0e10cSrcweir 
53cdf0e10cSrcweir // -----------------------------------------------------------------------
54cdf0e10cSrcweir 
ByteString(const UniString & rUniStr,xub_StrLen nPos,xub_StrLen nLen,rtl_TextEncoding eTextEncoding,sal_uInt32 nCvtFlags)55cdf0e10cSrcweir ByteString::ByteString( const UniString& rUniStr, xub_StrLen nPos, xub_StrLen nLen,
56cdf0e10cSrcweir 						rtl_TextEncoding eTextEncoding, sal_uInt32 nCvtFlags )
57cdf0e10cSrcweir {
58cdf0e10cSrcweir 	DBG_CTOR( ByteString, DbgCheckByteString );
59cdf0e10cSrcweir 	DBG_CHKOBJ( &rUniStr, UniString, DbgCheckUniString );
60cdf0e10cSrcweir 
61cdf0e10cSrcweir 	// Stringlaenge ermitteln
62cdf0e10cSrcweir 	if ( nPos > rUniStr.mpData->mnLen )
63cdf0e10cSrcweir 		nLen = 0;
64cdf0e10cSrcweir 	else
65cdf0e10cSrcweir 	{
66cdf0e10cSrcweir 		// Laenge korrigieren, wenn noetig
67cdf0e10cSrcweir 		sal_Int32 nMaxLen = rUniStr.mpData->mnLen-nPos;
68cdf0e10cSrcweir 		if ( nLen > nMaxLen )
69cdf0e10cSrcweir 			nLen = static_cast< xub_StrLen >(nMaxLen);
70cdf0e10cSrcweir 	}
71cdf0e10cSrcweir 
72cdf0e10cSrcweir 	mpData = NULL;
73cdf0e10cSrcweir 	rtl_uString2String( (rtl_String **)(&mpData),
74cdf0e10cSrcweir 						rUniStr.mpData->maStr+nPos, nLen,
75cdf0e10cSrcweir 						eTextEncoding, nCvtFlags );
76cdf0e10cSrcweir }
77cdf0e10cSrcweir 
78cdf0e10cSrcweir // -----------------------------------------------------------------------
79cdf0e10cSrcweir 
ByteString(const sal_Unicode * pUniStr,rtl_TextEncoding eTextEncoding,sal_uInt32 nCvtFlags)80cdf0e10cSrcweir ByteString::ByteString( const sal_Unicode* pUniStr,
81cdf0e10cSrcweir 						rtl_TextEncoding eTextEncoding, sal_uInt32 nCvtFlags )
82cdf0e10cSrcweir {
83cdf0e10cSrcweir 	DBG_CTOR( ByteString, DbgCheckByteString );
84cdf0e10cSrcweir 	DBG_ASSERT( pUniStr, "ByteString::ByteString() - pUniStr is NULL" );
85cdf0e10cSrcweir 
86cdf0e10cSrcweir 	mpData = NULL;
87cdf0e10cSrcweir 	rtl_uString2String( (rtl_String **)(&mpData),
88cdf0e10cSrcweir 						pUniStr, ImplStringLen( pUniStr ),
89cdf0e10cSrcweir 						eTextEncoding, nCvtFlags );
90cdf0e10cSrcweir }
91cdf0e10cSrcweir 
92cdf0e10cSrcweir // -----------------------------------------------------------------------
93cdf0e10cSrcweir 
ByteString(const sal_Unicode * pUniStr,xub_StrLen nLen,rtl_TextEncoding eTextEncoding,sal_uInt32 nCvtFlags)94cdf0e10cSrcweir ByteString::ByteString( const sal_Unicode* pUniStr, xub_StrLen nLen,
95cdf0e10cSrcweir 						rtl_TextEncoding eTextEncoding, sal_uInt32 nCvtFlags )
96cdf0e10cSrcweir {
97cdf0e10cSrcweir 	DBG_CTOR( ByteString, DbgCheckByteString );
98cdf0e10cSrcweir 	DBG_ASSERT( pUniStr, "ByteString::ByteString() - pUniStr is NULL" );
99cdf0e10cSrcweir 
100cdf0e10cSrcweir 	if ( nLen == STRING_LEN )
101cdf0e10cSrcweir 		nLen = ImplStringLen( pUniStr );
102cdf0e10cSrcweir 
103cdf0e10cSrcweir 	mpData = NULL;
104cdf0e10cSrcweir 	rtl_uString2String( (rtl_String **)(&mpData),
105cdf0e10cSrcweir 						pUniStr, nLen,
106cdf0e10cSrcweir 						eTextEncoding, nCvtFlags );
107cdf0e10cSrcweir }
108cdf0e10cSrcweir 
109cdf0e10cSrcweir // =======================================================================
110cdf0e10cSrcweir 
111cdf0e10cSrcweir static sal_uChar aImplByteTab[256] =
112cdf0e10cSrcweir {
113cdf0e10cSrcweir 	0,	 1,   2,   3,	4,	 5,   6,   7,
114cdf0e10cSrcweir 	8,	 9,  10,  11,  12,	13,  14,  15,
115cdf0e10cSrcweir    16,	17,  18,  19,  20,	21,  22,  23,
116cdf0e10cSrcweir    24,	25,  26,  27,  28,	29,  30,  31,
117cdf0e10cSrcweir    32,	33,  34,  35,  36,	37,  38,  39,
118cdf0e10cSrcweir    40,	41,  42,  43,  44,	45,  46,  47,
119cdf0e10cSrcweir    48,	49,  50,  51,  52,	53,  54,  55,
120cdf0e10cSrcweir    56,	57,  58,  59,  60,	61,  62,  63,
121cdf0e10cSrcweir    64,	65,  66,  67,  68,	69,  70,  71,
122cdf0e10cSrcweir    72,	73,  74,  75,  76,	77,  78,  79,
123cdf0e10cSrcweir    80,	81,  82,  83,  84,	85,  86,  87,
124cdf0e10cSrcweir    88,	89,  90,  91,  92,	93,  94,  95,
125cdf0e10cSrcweir    96,	97,  98,  99, 100, 101, 102, 103,
126cdf0e10cSrcweir   104, 105, 106, 107, 108, 109, 110, 111,
127cdf0e10cSrcweir   112, 113, 114, 115, 116, 117, 118, 119,
128cdf0e10cSrcweir   120, 121, 122, 123, 124, 125, 126, 127,
129cdf0e10cSrcweir   128, 129, 130, 131, 132, 133, 134, 135,
130cdf0e10cSrcweir   136, 137, 138, 139, 140, 141, 142, 143,
131cdf0e10cSrcweir   144, 145, 146, 147, 148, 149, 150, 151,
132cdf0e10cSrcweir   152, 153, 154, 155, 156, 157, 158, 159,
133cdf0e10cSrcweir   160, 161, 162, 163, 164, 165, 166, 167,
134cdf0e10cSrcweir   168, 169, 170, 171, 172, 173, 174, 175,
135cdf0e10cSrcweir   176, 177, 178, 179, 180, 181, 182, 183,
136cdf0e10cSrcweir   184, 185, 186, 187, 188, 189, 190, 191,
137cdf0e10cSrcweir   192, 193, 194, 195, 196, 197, 198, 199,
138cdf0e10cSrcweir   200, 201, 202, 203, 204, 205, 206, 207,
139cdf0e10cSrcweir   208, 209, 210, 211, 212, 213, 214, 215,
140cdf0e10cSrcweir   216, 217, 218, 219, 220, 221, 222, 223,
141cdf0e10cSrcweir   224, 225, 226, 227, 228, 229, 230, 231,
142cdf0e10cSrcweir   232, 233, 234, 235, 236, 237, 238, 239,
143cdf0e10cSrcweir   240, 241, 242, 243, 244, 245, 246, 247,
144cdf0e10cSrcweir   248, 249, 250, 251, 252, 253, 254, 255
145cdf0e10cSrcweir };
146cdf0e10cSrcweir 
147cdf0e10cSrcweir // =======================================================================
148cdf0e10cSrcweir 
149cdf0e10cSrcweir struct Impl1ByteUnicodeTabData
150cdf0e10cSrcweir {
151cdf0e10cSrcweir 	rtl_TextEncoding			meTextEncoding;
152cdf0e10cSrcweir 	sal_Unicode 				maUniTab[256];
153cdf0e10cSrcweir 	Impl1ByteUnicodeTabData*	mpNext;
154cdf0e10cSrcweir };
155cdf0e10cSrcweir 
156cdf0e10cSrcweir // -----------------------------------------------------------------------
157cdf0e10cSrcweir 
158cdf0e10cSrcweir struct Impl1ByteConvertTabData
159cdf0e10cSrcweir {
160cdf0e10cSrcweir 	rtl_TextEncoding			meSrcTextEncoding;
161cdf0e10cSrcweir 	rtl_TextEncoding			meDestTextEncoding;
162cdf0e10cSrcweir 	sal_uChar					maConvertTab[256];
163cdf0e10cSrcweir 	sal_uChar					maRepConvertTab[256];
164cdf0e10cSrcweir 	Impl1ByteConvertTabData*	mpNext;
165cdf0e10cSrcweir };
166cdf0e10cSrcweir 
167cdf0e10cSrcweir // =======================================================================
168cdf0e10cSrcweir 
ImplGet1ByteUnicodeTab(rtl_TextEncoding eTextEncoding)169cdf0e10cSrcweir sal_Unicode* ImplGet1ByteUnicodeTab( rtl_TextEncoding eTextEncoding )
170cdf0e10cSrcweir {
171cdf0e10cSrcweir #ifndef BOOTSTRAP
172cdf0e10cSrcweir 	TOOLSINDATA*				pToolsData = ImplGetToolsInData();
173cdf0e10cSrcweir #else
174cdf0e10cSrcweir 	TOOLSINDATA*				pToolsData = 0x0;
175cdf0e10cSrcweir #endif
176cdf0e10cSrcweir 	Impl1ByteUnicodeTabData*	pTab = pToolsData->mpFirstUniTabData;
177cdf0e10cSrcweir 
178cdf0e10cSrcweir 	while ( pTab )
179cdf0e10cSrcweir 	{
180cdf0e10cSrcweir 		if ( pTab->meTextEncoding == eTextEncoding )
181cdf0e10cSrcweir 			return pTab->maUniTab;
182cdf0e10cSrcweir 		pTab = pTab->mpNext;
183cdf0e10cSrcweir 	}
184cdf0e10cSrcweir 
185cdf0e10cSrcweir 	// get TextEncodingInfo
186cdf0e10cSrcweir 	rtl_TextEncodingInfo aTextEncInfo;
187cdf0e10cSrcweir 	aTextEncInfo.StructSize = sizeof( aTextEncInfo );
188cdf0e10cSrcweir 	rtl_getTextEncodingInfo( eTextEncoding, &aTextEncInfo );
189cdf0e10cSrcweir 
190cdf0e10cSrcweir 	if ( aTextEncInfo.MaximumCharSize == 1 )
191cdf0e10cSrcweir 	{
192cdf0e10cSrcweir 		pTab = new Impl1ByteUnicodeTabData;
193cdf0e10cSrcweir 		pTab->meTextEncoding = eTextEncoding;
194cdf0e10cSrcweir 		pTab->mpNext = pToolsData->mpFirstUniTabData;
195cdf0e10cSrcweir 
196cdf0e10cSrcweir 		rtl_TextToUnicodeConverter	hConverter;
197cdf0e10cSrcweir 		sal_uInt32					nInfo;
198cdf0e10cSrcweir 		sal_Size					nSrcBytes;
199cdf0e10cSrcweir 		sal_Size					nDestChars;
200cdf0e10cSrcweir 		hConverter = rtl_createTextToUnicodeConverter( eTextEncoding );
201cdf0e10cSrcweir 		nDestChars = rtl_convertTextToUnicode( hConverter, 0,
202cdf0e10cSrcweir 											   (const sal_Char*)aImplByteTab, 256,
203cdf0e10cSrcweir 											   pTab->maUniTab, 256,
204cdf0e10cSrcweir 											   RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE |
205cdf0e10cSrcweir 											   RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
206cdf0e10cSrcweir 											   RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT,
207cdf0e10cSrcweir 											   &nInfo, &nSrcBytes );
208cdf0e10cSrcweir 		rtl_destroyTextToUnicodeConverter( hConverter );
209cdf0e10cSrcweir 
210cdf0e10cSrcweir 		if ( (nSrcBytes != 256) || (nDestChars != 256) )
211cdf0e10cSrcweir 			delete pTab;
212cdf0e10cSrcweir 		else
213cdf0e10cSrcweir 		{
214cdf0e10cSrcweir 			pToolsData->mpFirstUniTabData = pTab;
215cdf0e10cSrcweir 			return pTab->maUniTab;
216cdf0e10cSrcweir 		}
217cdf0e10cSrcweir 	}
218cdf0e10cSrcweir 
219cdf0e10cSrcweir 	return NULL;
220cdf0e10cSrcweir }
221cdf0e10cSrcweir 
222cdf0e10cSrcweir // -----------------------------------------------------------------------
223cdf0e10cSrcweir 
ImplGet1ByteConvertTab(rtl_TextEncoding eSrcTextEncoding,rtl_TextEncoding eDestTextEncoding,sal_Bool bReplace)224cdf0e10cSrcweir static sal_uChar* ImplGet1ByteConvertTab( rtl_TextEncoding eSrcTextEncoding,
225cdf0e10cSrcweir 										  rtl_TextEncoding eDestTextEncoding,
226cdf0e10cSrcweir 										  sal_Bool bReplace )
227cdf0e10cSrcweir {
228cdf0e10cSrcweir #ifndef BOOTSTRAP
229cdf0e10cSrcweir 	TOOLSINDATA*				pToolsData = ImplGetToolsInData();
230cdf0e10cSrcweir #else
231cdf0e10cSrcweir 	TOOLSINDATA*				pToolsData = 0x0;
232cdf0e10cSrcweir #endif
233cdf0e10cSrcweir 	Impl1ByteConvertTabData*	pTab = pToolsData->mpFirstConvertTabData;
234cdf0e10cSrcweir 
235cdf0e10cSrcweir 	while ( pTab )
236cdf0e10cSrcweir 	{
237cdf0e10cSrcweir 		if ( (pTab->meSrcTextEncoding == eSrcTextEncoding) &&
238cdf0e10cSrcweir 			 (pTab->meDestTextEncoding == eDestTextEncoding) )
239cdf0e10cSrcweir 		{
240cdf0e10cSrcweir 			if ( bReplace )
241cdf0e10cSrcweir 				return pTab->maRepConvertTab;
242cdf0e10cSrcweir 			else
243cdf0e10cSrcweir 				return pTab->maConvertTab;
244cdf0e10cSrcweir 		}
245cdf0e10cSrcweir 		pTab = pTab->mpNext;
246cdf0e10cSrcweir 	}
247cdf0e10cSrcweir 
248cdf0e10cSrcweir 	// get TextEncodingInfo
249cdf0e10cSrcweir 	rtl_TextEncodingInfo aTextEncInfo1;
250cdf0e10cSrcweir 	aTextEncInfo1.StructSize = sizeof( aTextEncInfo1 );
251cdf0e10cSrcweir 	rtl_getTextEncodingInfo( eSrcTextEncoding, &aTextEncInfo1 );
252cdf0e10cSrcweir 	rtl_TextEncodingInfo aTextEncInfo2;
253cdf0e10cSrcweir 	aTextEncInfo2.StructSize = sizeof( aTextEncInfo2 );
254cdf0e10cSrcweir 	rtl_getTextEncodingInfo( eDestTextEncoding, &aTextEncInfo2 );
255cdf0e10cSrcweir 
256cdf0e10cSrcweir 	if ( (aTextEncInfo1.MaximumCharSize == 1) &&
257cdf0e10cSrcweir 		 (aTextEncInfo2.MaximumCharSize == 1) )
258cdf0e10cSrcweir 	{
259cdf0e10cSrcweir 		pTab = new Impl1ByteConvertTabData;
260cdf0e10cSrcweir 		pTab->meSrcTextEncoding = eSrcTextEncoding;
261cdf0e10cSrcweir 		pTab->meDestTextEncoding = eDestTextEncoding;
262cdf0e10cSrcweir 		pTab->mpNext = pToolsData->mpFirstConvertTabData;
263cdf0e10cSrcweir 
264cdf0e10cSrcweir 		rtl_TextToUnicodeConverter	hConverter;
265cdf0e10cSrcweir 		rtl_UnicodeToTextConverter	hConverter2;
266cdf0e10cSrcweir 		sal_uInt32					nInfo;
267cdf0e10cSrcweir 		sal_Size					nSrcBytes;
268cdf0e10cSrcweir 		sal_Size					nDestChars;
269cdf0e10cSrcweir 		sal_Size					nSrcChars;
270cdf0e10cSrcweir 		sal_Size					nDestBytes;
271cdf0e10cSrcweir 		sal_Unicode 				aTempBuf[256];
272cdf0e10cSrcweir 		hConverter = rtl_createTextToUnicodeConverter( eSrcTextEncoding );
273cdf0e10cSrcweir 		nDestChars = rtl_convertTextToUnicode( hConverter, 0,
274cdf0e10cSrcweir 											   (const sal_Char*)aImplByteTab, 256,
275cdf0e10cSrcweir 											   aTempBuf, 256,
276cdf0e10cSrcweir 											   RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
277cdf0e10cSrcweir 											   RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
278cdf0e10cSrcweir 											   RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT,
279cdf0e10cSrcweir 											   &nInfo, &nSrcBytes );
280cdf0e10cSrcweir 		rtl_destroyTextToUnicodeConverter( hConverter );
281cdf0e10cSrcweir 		if ( (nSrcBytes != 256) || (nDestChars != 256) )
282cdf0e10cSrcweir 			delete pTab;
283cdf0e10cSrcweir 		else
284cdf0e10cSrcweir 		{
285cdf0e10cSrcweir 			hConverter2 = rtl_createUnicodeToTextConverter( eDestTextEncoding );
286cdf0e10cSrcweir 			nDestBytes = rtl_convertUnicodeToText( hConverter2, 0,
287cdf0e10cSrcweir 												   aTempBuf, 256,
288cdf0e10cSrcweir 												   (sal_Char*)pTab->maConvertTab, 256,
289cdf0e10cSrcweir 												   RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0 |
290cdf0e10cSrcweir 												   RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT,
291cdf0e10cSrcweir 												   &nInfo, &nSrcChars );
292cdf0e10cSrcweir 			if ( (nDestBytes == 256) || (nSrcChars == 256) )
293cdf0e10cSrcweir 			{
294cdf0e10cSrcweir 				nDestBytes = rtl_convertUnicodeToText( hConverter2, 0,
295cdf0e10cSrcweir 													   aTempBuf, 256,
296cdf0e10cSrcweir 													   (sal_Char*)pTab->maRepConvertTab, 256,
297cdf0e10cSrcweir 													   RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT |
298cdf0e10cSrcweir 													   RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT |
299cdf0e10cSrcweir 													   RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE,
300cdf0e10cSrcweir 													   &nInfo, &nSrcChars );
301cdf0e10cSrcweir 			}
302cdf0e10cSrcweir 			rtl_destroyUnicodeToTextConverter( hConverter2 );
303cdf0e10cSrcweir 			if ( (nDestBytes != 256) || (nSrcChars != 256) )
304cdf0e10cSrcweir 				delete pTab;
305cdf0e10cSrcweir 			else
306cdf0e10cSrcweir 			{
307cdf0e10cSrcweir 				pToolsData->mpFirstConvertTabData = pTab;
308cdf0e10cSrcweir 				if ( bReplace )
309cdf0e10cSrcweir 					return pTab->maRepConvertTab;
310cdf0e10cSrcweir 				else
311cdf0e10cSrcweir 					return pTab->maConvertTab;
312cdf0e10cSrcweir 			}
313cdf0e10cSrcweir 		}
314cdf0e10cSrcweir 	}
315cdf0e10cSrcweir 
316cdf0e10cSrcweir 	return NULL;
317cdf0e10cSrcweir }
318cdf0e10cSrcweir 
319cdf0e10cSrcweir // =======================================================================
320cdf0e10cSrcweir 
ImplDeleteCharTabData()321cdf0e10cSrcweir void ImplDeleteCharTabData()
322cdf0e10cSrcweir {
323cdf0e10cSrcweir #ifndef BOOTSTRAP
324cdf0e10cSrcweir 	TOOLSINDATA*				pToolsData = ImplGetToolsInData();
325cdf0e10cSrcweir #else
326cdf0e10cSrcweir 	TOOLSINDATA*				pToolsData = 0x0;
327cdf0e10cSrcweir #endif
328cdf0e10cSrcweir 	Impl1ByteUnicodeTabData*	pTempUniTab;
329cdf0e10cSrcweir 	Impl1ByteUnicodeTabData*	pUniTab = pToolsData->mpFirstUniTabData;
330cdf0e10cSrcweir 	while ( pUniTab )
331cdf0e10cSrcweir 	{
332cdf0e10cSrcweir 		pTempUniTab = pUniTab->mpNext;
333cdf0e10cSrcweir 		delete pUniTab;
334cdf0e10cSrcweir 		pUniTab = pTempUniTab;
335cdf0e10cSrcweir 	}
336cdf0e10cSrcweir 	pToolsData->mpFirstUniTabData = NULL;
337cdf0e10cSrcweir 
338cdf0e10cSrcweir 	Impl1ByteConvertTabData*	pTempConvertTab;
339cdf0e10cSrcweir 	Impl1ByteConvertTabData*	pConvertTab = pToolsData->mpFirstConvertTabData;
340cdf0e10cSrcweir 	while ( pConvertTab )
341cdf0e10cSrcweir 	{
342cdf0e10cSrcweir 		pTempConvertTab = pConvertTab->mpNext;
343cdf0e10cSrcweir 		delete pConvertTab;
344cdf0e10cSrcweir 		pConvertTab = pTempConvertTab;
345cdf0e10cSrcweir 	}
346cdf0e10cSrcweir 	pToolsData->mpFirstConvertTabData = NULL;
347cdf0e10cSrcweir }
348cdf0e10cSrcweir 
349cdf0e10cSrcweir // =======================================================================
350cdf0e10cSrcweir 
ImplStringConvert(rtl_TextEncoding eSource,rtl_TextEncoding eTarget,sal_Bool bReplace)351cdf0e10cSrcweir void ByteString::ImplStringConvert(
352cdf0e10cSrcweir     rtl_TextEncoding eSource, rtl_TextEncoding eTarget, sal_Bool bReplace )
353cdf0e10cSrcweir {
354cdf0e10cSrcweir 	sal_uChar* pConvertTab = ImplGet1ByteConvertTab( eSource, eTarget, bReplace );
355cdf0e10cSrcweir 	if ( pConvertTab )
356cdf0e10cSrcweir 	{
357cdf0e10cSrcweir 		char* pStr = mpData->maStr;
358cdf0e10cSrcweir 		while ( *pStr )
359cdf0e10cSrcweir 		{
360cdf0e10cSrcweir 			sal_uChar c = (sal_uChar)*pStr;
361cdf0e10cSrcweir 			sal_uChar cConv = pConvertTab[c];
362cdf0e10cSrcweir 			if ( c != cConv )
363cdf0e10cSrcweir 			{
364cdf0e10cSrcweir 				pStr = ImplCopyStringData( pStr );
365cdf0e10cSrcweir 				*pStr = (char)cConv;
366cdf0e10cSrcweir 			}
367cdf0e10cSrcweir 
368cdf0e10cSrcweir 			pStr++;
369cdf0e10cSrcweir 		}
370cdf0e10cSrcweir 	}
371cdf0e10cSrcweir 	else
372cdf0e10cSrcweir 	{
373cdf0e10cSrcweir 		rtl_UnicodeToTextConverter	hSrcConverter = rtl_createTextToUnicodeConverter( eSource );
374cdf0e10cSrcweir 		sal_uInt32					nInfo;
375cdf0e10cSrcweir 		sal_Size					nSrcBytes;
376cdf0e10cSrcweir 		sal_Size					nDestChars;
377cdf0e10cSrcweir 		sal_Size					nTempLen;
378cdf0e10cSrcweir 		sal_Unicode*				pTempBuf;
379cdf0e10cSrcweir 		nTempLen = mpData->mnLen;
380cdf0e10cSrcweir 		pTempBuf = new sal_Unicode[nTempLen];
381cdf0e10cSrcweir 		nDestChars = rtl_convertTextToUnicode( hSrcConverter, 0,
382cdf0e10cSrcweir 											   mpData->maStr, mpData->mnLen,
383cdf0e10cSrcweir 											   pTempBuf, nTempLen,
384cdf0e10cSrcweir 											   RTL_TEXTTOUNICODE_FLAGS_FLUSH |
385cdf0e10cSrcweir 											   RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE |
386cdf0e10cSrcweir 											   RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
387cdf0e10cSrcweir 											   RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT,
388cdf0e10cSrcweir 											   &nInfo, &nSrcBytes );
389cdf0e10cSrcweir 		rtl_destroyTextToUnicodeConverter( hSrcConverter );
390cdf0e10cSrcweir 		// Hier werten wir bReplace nicht aus, da fuer MultiByte-Textencodings
391cdf0e10cSrcweir 		// sowieso keine Ersatzdarstellung moeglich ist. Da sich der String
392cdf0e10cSrcweir 		// sowieso in der Laenge aendern kann, nehmen wir auch sonst keine
393cdf0e10cSrcweir 		// Ruecksicht darauf, das die Laenge erhalten bleibt.
394cdf0e10cSrcweir 		ImplUpdateStringFromUniString( pTempBuf, nDestChars, eTarget,
395cdf0e10cSrcweir 									   RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT |
396cdf0e10cSrcweir 									   RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT |
397cdf0e10cSrcweir 									   RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE |
398cdf0e10cSrcweir 									   RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR |
399cdf0e10cSrcweir 									   RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 |
400cdf0e10cSrcweir 									   RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE |
401cdf0e10cSrcweir 									   RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE );
402cdf0e10cSrcweir 		delete [] pTempBuf;
403cdf0e10cSrcweir 	}
404cdf0e10cSrcweir }
405cdf0e10cSrcweir 
406cdf0e10cSrcweir // =======================================================================
407cdf0e10cSrcweir 
Convert(rtl_TextEncoding eSource,rtl_TextEncoding eTarget,sal_Bool bReplace)408cdf0e10cSrcweir ByteString& ByteString::Convert( rtl_TextEncoding eSource, rtl_TextEncoding eTarget, sal_Bool bReplace )
409cdf0e10cSrcweir {
410cdf0e10cSrcweir 	DBG_CHKTHIS( ByteString, DbgCheckByteString );
411cdf0e10cSrcweir 
412cdf0e10cSrcweir 	// rtl_TextEncoding Dontknow kann nicht konvertiert werden
413cdf0e10cSrcweir 	if ( (eSource == RTL_TEXTENCODING_DONTKNOW) || (eTarget == RTL_TEXTENCODING_DONTKNOW) )
414cdf0e10cSrcweir 		return *this;
415cdf0e10cSrcweir 
416cdf0e10cSrcweir 	// Wenn Source und Target gleich sind, muss nicht konvertiert werden
417cdf0e10cSrcweir 	if ( eSource == eTarget )
418cdf0e10cSrcweir 		return *this;
419cdf0e10cSrcweir 
420cdf0e10cSrcweir 	// rtl_TextEncoding Symbol nur nach Unicode oder von Unicode wandeln, ansonsten
421cdf0e10cSrcweir 	// wollen wir die Zeichencodes beibehalten
422cdf0e10cSrcweir 	if ( (eSource == RTL_TEXTENCODING_SYMBOL) &&
423cdf0e10cSrcweir 		 (eTarget != RTL_TEXTENCODING_UTF7) &&
424cdf0e10cSrcweir 		 (eTarget != RTL_TEXTENCODING_UTF8) )
425cdf0e10cSrcweir 		return *this;
426cdf0e10cSrcweir 	if ( (eTarget == RTL_TEXTENCODING_SYMBOL) &&
427cdf0e10cSrcweir 		 (eSource != RTL_TEXTENCODING_UTF7) &&
428cdf0e10cSrcweir 		 (eSource != RTL_TEXTENCODING_UTF8) )
429cdf0e10cSrcweir 		return *this;
430cdf0e10cSrcweir 
431cdf0e10cSrcweir 	// Zeichensatz umwandeln
432cdf0e10cSrcweir 	ImplStringConvert( eSource, eTarget, bReplace );
433cdf0e10cSrcweir 
434cdf0e10cSrcweir 	return *this;
435cdf0e10cSrcweir }
436cdf0e10cSrcweir 
437cdf0e10cSrcweir // =======================================================================
438cdf0e10cSrcweir 
Convert(char c,rtl_TextEncoding eSource,rtl_TextEncoding eTarget,sal_Bool bReplace)439cdf0e10cSrcweir char ByteString::Convert( char c,
440cdf0e10cSrcweir 						  rtl_TextEncoding eSource, rtl_TextEncoding eTarget,
441cdf0e10cSrcweir 						  sal_Bool bReplace )
442cdf0e10cSrcweir {
443cdf0e10cSrcweir 	// TextEncoding Dontknow kann nicht konvertiert werden
444cdf0e10cSrcweir 	if ( (eSource == RTL_TEXTENCODING_DONTKNOW) || (eTarget == RTL_TEXTENCODING_DONTKNOW) )
445cdf0e10cSrcweir 		return '\0';
446cdf0e10cSrcweir 
447cdf0e10cSrcweir 	// Wenn Source und Target gleich sind, muss nicht konvertiert werden
448cdf0e10cSrcweir 	if ( eSource == eTarget )
449cdf0e10cSrcweir 		return c;
450cdf0e10cSrcweir 
451cdf0e10cSrcweir 	// TextEncoding Symbol nur nach Unicode oder von Unicode wandeln, ansonsten
452cdf0e10cSrcweir 	// wollen wir die Zeichencodes beibehalten
453cdf0e10cSrcweir 	if ( (eSource == RTL_TEXTENCODING_SYMBOL) &&
454cdf0e10cSrcweir 		 (eTarget != RTL_TEXTENCODING_UTF7) &&
455cdf0e10cSrcweir 		 (eTarget != RTL_TEXTENCODING_UTF8) )
456cdf0e10cSrcweir 		return '\0';
457cdf0e10cSrcweir 	if ( (eTarget == RTL_TEXTENCODING_SYMBOL) &&
458cdf0e10cSrcweir 		 (eSource != RTL_TEXTENCODING_UTF7) &&
459cdf0e10cSrcweir 		 (eSource != RTL_TEXTENCODING_UTF8) )
460cdf0e10cSrcweir 		return '\0';
461cdf0e10cSrcweir 
462cdf0e10cSrcweir 	sal_uChar* pConvertTab = ImplGet1ByteConvertTab( eSource, eTarget, bReplace );
463cdf0e10cSrcweir 	if ( pConvertTab )
464cdf0e10cSrcweir 		return (char)pConvertTab[(sal_uChar)c];
465cdf0e10cSrcweir 	else
466cdf0e10cSrcweir 		return '\0';
467cdf0e10cSrcweir }
468cdf0e10cSrcweir 
469cdf0e10cSrcweir // =======================================================================
470cdf0e10cSrcweir 
ConvertToUnicode(char c,rtl_TextEncoding eTextEncoding)471cdf0e10cSrcweir sal_Unicode ByteString::ConvertToUnicode( char c, rtl_TextEncoding eTextEncoding )
472cdf0e10cSrcweir {
473cdf0e10cSrcweir 	sal_Size nLen = 1;
474cdf0e10cSrcweir 	return ConvertToUnicode( &c, &nLen, eTextEncoding );
475cdf0e10cSrcweir }
476cdf0e10cSrcweir 
477cdf0e10cSrcweir // -----------------------------------------------------------------------
478cdf0e10cSrcweir 
ConvertFromUnicode(sal_Unicode c,rtl_TextEncoding eTextEncoding,sal_Bool bReplace)479cdf0e10cSrcweir char ByteString::ConvertFromUnicode( sal_Unicode c, rtl_TextEncoding eTextEncoding, sal_Bool bReplace )
480cdf0e10cSrcweir {
481cdf0e10cSrcweir 	sal_Size	nLen;
482cdf0e10cSrcweir 	char		aBuf[30];
483cdf0e10cSrcweir 	nLen = ConvertFromUnicode( c, aBuf, sizeof( aBuf ), eTextEncoding, bReplace );
484cdf0e10cSrcweir 	if ( nLen == 1 )
485cdf0e10cSrcweir 		return aBuf[0];
486cdf0e10cSrcweir 	else
487cdf0e10cSrcweir 		return 0;
488cdf0e10cSrcweir }
489cdf0e10cSrcweir 
490cdf0e10cSrcweir // -----------------------------------------------------------------------
491cdf0e10cSrcweir 
ConvertToUnicode(const char * pChar,sal_Size * pLen,rtl_TextEncoding eTextEncoding)492cdf0e10cSrcweir sal_Unicode ByteString::ConvertToUnicode( const char* pChar, sal_Size* pLen, rtl_TextEncoding eTextEncoding )
493cdf0e10cSrcweir {
494cdf0e10cSrcweir 	// TextEncoding Dontknow wird nicht konvertiert
495cdf0e10cSrcweir 	if ( eTextEncoding == RTL_TEXTENCODING_DONTKNOW )
496cdf0e10cSrcweir 		return 0;
497cdf0e10cSrcweir 
498cdf0e10cSrcweir 	rtl_TextToUnicodeConverter	hConverter;
499cdf0e10cSrcweir 	sal_uInt32					nInfo;
500cdf0e10cSrcweir 	sal_Size					nSrcBytes;
501cdf0e10cSrcweir 	sal_Size					nDestChars;
502cdf0e10cSrcweir 	sal_Unicode 				nConvChar;
503cdf0e10cSrcweir 	hConverter = rtl_createTextToUnicodeConverter( eTextEncoding );
504cdf0e10cSrcweir 	nDestChars = rtl_convertTextToUnicode( hConverter, 0,
505cdf0e10cSrcweir 										   (const sal_Char*)pChar, *pLen,
506cdf0e10cSrcweir 										   &nConvChar, 1,
507cdf0e10cSrcweir 										   RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
508cdf0e10cSrcweir 										   RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
509cdf0e10cSrcweir 										   RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT |
510cdf0e10cSrcweir 										   RTL_TEXTTOUNICODE_FLAGS_FLUSH,
511cdf0e10cSrcweir 										   &nInfo, &nSrcBytes );
512cdf0e10cSrcweir 	rtl_destroyTextToUnicodeConverter( hConverter );
513cdf0e10cSrcweir 
514cdf0e10cSrcweir 	if ( nDestChars == 1 )
515cdf0e10cSrcweir 	{
516cdf0e10cSrcweir 		*pLen = nSrcBytes;
517cdf0e10cSrcweir 		return nConvChar;
518cdf0e10cSrcweir 	}
519cdf0e10cSrcweir 	else
520cdf0e10cSrcweir 	{
521cdf0e10cSrcweir 		*pLen = 0;
522cdf0e10cSrcweir 		return 0;
523cdf0e10cSrcweir 	}
524cdf0e10cSrcweir }
525cdf0e10cSrcweir 
526cdf0e10cSrcweir // -----------------------------------------------------------------------
527cdf0e10cSrcweir 
ConvertFromUnicode(sal_Unicode c,char * pBuf,sal_Size nBufLen,rtl_TextEncoding eTextEncoding,sal_Bool bReplace)528cdf0e10cSrcweir sal_Size ByteString::ConvertFromUnicode( sal_Unicode c, char* pBuf, sal_Size nBufLen, rtl_TextEncoding eTextEncoding,
529cdf0e10cSrcweir 										 sal_Bool bReplace )
530cdf0e10cSrcweir {
531cdf0e10cSrcweir 	// TextEncoding Dontknow wird nicht konvertiert
532cdf0e10cSrcweir 	if ( eTextEncoding == RTL_TEXTENCODING_DONTKNOW )
533cdf0e10cSrcweir 		return '\0';
534cdf0e10cSrcweir 
535cdf0e10cSrcweir 	rtl_UnicodeToTextConverter	hConverter;
536cdf0e10cSrcweir 	sal_uInt32					nInfo;
537cdf0e10cSrcweir 	sal_Size					nSrcChars;
538cdf0e10cSrcweir 	sal_Size					nDestBytes;
539cdf0e10cSrcweir 	sal_Unicode 				cUni = c;
540cdf0e10cSrcweir 	sal_uInt32					nFlags = RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE |
541cdf0e10cSrcweir 										 RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE |
542cdf0e10cSrcweir 										 RTL_UNICODETOTEXT_FLAGS_FLUSH;
543cdf0e10cSrcweir 	if ( bReplace )
544cdf0e10cSrcweir 	{
545cdf0e10cSrcweir 		nFlags |= RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT |
546cdf0e10cSrcweir 				  RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT;
547cdf0e10cSrcweir 		nFlags |= RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE;
548cdf0e10cSrcweir 		if ( nBufLen > 1 )
549cdf0e10cSrcweir 			nFlags |= RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR;
550cdf0e10cSrcweir 	}
551cdf0e10cSrcweir 	else
552cdf0e10cSrcweir 	{
553cdf0e10cSrcweir 		nFlags |= RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0 |
554cdf0e10cSrcweir 				  RTL_UNICODETOTEXT_FLAGS_INVALID_0;
555cdf0e10cSrcweir 	}
556cdf0e10cSrcweir 
557cdf0e10cSrcweir 	hConverter = rtl_createUnicodeToTextConverter( eTextEncoding );
558cdf0e10cSrcweir 	nDestBytes = rtl_convertUnicodeToText( hConverter, 0,
559cdf0e10cSrcweir 										   &cUni, 1,
560cdf0e10cSrcweir 										   (sal_Char*)pBuf, nBufLen,
561cdf0e10cSrcweir 										   nFlags,
562cdf0e10cSrcweir 										   &nInfo, &nSrcChars );
563cdf0e10cSrcweir 	rtl_destroyUnicodeToTextConverter( hConverter );
564cdf0e10cSrcweir 	return nDestBytes;
565cdf0e10cSrcweir }
566cdf0e10cSrcweir 
567cdf0e10cSrcweir // =======================================================================
568cdf0e10cSrcweir 
ByteString(const rtl::OString & rStr)569cdf0e10cSrcweir ByteString::ByteString( const rtl::OString& rStr )
570cdf0e10cSrcweir 	: mpData(NULL)
571cdf0e10cSrcweir {
572cdf0e10cSrcweir 	DBG_CTOR( ByteString, DbgCheckByteString );
573cdf0e10cSrcweir 
574cdf0e10cSrcweir     OSL_ENSURE(rStr.pData->length < STRING_MAXLEN,
575cdf0e10cSrcweir                "Overflowing rtl::OString -> ByteString cut to zero length");
576cdf0e10cSrcweir 
577cdf0e10cSrcweir 	if (rStr.pData->length < STRING_MAXLEN)
578cdf0e10cSrcweir 	{
579cdf0e10cSrcweir 		mpData = reinterpret_cast< ByteStringData * >(const_cast< rtl::OString & >(rStr).pData);
580cdf0e10cSrcweir 	    STRING_ACQUIRE((STRING_TYPE *)mpData);
581cdf0e10cSrcweir 	}
582cdf0e10cSrcweir 	else
583cdf0e10cSrcweir 	{
584cdf0e10cSrcweir 		STRING_NEW((STRING_TYPE **)&mpData);
585cdf0e10cSrcweir 	}
586cdf0e10cSrcweir }
587cdf0e10cSrcweir 
588cdf0e10cSrcweir // -----------------------------------------------------------------------
589cdf0e10cSrcweir 
Assign(const rtl::OString & rStr)590cdf0e10cSrcweir ByteString& ByteString::Assign( const rtl::OString& rStr )
591cdf0e10cSrcweir {
592cdf0e10cSrcweir 	DBG_CHKTHIS( ByteString, DbgCheckByteString );
593cdf0e10cSrcweir 
594cdf0e10cSrcweir     OSL_ENSURE(rStr.pData->length < STRING_MAXLEN,
595cdf0e10cSrcweir                "Overflowing rtl::OString -> ByteString cut to zero length");
596cdf0e10cSrcweir 
597cdf0e10cSrcweir 	if (rStr.pData->length < STRING_MAXLEN)
598cdf0e10cSrcweir 	{
599cdf0e10cSrcweir 	    STRING_RELEASE((STRING_TYPE *)mpData);
600cdf0e10cSrcweir 		mpData = reinterpret_cast< ByteStringData * >(const_cast< rtl::OString & >(rStr).pData);
601cdf0e10cSrcweir 	    STRING_ACQUIRE((STRING_TYPE *)mpData);
602cdf0e10cSrcweir 	}
603cdf0e10cSrcweir 	else
604cdf0e10cSrcweir 	{
605cdf0e10cSrcweir 		STRING_NEW((STRING_TYPE **)&mpData);
606cdf0e10cSrcweir 	}
607cdf0e10cSrcweir 
608cdf0e10cSrcweir 	return *this;
609cdf0e10cSrcweir }
610