/************************************************************** * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. * *************************************************************/ // no include "precompiled_tools.hxx" because this is included in other cxx files. // ----------------------------------------------------------------------- void ByteString::ImplUpdateStringFromUniString( const sal_Unicode* pUniStr, sal_Size nUniLen, rtl_TextEncoding eTextEncoding, sal_uInt32 nCvtFlags ) { ByteStringData* pNewStringData = NULL; rtl_uString2String( (rtl_String **)(&pNewStringData), pUniStr, nUniLen, eTextEncoding, nCvtFlags ); STRING_RELEASE((STRING_TYPE *)mpData); mpData = pNewStringData; } // ======================================================================= ByteString::ByteString( const UniString& rUniStr, rtl_TextEncoding eTextEncoding, sal_uInt32 nCvtFlags ) { DBG_CTOR( ByteString, DbgCheckByteString ); DBG_CHKOBJ( &rUniStr, UniString, DbgCheckUniString ); mpData = NULL; rtl_uString2String( (rtl_String **)(&mpData), rUniStr.mpData->maStr, rUniStr.mpData->mnLen, eTextEncoding, nCvtFlags ); } // ----------------------------------------------------------------------- ByteString::ByteString( const UniString& rUniStr, xub_StrLen nPos, xub_StrLen nLen, rtl_TextEncoding eTextEncoding, sal_uInt32 nCvtFlags ) { DBG_CTOR( ByteString, DbgCheckByteString ); DBG_CHKOBJ( &rUniStr, UniString, DbgCheckUniString ); // Stringlaenge ermitteln if ( nPos > rUniStr.mpData->mnLen ) nLen = 0; else { // Laenge korrigieren, wenn noetig sal_Int32 nMaxLen = rUniStr.mpData->mnLen-nPos; if ( nLen > nMaxLen ) nLen = static_cast< xub_StrLen >(nMaxLen); } mpData = NULL; rtl_uString2String( (rtl_String **)(&mpData), rUniStr.mpData->maStr+nPos, nLen, eTextEncoding, nCvtFlags ); } // ----------------------------------------------------------------------- ByteString::ByteString( const sal_Unicode* pUniStr, rtl_TextEncoding eTextEncoding, sal_uInt32 nCvtFlags ) { DBG_CTOR( ByteString, DbgCheckByteString ); DBG_ASSERT( pUniStr, "ByteString::ByteString() - pUniStr is NULL" ); mpData = NULL; rtl_uString2String( (rtl_String **)(&mpData), pUniStr, ImplStringLen( pUniStr ), eTextEncoding, nCvtFlags ); } // ----------------------------------------------------------------------- ByteString::ByteString( const sal_Unicode* pUniStr, xub_StrLen nLen, rtl_TextEncoding eTextEncoding, sal_uInt32 nCvtFlags ) { DBG_CTOR( ByteString, DbgCheckByteString ); DBG_ASSERT( pUniStr, "ByteString::ByteString() - pUniStr is NULL" ); if ( nLen == STRING_LEN ) nLen = ImplStringLen( pUniStr ); mpData = NULL; rtl_uString2String( (rtl_String **)(&mpData), pUniStr, nLen, eTextEncoding, nCvtFlags ); } // ======================================================================= static sal_uChar aImplByteTab[256] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 }; // ======================================================================= struct Impl1ByteUnicodeTabData { rtl_TextEncoding meTextEncoding; sal_Unicode maUniTab[256]; Impl1ByteUnicodeTabData* mpNext; }; // ----------------------------------------------------------------------- struct Impl1ByteConvertTabData { rtl_TextEncoding meSrcTextEncoding; rtl_TextEncoding meDestTextEncoding; sal_uChar maConvertTab[256]; sal_uChar maRepConvertTab[256]; Impl1ByteConvertTabData* mpNext; }; // ======================================================================= sal_Unicode* ImplGet1ByteUnicodeTab( rtl_TextEncoding eTextEncoding ) { #ifndef BOOTSTRAP TOOLSINDATA* pToolsData = ImplGetToolsInData(); #else TOOLSINDATA* pToolsData = 0x0; #endif Impl1ByteUnicodeTabData* pTab = pToolsData->mpFirstUniTabData; while ( pTab ) { if ( pTab->meTextEncoding == eTextEncoding ) return pTab->maUniTab; pTab = pTab->mpNext; } // get TextEncodingInfo rtl_TextEncodingInfo aTextEncInfo; aTextEncInfo.StructSize = sizeof( aTextEncInfo ); rtl_getTextEncodingInfo( eTextEncoding, &aTextEncInfo ); if ( aTextEncInfo.MaximumCharSize == 1 ) { pTab = new Impl1ByteUnicodeTabData; pTab->meTextEncoding = eTextEncoding; pTab->mpNext = pToolsData->mpFirstUniTabData; rtl_TextToUnicodeConverter hConverter; sal_uInt32 nInfo; sal_Size nSrcBytes; sal_Size nDestChars; hConverter = rtl_createTextToUnicodeConverter( eTextEncoding ); nDestChars = rtl_convertTextToUnicode( hConverter, 0, (const sal_Char*)aImplByteTab, 256, pTab->maUniTab, 256, RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT, &nInfo, &nSrcBytes ); rtl_destroyTextToUnicodeConverter( hConverter ); if ( (nSrcBytes != 256) || (nDestChars != 256) ) delete pTab; else { pToolsData->mpFirstUniTabData = pTab; return pTab->maUniTab; } } return NULL; } // ----------------------------------------------------------------------- static sal_uChar* ImplGet1ByteConvertTab( rtl_TextEncoding eSrcTextEncoding, rtl_TextEncoding eDestTextEncoding, sal_Bool bReplace ) { #ifndef BOOTSTRAP TOOLSINDATA* pToolsData = ImplGetToolsInData(); #else TOOLSINDATA* pToolsData = 0x0; #endif Impl1ByteConvertTabData* pTab = pToolsData->mpFirstConvertTabData; while ( pTab ) { if ( (pTab->meSrcTextEncoding == eSrcTextEncoding) && (pTab->meDestTextEncoding == eDestTextEncoding) ) { if ( bReplace ) return pTab->maRepConvertTab; else return pTab->maConvertTab; } pTab = pTab->mpNext; } // get TextEncodingInfo rtl_TextEncodingInfo aTextEncInfo1; aTextEncInfo1.StructSize = sizeof( aTextEncInfo1 ); rtl_getTextEncodingInfo( eSrcTextEncoding, &aTextEncInfo1 ); rtl_TextEncodingInfo aTextEncInfo2; aTextEncInfo2.StructSize = sizeof( aTextEncInfo2 ); rtl_getTextEncodingInfo( eDestTextEncoding, &aTextEncInfo2 ); if ( (aTextEncInfo1.MaximumCharSize == 1) && (aTextEncInfo2.MaximumCharSize == 1) ) { pTab = new Impl1ByteConvertTabData; pTab->meSrcTextEncoding = eSrcTextEncoding; pTab->meDestTextEncoding = eDestTextEncoding; pTab->mpNext = pToolsData->mpFirstConvertTabData; rtl_TextToUnicodeConverter hConverter; rtl_UnicodeToTextConverter hConverter2; sal_uInt32 nInfo; sal_Size nSrcBytes; sal_Size nDestChars; sal_Size nSrcChars; sal_Size nDestBytes; sal_Unicode aTempBuf[256]; hConverter = rtl_createTextToUnicodeConverter( eSrcTextEncoding ); nDestChars = rtl_convertTextToUnicode( hConverter, 0, (const sal_Char*)aImplByteTab, 256, aTempBuf, 256, RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT, &nInfo, &nSrcBytes ); rtl_destroyTextToUnicodeConverter( hConverter ); if ( (nSrcBytes != 256) || (nDestChars != 256) ) delete pTab; else { hConverter2 = rtl_createUnicodeToTextConverter( eDestTextEncoding ); nDestBytes = rtl_convertUnicodeToText( hConverter2, 0, aTempBuf, 256, (sal_Char*)pTab->maConvertTab, 256, RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0 | RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT, &nInfo, &nSrcChars ); if ( (nDestBytes == 256) || (nSrcChars == 256) ) { nDestBytes = rtl_convertUnicodeToText( hConverter2, 0, aTempBuf, 256, (sal_Char*)pTab->maRepConvertTab, 256, RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT | RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE, &nInfo, &nSrcChars ); } rtl_destroyUnicodeToTextConverter( hConverter2 ); if ( (nDestBytes != 256) || (nSrcChars != 256) ) delete pTab; else { pToolsData->mpFirstConvertTabData = pTab; if ( bReplace ) return pTab->maRepConvertTab; else return pTab->maConvertTab; } } } return NULL; } // ======================================================================= void ImplDeleteCharTabData() { #ifndef BOOTSTRAP TOOLSINDATA* pToolsData = ImplGetToolsInData(); #else TOOLSINDATA* pToolsData = 0x0; #endif Impl1ByteUnicodeTabData* pTempUniTab; Impl1ByteUnicodeTabData* pUniTab = pToolsData->mpFirstUniTabData; while ( pUniTab ) { pTempUniTab = pUniTab->mpNext; delete pUniTab; pUniTab = pTempUniTab; } pToolsData->mpFirstUniTabData = NULL; Impl1ByteConvertTabData* pTempConvertTab; Impl1ByteConvertTabData* pConvertTab = pToolsData->mpFirstConvertTabData; while ( pConvertTab ) { pTempConvertTab = pConvertTab->mpNext; delete pConvertTab; pConvertTab = pTempConvertTab; } pToolsData->mpFirstConvertTabData = NULL; } // ======================================================================= void ByteString::ImplStringConvert( rtl_TextEncoding eSource, rtl_TextEncoding eTarget, sal_Bool bReplace ) { sal_uChar* pConvertTab = ImplGet1ByteConvertTab( eSource, eTarget, bReplace ); if ( pConvertTab ) { char* pStr = mpData->maStr; while ( *pStr ) { sal_uChar c = (sal_uChar)*pStr; sal_uChar cConv = pConvertTab[c]; if ( c != cConv ) { pStr = ImplCopyStringData( pStr ); *pStr = (char)cConv; } pStr++; } } else { rtl_UnicodeToTextConverter hSrcConverter = rtl_createTextToUnicodeConverter( eSource ); sal_uInt32 nInfo; sal_Size nSrcBytes; sal_Size nDestChars; sal_Size nTempLen; sal_Unicode* pTempBuf; nTempLen = mpData->mnLen; pTempBuf = new sal_Unicode[nTempLen]; nDestChars = rtl_convertTextToUnicode( hSrcConverter, 0, mpData->maStr, mpData->mnLen, pTempBuf, nTempLen, RTL_TEXTTOUNICODE_FLAGS_FLUSH | RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT, &nInfo, &nSrcBytes ); rtl_destroyTextToUnicodeConverter( hSrcConverter ); // Hier werten wir bReplace nicht aus, da fuer MultiByte-Textencodings // sowieso keine Ersatzdarstellung moeglich ist. Da sich der String // sowieso in der Laenge aendern kann, nehmen wir auch sonst keine // Ruecksicht darauf, das die Laenge erhalten bleibt. ImplUpdateStringFromUniString( pTempBuf, nDestChars, eTarget, RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT | RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR | RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 | RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE | RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE ); delete [] pTempBuf; } } // ======================================================================= ByteString& ByteString::Convert( rtl_TextEncoding eSource, rtl_TextEncoding eTarget, sal_Bool bReplace ) { DBG_CHKTHIS( ByteString, DbgCheckByteString ); // rtl_TextEncoding Dontknow kann nicht konvertiert werden if ( (eSource == RTL_TEXTENCODING_DONTKNOW) || (eTarget == RTL_TEXTENCODING_DONTKNOW) ) return *this; // Wenn Source und Target gleich sind, muss nicht konvertiert werden if ( eSource == eTarget ) return *this; // rtl_TextEncoding Symbol nur nach Unicode oder von Unicode wandeln, ansonsten // wollen wir die Zeichencodes beibehalten if ( (eSource == RTL_TEXTENCODING_SYMBOL) && (eTarget != RTL_TEXTENCODING_UTF7) && (eTarget != RTL_TEXTENCODING_UTF8) ) return *this; if ( (eTarget == RTL_TEXTENCODING_SYMBOL) && (eSource != RTL_TEXTENCODING_UTF7) && (eSource != RTL_TEXTENCODING_UTF8) ) return *this; // Zeichensatz umwandeln ImplStringConvert( eSource, eTarget, bReplace ); return *this; } // ======================================================================= char ByteString::Convert( char c, rtl_TextEncoding eSource, rtl_TextEncoding eTarget, sal_Bool bReplace ) { // TextEncoding Dontknow kann nicht konvertiert werden if ( (eSource == RTL_TEXTENCODING_DONTKNOW) || (eTarget == RTL_TEXTENCODING_DONTKNOW) ) return '\0'; // Wenn Source und Target gleich sind, muss nicht konvertiert werden if ( eSource == eTarget ) return c; // TextEncoding Symbol nur nach Unicode oder von Unicode wandeln, ansonsten // wollen wir die Zeichencodes beibehalten if ( (eSource == RTL_TEXTENCODING_SYMBOL) && (eTarget != RTL_TEXTENCODING_UTF7) && (eTarget != RTL_TEXTENCODING_UTF8) ) return '\0'; if ( (eTarget == RTL_TEXTENCODING_SYMBOL) && (eSource != RTL_TEXTENCODING_UTF7) && (eSource != RTL_TEXTENCODING_UTF8) ) return '\0'; sal_uChar* pConvertTab = ImplGet1ByteConvertTab( eSource, eTarget, bReplace ); if ( pConvertTab ) return (char)pConvertTab[(sal_uChar)c]; else return '\0'; } // ======================================================================= sal_Unicode ByteString::ConvertToUnicode( char c, rtl_TextEncoding eTextEncoding ) { sal_Size nLen = 1; return ConvertToUnicode( &c, &nLen, eTextEncoding ); } // ----------------------------------------------------------------------- char ByteString::ConvertFromUnicode( sal_Unicode c, rtl_TextEncoding eTextEncoding, sal_Bool bReplace ) { sal_Size nLen; char aBuf[30]; nLen = ConvertFromUnicode( c, aBuf, sizeof( aBuf ), eTextEncoding, bReplace ); if ( nLen == 1 ) return aBuf[0]; else return 0; } // ----------------------------------------------------------------------- sal_Unicode ByteString::ConvertToUnicode( const char* pChar, sal_Size* pLen, rtl_TextEncoding eTextEncoding ) { // TextEncoding Dontknow wird nicht konvertiert if ( eTextEncoding == RTL_TEXTENCODING_DONTKNOW ) return 0; rtl_TextToUnicodeConverter hConverter; sal_uInt32 nInfo; sal_Size nSrcBytes; sal_Size nDestChars; sal_Unicode nConvChar; hConverter = rtl_createTextToUnicodeConverter( eTextEncoding ); nDestChars = rtl_convertTextToUnicode( hConverter, 0, (const sal_Char*)pChar, *pLen, &nConvChar, 1, RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT | RTL_TEXTTOUNICODE_FLAGS_FLUSH, &nInfo, &nSrcBytes ); rtl_destroyTextToUnicodeConverter( hConverter ); if ( nDestChars == 1 ) { *pLen = nSrcBytes; return nConvChar; } else { *pLen = 0; return 0; } } // ----------------------------------------------------------------------- sal_Size ByteString::ConvertFromUnicode( sal_Unicode c, char* pBuf, sal_Size nBufLen, rtl_TextEncoding eTextEncoding, sal_Bool bReplace ) { // TextEncoding Dontknow wird nicht konvertiert if ( eTextEncoding == RTL_TEXTENCODING_DONTKNOW ) return '\0'; rtl_UnicodeToTextConverter hConverter; sal_uInt32 nInfo; sal_Size nSrcChars; sal_Size nDestBytes; sal_Unicode cUni = c; sal_uInt32 nFlags = RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE | RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE | RTL_UNICODETOTEXT_FLAGS_FLUSH; if ( bReplace ) { nFlags |= RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT | RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT; nFlags |= RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE; if ( nBufLen > 1 ) nFlags |= RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR; } else { nFlags |= RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0 | RTL_UNICODETOTEXT_FLAGS_INVALID_0; } hConverter = rtl_createUnicodeToTextConverter( eTextEncoding ); nDestBytes = rtl_convertUnicodeToText( hConverter, 0, &cUni, 1, (sal_Char*)pBuf, nBufLen, nFlags, &nInfo, &nSrcChars ); rtl_destroyUnicodeToTextConverter( hConverter ); return nDestBytes; } // ======================================================================= ByteString::ByteString( const rtl::OString& rStr ) : mpData(NULL) { DBG_CTOR( ByteString, DbgCheckByteString ); OSL_ENSURE(rStr.pData->length < STRING_MAXLEN, "Overflowing rtl::OString -> ByteString cut to zero length"); if (rStr.pData->length < STRING_MAXLEN) { mpData = reinterpret_cast< ByteStringData * >(const_cast< rtl::OString & >(rStr).pData); STRING_ACQUIRE((STRING_TYPE *)mpData); } else { STRING_NEW((STRING_TYPE **)&mpData); } } // ----------------------------------------------------------------------- ByteString& ByteString::Assign( const rtl::OString& rStr ) { DBG_CHKTHIS( ByteString, DbgCheckByteString ); OSL_ENSURE(rStr.pData->length < STRING_MAXLEN, "Overflowing rtl::OString -> ByteString cut to zero length"); if (rStr.pData->length < STRING_MAXLEN) { STRING_RELEASE((STRING_TYPE *)mpData); mpData = reinterpret_cast< ByteStringData * >(const_cast< rtl::OString & >(rStr).pData); STRING_ACQUIRE((STRING_TYPE *)mpData); } else { STRING_NEW((STRING_TYPE **)&mpData); } return *this; }