1*8d192041SAndrew Rist /************************************************************** 2cdf0e10cSrcweir * 3*8d192041SAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one 4*8d192041SAndrew Rist * or more contributor license agreements. See the NOTICE file 5*8d192041SAndrew Rist * distributed with this work for additional information 6*8d192041SAndrew Rist * regarding copyright ownership. The ASF licenses this file 7*8d192041SAndrew Rist * to you under the Apache License, Version 2.0 (the 8*8d192041SAndrew Rist * "License"); you may not use this file except in compliance 9*8d192041SAndrew Rist * with the License. You may obtain a copy of the License at 10*8d192041SAndrew Rist * 11*8d192041SAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0 12*8d192041SAndrew Rist * 13*8d192041SAndrew Rist * Unless required by applicable law or agreed to in writing, 14*8d192041SAndrew Rist * software distributed under the License is distributed on an 15*8d192041SAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16*8d192041SAndrew Rist * KIND, either express or implied. See the License for the 17*8d192041SAndrew Rist * specific language governing permissions and limitations 18*8d192041SAndrew Rist * under the License. 19*8d192041SAndrew Rist * 20*8d192041SAndrew Rist *************************************************************/ 21*8d192041SAndrew Rist 22*8d192041SAndrew Rist 23cdf0e10cSrcweir 24cdf0e10cSrcweir // TODO: Woher? 25cdf0e10cSrcweir #define Max( a, b ) (((a)>(b)) ? (a) : (b) ) 26cdf0e10cSrcweir #define Min( a, b ) (((a)<(b)) ? (a) : (b) ) 27cdf0e10cSrcweir 28cdf0e10cSrcweir /* 29cdf0e10cSrcweir * 30cdf0e10cSrcweir * Text2UnicodeConverter 31cdf0e10cSrcweir * 32cdf0e10cSrcweir **/ 33cdf0e10cSrcweir namespace sax_expatwrap { 34cdf0e10cSrcweir 35cdf0e10cSrcweir class Text2UnicodeConverter 36cdf0e10cSrcweir { 37cdf0e10cSrcweir 38cdf0e10cSrcweir public: 39cdf0e10cSrcweir Text2UnicodeConverter( const ::rtl::OString & sEncoding ); 40cdf0e10cSrcweir ~Text2UnicodeConverter(); 41cdf0e10cSrcweir 42cdf0e10cSrcweir ::com::sun::star::uno::Sequence < sal_Unicode > convert( const ::com::sun::star::uno::Sequence<sal_Int8> & ); canContinue()43cdf0e10cSrcweir sal_Bool canContinue() { return m_bCanContinue; } 44cdf0e10cSrcweir 45cdf0e10cSrcweir private: 46cdf0e10cSrcweir void init( rtl_TextEncoding encoding ); 47cdf0e10cSrcweir 48cdf0e10cSrcweir rtl_TextToUnicodeConverter m_convText2Unicode; 49cdf0e10cSrcweir rtl_TextToUnicodeContext m_contextText2Unicode; 50cdf0e10cSrcweir sal_Bool m_bCanContinue; 51cdf0e10cSrcweir sal_Bool m_bInitialized; 52cdf0e10cSrcweir rtl_TextEncoding m_rtlEncoding; 53cdf0e10cSrcweir ::com::sun::star::uno::Sequence<sal_Int8> m_seqSource; 54cdf0e10cSrcweir }; 55cdf0e10cSrcweir 56cdf0e10cSrcweir /*---------------------------------------- 57cdf0e10cSrcweir * 58cdf0e10cSrcweir * Unicode2TextConverter 59cdf0e10cSrcweir * 60cdf0e10cSrcweir **-----------------------------------------*/ 61cdf0e10cSrcweir class Unicode2TextConverter 62cdf0e10cSrcweir { 63cdf0e10cSrcweir public: 64cdf0e10cSrcweir Unicode2TextConverter( rtl_TextEncoding encoding ); 65cdf0e10cSrcweir ~Unicode2TextConverter(); 66cdf0e10cSrcweir convert(const::rtl::OUString & s)67cdf0e10cSrcweir inline ::com::sun::star::uno::Sequence<sal_Int8> convert( const ::rtl::OUString &s ) 68cdf0e10cSrcweir { 69cdf0e10cSrcweir return convert( s.getStr() , s.getLength() ); 70cdf0e10cSrcweir } 71cdf0e10cSrcweir ::com::sun::star::uno::Sequence<sal_Int8> convert( const sal_Unicode * , sal_Int32 nLength ); canContinue()72cdf0e10cSrcweir sal_Bool canContinue() { return m_bCanContinue; } 73cdf0e10cSrcweir 74cdf0e10cSrcweir private: 75cdf0e10cSrcweir void init( rtl_TextEncoding encoding ); 76cdf0e10cSrcweir 77cdf0e10cSrcweir rtl_UnicodeToTextConverter m_convUnicode2Text; 78cdf0e10cSrcweir rtl_UnicodeToTextContext m_contextUnicode2Text; 79cdf0e10cSrcweir sal_Bool m_bCanContinue; 80cdf0e10cSrcweir sal_Bool m_bInitialized; 81cdf0e10cSrcweir rtl_TextEncoding m_rtlEncoding; 82cdf0e10cSrcweir ::com::sun::star::uno::Sequence<sal_Unicode> m_seqSource; 83cdf0e10cSrcweir }; 84cdf0e10cSrcweir 85cdf0e10cSrcweir 86cdf0e10cSrcweir 87cdf0e10cSrcweir /*---------------------------------------- 88cdf0e10cSrcweir * 89cdf0e10cSrcweir * XMLFile2UTFConverter 90cdf0e10cSrcweir * 91cdf0e10cSrcweir **-----------------------------------------*/ 92cdf0e10cSrcweir class XMLFile2UTFConverter 93cdf0e10cSrcweir { 94cdf0e10cSrcweir public: XMLFile2UTFConverter()95cdf0e10cSrcweir XMLFile2UTFConverter( ): 96cdf0e10cSrcweir m_bStarted( sal_False ), 97cdf0e10cSrcweir m_pText2Unicode( 0 ), 98cdf0e10cSrcweir m_pUnicode2Text( 0 ) 99cdf0e10cSrcweir {} 100cdf0e10cSrcweir 101cdf0e10cSrcweir ~XMLFile2UTFConverter(); 102cdf0e10cSrcweir setInputStream(::com::sun::star::uno::Reference<::com::sun::star::io::XInputStream> & r)103cdf0e10cSrcweir void setInputStream( ::com::sun::star::uno::Reference< ::com::sun::star::io::XInputStream > &r ) { m_in = r; } setEncoding(const::rtl::OString & s)104cdf0e10cSrcweir void setEncoding( const ::rtl::OString &s ) { m_sEncoding = s; } 105cdf0e10cSrcweir 106cdf0e10cSrcweir 107cdf0e10cSrcweir 108cdf0e10cSrcweir // @param nMaxToRead The number of chars, that should be read. Note that this is no exact number. There 109cdf0e10cSrcweir // may be returned less or more bytes than ordered. 110cdf0e10cSrcweir sal_Int32 readAndConvert( ::com::sun::star::uno::Sequence<sal_Int8> &seq , sal_Int32 nMaxToRead ) 111cdf0e10cSrcweir throw ( ::com::sun::star::io::IOException, 112cdf0e10cSrcweir ::com::sun::star::io::NotConnectedException , 113cdf0e10cSrcweir ::com::sun::star::io::BufferSizeExceededException , 114cdf0e10cSrcweir ::com::sun::star::uno::RuntimeException ); 115cdf0e10cSrcweir 116cdf0e10cSrcweir private: 117cdf0e10cSrcweir 118cdf0e10cSrcweir // Called only on first Sequence of bytes. Tries to figure out file format and encoding information. 119cdf0e10cSrcweir // @return TRUE, when encoding information could be retrieved 120cdf0e10cSrcweir // @return FALSE, when no encoding information was found in file 121cdf0e10cSrcweir sal_Bool scanForEncoding( ::com::sun::star::uno::Sequence<sal_Int8> &seq ); 122cdf0e10cSrcweir 123cdf0e10cSrcweir // Called only on first Sequence of bytes. Tries to figure out 124cdf0e10cSrcweir // if enough data is available to scan encoding 125cdf0e10cSrcweir // @return TRUE, when encoding is retrievable 126cdf0e10cSrcweir // @return FALSE, when more data is needed 127cdf0e10cSrcweir sal_Bool isEncodingRecognizable( const ::com::sun::star::uno::Sequence< sal_Int8 > & seq ); 128cdf0e10cSrcweir 129cdf0e10cSrcweir // When encoding attribute is within the text (in the first line), it is removed. 130cdf0e10cSrcweir void removeEncoding( ::com::sun::star::uno::Sequence<sal_Int8> &seq ); 131cdf0e10cSrcweir 132cdf0e10cSrcweir // Initializes decoding depending on m_sEncoding setting 133cdf0e10cSrcweir void initializeDecoding(); 134cdf0e10cSrcweir private: 135cdf0e10cSrcweir ::com::sun::star::uno::Reference< ::com::sun::star::io::XInputStream > m_in; 136cdf0e10cSrcweir 137cdf0e10cSrcweir sal_Bool m_bStarted; 138cdf0e10cSrcweir ::rtl::OString m_sEncoding; 139cdf0e10cSrcweir 140cdf0e10cSrcweir Text2UnicodeConverter *m_pText2Unicode; 141cdf0e10cSrcweir Unicode2TextConverter *m_pUnicode2Text; 142cdf0e10cSrcweir }; 143cdf0e10cSrcweir } 144