xref: /trunk/main/io/source/TextInputStream/TextInputStream.cxx (revision 1ecadb572e7010ff3b3382ad9bf179dbc6efadbb)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_io.hxx"
30 
31 
32 #include <string.h>
33 #include <osl/mutex.hxx>
34 #include <osl/diagnose.h>
35 
36 #include <rtl/unload.h>
37 
38 #include <uno/mapping.hxx>
39 
40 #include <cppuhelper/factory.hxx>
41 #include <cppuhelper/implbase3.hxx>
42 #include <cppuhelper/implementationentry.hxx>
43 
44 #include <rtl/textenc.h>
45 #include <rtl/tencinfo.h>
46 
47 #include <com/sun/star/io/XTextInputStream.hpp>
48 #include <com/sun/star/io/XActiveDataSink.hpp>
49 #include <com/sun/star/lang/XServiceInfo.hpp>
50 
51 
// Names reported by this component: IMPLEMENTATION_NAME is returned by
// TextInputStream_getImplementationName(), SERVICE_NAME is the single entry
// returned by TextInputStream_getSupportedServiceNames().
#define IMPLEMENTATION_NAME "com.sun.star.comp.io.TextInputStream"
#define SERVICE_NAME "com.sun.star.io.TextInputStream"
54 
55 using namespace ::osl;
56 using namespace ::rtl;
57 using namespace ::cppu;
58 using namespace ::com::sun::star::uno;
59 using namespace ::com::sun::star::lang;
60 using namespace ::com::sun::star::io;
61 using namespace ::com::sun::star::registry;
62 
63 namespace io_TextInputStream
64 {
    // Module-wide counter: acquired in the OTextInputStream constructor,
    // released in its destructor, queried by component_canUnload() and
    // handed to the factory helper through g_entries.
    rtl_StandardModuleCount g_moduleCount = MODULE_COUNT_INIT;
66 
67 //===========================================================================
68 // Implementation XTextInputStream
69 
// Base class of OTextInputStream: helper implementing the three exported interfaces
typedef WeakImplHelper3< XTextInputStream, XActiveDataSink, XServiceInfo > TextInputStreamHelper;
// NOTE(review): forward declaration is not referenced anywhere in the visible part of this file
class OCommandEnvironment;

// Initial size of the converted-character buffer (mpBuffer), in sal_Unicode units
#define INITIAL_UNICODE_BUFFER_CAPACITY     0x100
// Number of bytes requested from the source stream per read; also the initial length of mSeqSource
#define READ_BYTE_COUNT                     0x100
75 
/**
 * Text reader on top of a byte-oriented XInputStream.
 *
 * Bytes are fetched from the stream set with setInputStream(), converted to
 * UTF-16 with an rtl text converter (encoding chosen with setEncoding(),
 * "utf8" is used when none was set before the first read) and collected in an
 * internal character buffer, from which readLine()/readString() cut their
 * results. The XInputStream methods simply forward to the underlying stream.
 */
class OTextInputStream : public TextInputStreamHelper
{
    // Source stream that delivers the raw bytes; set via setInputStream()
    Reference< XInputStream > mxStream;

    // Encoding
    // Encoding name as last accepted by setEncoding()
    OUString mEncoding;
    // True once setEncoding() has created converter and context
    sal_Bool mbEncodingInitialized;
    // Converter/context handles; only valid while mbEncodingInitialized is set
    rtl_TextToUnicodeConverter  mConvText2Unicode;
    rtl_TextToUnicodeContext    mContextText2Unicode;
    // Raw bytes of the current read, filled by implReadNext()
    Sequence<sal_Int8>          mSeqSource;

    // Internal buffer for characters that are already converted successfully
    // mpBuffer is allocated lazily by implReadString(); mnBufferSize is its
    // capacity and mnCharsInBuffer the number of valid characters, both
    // counted in sal_Unicode units
    sal_Unicode* mpBuffer;
    sal_Int32 mnBufferSize;
    sal_Int32 mnCharsInBuffer;
    // Set by implReadNext() when the source stream is considered exhausted
    sal_Bool mbReachedEOF;

    // Doubles the capacity of mpBuffer, keeping the buffered characters
    void implResizeBuffer( void );
    // Common implementation of readLine() (bFindLineEnd set) and readString()
    OUString implReadString( const Sequence< sal_Unicode >& Delimiters,
        sal_Bool bRemoveDelimiter, sal_Bool bFindLineEnd )
            throw(IOException, RuntimeException);
    // Reads and converts the next chunk of bytes; returns the number of
    // characters appended to mpBuffer
    sal_Int32 implReadNext() throw(IOException, RuntimeException);

public:
    OTextInputStream();
    virtual ~OTextInputStream();

    // Methods XTextInputStream
    virtual OUString SAL_CALL readLine(  )
        throw(IOException, RuntimeException);
    virtual OUString SAL_CALL readString( const Sequence< sal_Unicode >& Delimiters, sal_Bool bRemoveDelimiter )
        throw(IOException, RuntimeException);
    virtual sal_Bool SAL_CALL isEOF(  )
        throw(IOException, RuntimeException);
    virtual void SAL_CALL setEncoding( const OUString& Encoding ) throw(RuntimeException);

    // Methods XInputStream
    // (all forwarded unchanged to mxStream, bypassing the character buffer)
    virtual sal_Int32 SAL_CALL readBytes( Sequence< sal_Int8 >& aData, sal_Int32 nBytesToRead )
        throw(NotConnectedException, BufferSizeExceededException, IOException, RuntimeException);
    virtual sal_Int32 SAL_CALL readSomeBytes( Sequence< sal_Int8 >& aData, sal_Int32 nMaxBytesToRead )
        throw(NotConnectedException, BufferSizeExceededException, IOException, RuntimeException);
    virtual void SAL_CALL skipBytes( sal_Int32 nBytesToSkip )
        throw(NotConnectedException, BufferSizeExceededException, IOException, RuntimeException);
    virtual sal_Int32 SAL_CALL available(  )
        throw(NotConnectedException, IOException, RuntimeException);
    virtual void SAL_CALL closeInput(  )
        throw(NotConnectedException, IOException, RuntimeException);

    // Methods XActiveDataSink
    virtual void SAL_CALL setInputStream( const Reference< XInputStream >& aStream )
        throw(RuntimeException);
    virtual Reference< XInputStream > SAL_CALL getInputStream()
        throw(RuntimeException);

    // Methods XServiceInfo
        virtual OUString              SAL_CALL getImplementationName() throw();
        virtual Sequence< OUString >  SAL_CALL getSupportedServiceNames(void) throw();
        virtual sal_Bool              SAL_CALL supportsService(const OUString& ServiceName) throw();
};
135 
136 OTextInputStream::OTextInputStream()
137     : mSeqSource( READ_BYTE_COUNT ), mpBuffer( NULL ), mnBufferSize( 0 )
138     , mnCharsInBuffer( 0 ), mbReachedEOF( sal_False )
139 {
140     g_moduleCount.modCnt.acquire( &g_moduleCount.modCnt );
141     mbEncodingInitialized = false;
142 }
143 
144 OTextInputStream::~OTextInputStream()
145 {
146     if( mbEncodingInitialized )
147     {
148         rtl_destroyUnicodeToTextContext( mConvText2Unicode, mContextText2Unicode );
149         rtl_destroyUnicodeToTextConverter( mConvText2Unicode );
150     }
151     g_moduleCount.modCnt.release( &g_moduleCount.modCnt );
152 }
153 
154 void OTextInputStream::implResizeBuffer( void )
155 {
156     sal_Int32 mnNewBufferSize = mnBufferSize * 2;
157     sal_Unicode* pNewBuffer = new sal_Unicode[ mnNewBufferSize ];
158     memcpy( pNewBuffer, mpBuffer, mnCharsInBuffer * sizeof( sal_Unicode ) );
159     mpBuffer = pNewBuffer;
160     mnBufferSize = mnNewBufferSize;
161 }
162 
163 
164 //===========================================================================
165 // XTextInputStream
166 
167 OUString OTextInputStream::readLine(  )
168     throw(IOException, RuntimeException)
169 {
170     static Sequence< sal_Unicode > aDummySeq;
171     return implReadString( aDummySeq, sal_True, sal_True );
172 }
173 
174 OUString OTextInputStream::readString( const Sequence< sal_Unicode >& Delimiters, sal_Bool bRemoveDelimiter )
175         throw(IOException, RuntimeException)
176 {
177     return implReadString( Delimiters, bRemoveDelimiter, sal_False );
178 }
179 
180 sal_Bool OTextInputStream::isEOF()
181     throw(IOException, RuntimeException)
182 {
183     sal_Bool bRet = sal_False;
184     if( mnCharsInBuffer == 0 && mbReachedEOF )
185         bRet = sal_True;
186     return bRet;
187 }
188 
189 
190 OUString OTextInputStream::implReadString( const Sequence< sal_Unicode >& Delimiters,
191                                            sal_Bool bRemoveDelimiter, sal_Bool bFindLineEnd )
192         throw(IOException, RuntimeException)
193 {
194     OUString aRetStr;
195     if( !mbEncodingInitialized )
196     {
197         OUString aUtf8Str( RTL_CONSTASCII_USTRINGPARAM("utf8") );
198         setEncoding( aUtf8Str );
199     }
200     if( !mbEncodingInitialized )
201         return aRetStr;
202 
203     if( !mpBuffer )
204     {
205         mnBufferSize = INITIAL_UNICODE_BUFFER_CAPACITY;
206         mpBuffer = new sal_Unicode[ mnBufferSize ];
207     }
208 
209     // Only for bFindLineEnd
210     sal_Unicode cLineEndChar1 = 0x0D;
211     sal_Unicode cLineEndChar2 = 0x0A;
212 
213     sal_Int32 nBufferReadPos = 0;
214     sal_Int32 nCopyLen = 0;
215     sal_Bool bFound = sal_False;
216     sal_Bool bFoundFirstLineEndChar = sal_False;
217     sal_Unicode cFirstLineEndChar = 0;
218     const sal_Unicode* pDelims = Delimiters.getConstArray();
219     const sal_Int32 nDelimCount = Delimiters.getLength();
220     while( !bFound )
221     {
222         // Still characters available?
223         if( nBufferReadPos == mnCharsInBuffer )
224         {
225             // Already reached EOF? Then we can't read any more
226             if( mbReachedEOF )
227                 break;
228 
229             // No, so read new characters
230             if( !implReadNext() )
231                 break;
232         }
233 
234         // Now there should be characters available
235         // (otherwise the loop should have been breaked before)
236         sal_Unicode c = mpBuffer[ nBufferReadPos++ ];
237 
238         if( bFindLineEnd )
239         {
240             if( bFoundFirstLineEndChar )
241             {
242                 bFound = sal_True;
243                 nCopyLen = nBufferReadPos - 2;
244                 if( c == cLineEndChar1 || c == cLineEndChar2 )
245                 {
246                     // Same line end char -> new line break
247                     if( c == cFirstLineEndChar )
248                     {
249                         nBufferReadPos--;
250                     }
251                 }
252                 else
253                 {
254                     // No second line end char
255                     nBufferReadPos--;
256                 }
257             }
258             else if( c == cLineEndChar1 || c == cLineEndChar2 )
259             {
260                 bFoundFirstLineEndChar = sal_True;
261                 cFirstLineEndChar = c;
262             }
263         }
264         else
265         {
266             for( sal_Int32 i = 0 ; i < nDelimCount ; i++ )
267             {
268                 if( c == pDelims[ i ] )
269                 {
270                     bFound = sal_True;
271                     nCopyLen = nBufferReadPos;
272                     if( bRemoveDelimiter )
273                         nCopyLen--;
274                 }
275             }
276         }
277     }
278 
279     // Nothing found? Return all
280     if( !nCopyLen && !bFound && mbReachedEOF )
281         nCopyLen = nBufferReadPos;
282 
283     // Create string
284     if( nCopyLen )
285         aRetStr = OUString( mpBuffer, nCopyLen );
286 
287     // Copy rest of buffer
288     memmove( mpBuffer, mpBuffer + nBufferReadPos,
289         (mnCharsInBuffer - nBufferReadPos) * sizeof( sal_Unicode ) );
290     mnCharsInBuffer -= nBufferReadPos;
291 
292     return aRetStr;
293 }
294 
295 
296 sal_Int32 OTextInputStream::implReadNext()
297         throw(IOException, RuntimeException)
298 {
299     sal_Int32 nFreeBufferSize = mnBufferSize - mnCharsInBuffer;
300     if( nFreeBufferSize < READ_BYTE_COUNT )
301         implResizeBuffer();
302     nFreeBufferSize = mnBufferSize - mnCharsInBuffer;
303 
304     try
305     {
306         sal_Int32 nBytesToRead = READ_BYTE_COUNT;
307         sal_Int32 nRead = mxStream->readSomeBytes( mSeqSource, nBytesToRead );
308         sal_Int32 nTotalRead = nRead;
309         if( nRead < nBytesToRead )
310             mbReachedEOF = sal_True;
311 
312         // Try to convert
313         sal_uInt32 uiInfo;
314         sal_Size nSrcCvtBytes = 0;
315         sal_Size nTargetCount = 0;
316         sal_Size nSourceCount = 0;
317         while( sal_True )
318         {
319             const sal_Int8 *pbSource = mSeqSource.getConstArray();
320 
321             // All invalid characters are transformed to the unicode undefined char
322             nTargetCount += rtl_convertTextToUnicode(
323                                 mConvText2Unicode,
324                                 mContextText2Unicode,
325                                 (const sal_Char*) &( pbSource[nSourceCount] ),
326                                 nTotalRead - nSourceCount,
327                                 mpBuffer + mnCharsInBuffer + nTargetCount,
328                                 nFreeBufferSize - nTargetCount,
329                                 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT   |
330                                 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
331                                 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT,
332                                 &uiInfo,
333                                 &nSrcCvtBytes );
334             nSourceCount += nSrcCvtBytes;
335 
336             sal_Bool bCont = sal_False;
337             if( uiInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL )
338             {
339                 implResizeBuffer();
340                 bCont = sal_True;
341             }
342 
343             if( uiInfo & RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL )
344             {
345                 // read next byte
346                 static Sequence< sal_Int8 > aOneByteSeq( 1 );
347                 nRead = mxStream->readSomeBytes( aOneByteSeq, 1 );
348                 if( nRead == 0 )
349                 {
350                     mbReachedEOF = sal_True;
351                     break;
352                 }
353 
354                 sal_Int32 nOldLen = mSeqSource.getLength();
355                 nTotalRead++;
356                 if( nTotalRead > nOldLen )
357                 {
358                     mSeqSource.realloc( nTotalRead );
359                 }
360                 mSeqSource.getArray()[ nOldLen ] = aOneByteSeq.getConstArray()[ 0 ];
361                 pbSource = mSeqSource.getConstArray();
362                 bCont = sal_True;
363             }
364 
365             if( bCont )
366                 continue;
367             break;
368         }
369 
370         mnCharsInBuffer += nTargetCount;
371         return nTargetCount;
372     }
373     catch( NotConnectedException& )
374     {
375         throw IOException();
376         //throw IOException( L"OTextInputStream::implReadString failed" );
377     }
378     catch( BufferSizeExceededException& )
379     {
380         throw IOException();
381     }
382 }
383 
384 void OTextInputStream::setEncoding( const OUString& Encoding )
385     throw(RuntimeException)
386 {
387     OString aOEncodingStr = OUStringToOString( Encoding, RTL_TEXTENCODING_ASCII_US );
388     rtl_TextEncoding encoding = rtl_getTextEncodingFromMimeCharset( aOEncodingStr.getStr() );
389     if( RTL_TEXTENCODING_DONTKNOW == encoding )
390         return;
391 
392     mbEncodingInitialized = true;
393     mConvText2Unicode = rtl_createTextToUnicodeConverter( encoding );
394     mContextText2Unicode = rtl_createTextToUnicodeContext( mConvText2Unicode );
395     mEncoding = Encoding;
396 }
397 
398 //===========================================================================
399 // XInputStream
400 
401 sal_Int32 OTextInputStream::readBytes( Sequence< sal_Int8 >& aData, sal_Int32 nBytesToRead )
402     throw(NotConnectedException, BufferSizeExceededException, IOException, RuntimeException)
403 {
404     return mxStream->readBytes( aData, nBytesToRead );
405 }
406 
407 sal_Int32 OTextInputStream::readSomeBytes( Sequence< sal_Int8 >& aData, sal_Int32 nMaxBytesToRead )
408     throw(NotConnectedException, BufferSizeExceededException, IOException, RuntimeException)
409 {
410     return mxStream->readSomeBytes( aData, nMaxBytesToRead );
411 }
412 
413 void OTextInputStream::skipBytes( sal_Int32 nBytesToSkip )
414     throw(NotConnectedException, BufferSizeExceededException, IOException, RuntimeException)
415 {
416     mxStream->skipBytes( nBytesToSkip );
417 }
418 
419 sal_Int32 OTextInputStream::available(  )
420     throw(NotConnectedException, IOException, RuntimeException)
421 {
422     return mxStream->available();
423 }
424 
425 void OTextInputStream::closeInput(  )
426     throw(NotConnectedException, IOException, RuntimeException)
427 {
428     mxStream->closeInput();
429 }
430 
431 
432 //===========================================================================
433 // XActiveDataSink
434 
435 void OTextInputStream::setInputStream( const Reference< XInputStream >& aStream )
436     throw(RuntimeException)
437 {
438     mxStream = aStream;
439 }
440 
441 Reference< XInputStream > OTextInputStream::getInputStream()
442     throw(RuntimeException)
443 {
444     return mxStream;
445 }
446 
447 
448 Reference< XInterface > SAL_CALL TextInputStream_CreateInstance( const Reference< XComponentContext > &)
449 {
450     return Reference < XInterface >( ( OWeakObject * ) new OTextInputStream() );
451 }
452 
453 OUString TextInputStream_getImplementationName()
454 {
455     return OUString( RTL_CONSTASCII_USTRINGPARAM( IMPLEMENTATION_NAME ) );
456 }
457 
458 Sequence< OUString > TextInputStream_getSupportedServiceNames()
459 {
460     static Sequence < OUString > *pNames = 0;
461     if( ! pNames )
462     {
463         MutexGuard guard( Mutex::getGlobalMutex() );
464         if( !pNames )
465         {
466             static Sequence< OUString > seqNames(1);
467             seqNames.getArray()[0] = OUString( RTL_CONSTASCII_USTRINGPARAM( SERVICE_NAME ) );
468             pNames = &seqNames;
469         }
470     }
471     return *pNames;
472 }
473 
474 OUString OTextInputStream::getImplementationName() throw()
475 {
476     return TextInputStream_getImplementationName();
477 }
478 
479 sal_Bool OTextInputStream::supportsService(const OUString& ServiceName) throw()
480 {
481     Sequence< OUString > aSNL = getSupportedServiceNames();
482     const OUString * pArray = aSNL.getConstArray();
483 
484     for( sal_Int32 i = 0; i < aSNL.getLength(); i++ )
485         if( pArray[i] == ServiceName )
486             return sal_True;
487 
488     return sal_False;
489 }
490 
491 Sequence< OUString > OTextInputStream::getSupportedServiceNames(void) throw()
492 {
493     return TextInputStream_getSupportedServiceNames();
494 }
495 
496 }
497 
498 using namespace io_TextInputStream;
499 
// Table handed to component_getFactoryHelper() in component_getFactory().
// It describes the single implementation of this module: its creation
// function, the functions delivering implementation name and supported
// service names, the factory creation function and the module counter.
static struct ImplementationEntry g_entries[] =
{
    {
        TextInputStream_CreateInstance, TextInputStream_getImplementationName ,
        TextInputStream_getSupportedServiceNames, createSingleComponentFactory ,
        &g_moduleCount.modCnt , 0
    },
    // All-zero entry; presumably the end-of-table marker expected by the helper - confirm
    { 0, 0, 0, 0, 0, 0 }
};
509 
510 extern "C"
511 {
512 sal_Bool SAL_CALL component_canUnload( TimeValue *pTime )
513 {
514     return g_moduleCount.canUnload( &g_moduleCount , pTime );
515 }
516 
517 //==================================================================================================
518 void SAL_CALL component_getImplementationEnvironment(
519     const sal_Char ** ppEnvTypeName, uno_Environment ** )
520 {
521     *ppEnvTypeName = CPPU_CURRENT_LANGUAGE_BINDING_NAME;
522 }
523 //==================================================================================================
524 void * SAL_CALL component_getFactory(
525     const sal_Char * pImplName, void * pServiceManager, void * pRegistryKey )
526 {
527     return component_getFactoryHelper( pImplName, pServiceManager, pRegistryKey , g_entries );
528 }
529 }
530 
531 
532