1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 //#include <stdlib.h>
25 //#include <sal/alloca.h>
26
27 #include <boost/scoped_ptr.hpp>
28
29 #include <osl/diagnose.h>
30 #include <rtl/ustrbuf.hxx>
31
32 #include <com/sun/star/lang/DisposedException.hpp>
33 #include <com/sun/star/xml/sax/XFastContextHandler.hpp>
34 #include <com/sun/star/xml/sax/SAXParseException.hpp>
35 #include <com/sun/star/xml/sax/FastToken.hpp>
36
37 #include "fastparser.hxx"
38
39 #include <string.h>
40
41 using ::rtl::OString;
42 using ::rtl::OUString;
43 using ::rtl::OUStringBuffer;
44 using namespace ::std;
45 using namespace ::osl;
46 using namespace ::cppu;
47 using namespace ::com::sun::star::uno;
48 using namespace ::com::sun::star::lang;
49 using namespace ::com::sun::star::xml::sax;
50 //using namespace ::com::sun::star::util;
51 using namespace ::com::sun::star::io;
52
53 namespace sax_fastparser {
54
55 // --------------------------------------------------------------------
56
57 struct SaxContextImpl
58 {
59 Reference< XFastContextHandler > mxContext;
60 sal_uInt32 mnNamespaceCount;
61 sal_Int32 mnElementToken;
62 OUString maNamespace;
63 OUString maElementName;
64
SaxContextImplsax_fastparser::SaxContextImpl65 SaxContextImpl() { mnNamespaceCount = 0; mnElementToken = 0; }
SaxContextImplsax_fastparser::SaxContextImpl66 SaxContextImpl( const SaxContextImplPtr& p ) { mnNamespaceCount = p->mnNamespaceCount; mnElementToken = p->mnElementToken; maNamespace = p->maNamespace; }
67 };
68
69 // --------------------------------------------------------------------
70
71 struct NamespaceDefine
72 {
73 OString maPrefix;
74 sal_Int32 mnToken;
75 OUString maNamespaceURL;
76
NamespaceDefinesax_fastparser::NamespaceDefine77 NamespaceDefine( const OString& rPrefix, sal_Int32 nToken, const OUString& rNamespaceURL ) : maPrefix( rPrefix ), mnToken( nToken ), maNamespaceURL( rNamespaceURL ) {}
78 };
79
80 // --------------------------------------------------------------------
81 // FastLocatorImpl
82 // --------------------------------------------------------------------
83
84 class FastSaxParser;
85
86 class FastLocatorImpl : public WeakImplHelper1< XLocator >
87 {
88 public:
FastLocatorImpl(FastSaxParser * p)89 FastLocatorImpl( FastSaxParser *p ) : mpParser(p) {}
90
dispose()91 void dispose() { mpParser = 0; }
checkDispose()92 void checkDispose() throw (RuntimeException) { if( !mpParser ) throw DisposedException(); }
93
94 //XLocator
95 virtual sal_Int32 SAL_CALL getColumnNumber(void) throw (RuntimeException);
96 virtual sal_Int32 SAL_CALL getLineNumber(void) throw (RuntimeException);
97 virtual OUString SAL_CALL getPublicId(void) throw (RuntimeException);
98 virtual OUString SAL_CALL getSystemId(void) throw (RuntimeException);
99
100 private:
101 FastSaxParser *mpParser;
102 };
103
104 // --------------------------------------------------------------------
105 // FastSaxParser
106 // --------------------------------------------------------------------
107
108 //---------------------------------------------
109 // the implementation part
110 //---------------------------------------------
111
112 extern "C" {
113
call_callbackStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)114 static void call_callbackStartElement(void *userData, const XML_Char *name , const XML_Char **atts)
115 {
116 FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( userData );
117 pFastParser->callbackStartElement( name, atts );
118 }
119
call_callbackEndElement(void * userData,const XML_Char * name)120 static void call_callbackEndElement(void *userData, const XML_Char *name)
121 {
122 FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( userData );
123 pFastParser->callbackEndElement( name );
124 }
125
call_callbackCharacters(void * userData,const XML_Char * s,int nLen)126 static void call_callbackCharacters( void *userData , const XML_Char *s , int nLen )
127 {
128 FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( userData );
129 pFastParser->callbackCharacters( s, nLen );
130 }
131
call_callbackExternalEntityRef(XML_Parser parser,const XML_Char * openEntityNames,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)132 static int call_callbackExternalEntityRef( XML_Parser parser,
133 const XML_Char *openEntityNames, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId )
134 {
135 FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( XML_GetUserData( parser ) );
136 return pFastParser->callbackExternalEntityRef( parser, openEntityNames, base, systemId, publicId );
137 }
138
139 } // extern "C"
140
141 // --------------------------------------------------------------------
142 // FastLocatorImpl implementation
143 // --------------------------------------------------------------------
144
getColumnNumber(void)145 sal_Int32 SAL_CALL FastLocatorImpl::getColumnNumber(void) throw (RuntimeException)
146 {
147 checkDispose();
148 return XML_GetCurrentColumnNumber( mpParser->getEntity().mpParser );
149 }
150
151 // --------------------------------------------------------------------
152
getLineNumber(void)153 sal_Int32 SAL_CALL FastLocatorImpl::getLineNumber(void) throw (RuntimeException)
154 {
155 checkDispose();
156 return XML_GetCurrentLineNumber( mpParser->getEntity().mpParser );
157 }
158
159 // --------------------------------------------------------------------
160
getPublicId(void)161 OUString SAL_CALL FastLocatorImpl::getPublicId(void) throw (RuntimeException)
162 {
163 checkDispose();
164 return mpParser->getEntity().maStructSource.sPublicId;
165 }
166 // --------------------------------------------------------------------
167
getSystemId(void)168 OUString SAL_CALL FastLocatorImpl::getSystemId(void) throw (RuntimeException)
169 {
170 checkDispose();
171 return mpParser->getEntity().maStructSource.sSystemId;
172 }
173
174 // --------------------------------------------------------------------
175
ParserData()176 ParserData::ParserData()
177 {
178 }
179
~ParserData()180 ParserData::~ParserData()
181 {
182 }
183
184 // --------------------------------------------------------------------
185
Entity(const ParserData & rData)186 Entity::Entity( const ParserData& rData ) :
187 ParserData( rData )
188 {
189 // performance-Improvment. Reference is needed when calling the startTag callback.
190 // Handing out the same object with every call is allowed (see sax-specification)
191 mxAttributes.set( new FastAttributeList( mxTokenHandler ) );
192 }
193
~Entity()194 Entity::~Entity()
195 {
196 }
197
198 // --------------------------------------------------------------------
199 // FastSaxParser implementation
200 // --------------------------------------------------------------------
201
FastSaxParser()202 FastSaxParser::FastSaxParser()
203 {
204 mxDocumentLocator.set( new FastLocatorImpl( this ) );
205 }
206
207 // --------------------------------------------------------------------
208
~FastSaxParser()209 FastSaxParser::~FastSaxParser()
210 {
211 if( mxDocumentLocator.is() )
212 mxDocumentLocator->dispose();
213 }
214
215 // --------------------------------------------------------------------
216
pushContext()217 void FastSaxParser::pushContext()
218 {
219 Entity& rEntity = getEntity();
220 if( rEntity.maContextStack.empty() )
221 {
222 rEntity.maContextStack.push( SaxContextImplPtr( new SaxContextImpl ) );
223 DefineNamespace( OString("xml"), "http://www.w3.org/XML/1998/namespace");
224 }
225 else
226 {
227 rEntity.maContextStack.push( SaxContextImplPtr( new SaxContextImpl( rEntity.maContextStack.top() ) ) );
228 }
229 }
230
231 // --------------------------------------------------------------------
232
popContext()233 void FastSaxParser::popContext()
234 {
235 Entity& rEntity = getEntity();
236 OSL_ENSURE( !rEntity.maContextStack.empty(), "sax::FastSaxParser::popContext(), pop without push?" );
237 if( !rEntity.maContextStack.empty() )
238 rEntity.maContextStack.pop();
239 }
240
241 // --------------------------------------------------------------------
242
DefineNamespace(const OString & rPrefix,const sal_Char * pNamespaceURL)243 void FastSaxParser::DefineNamespace( const OString& rPrefix, const sal_Char* pNamespaceURL )
244 {
245 Entity& rEntity = getEntity();
246 OSL_ENSURE( !rEntity.maContextStack.empty(), "sax::FastSaxParser::DefineNamespace(), I need a context!" );
247 if( !rEntity.maContextStack.empty() )
248 {
249 sal_uInt32 nOffset = rEntity.maContextStack.top()->mnNamespaceCount++;
250
251 if( rEntity.maNamespaceDefines.size() <= nOffset )
252 rEntity.maNamespaceDefines.resize( rEntity.maNamespaceDefines.size() + 64 );
253
254 const OUString aNamespaceURL( pNamespaceURL, strlen( pNamespaceURL ), RTL_TEXTENCODING_UTF8 );
255 rEntity.maNamespaceDefines[nOffset].reset( new NamespaceDefine( rPrefix, GetNamespaceToken( aNamespaceURL ), aNamespaceURL ) );
256 }
257 }
258
259 // --------------------------------------------------------------------
260
GetToken(const OString & rToken)261 sal_Int32 FastSaxParser::GetToken( const OString& rToken )
262 {
263 Sequence< sal_Int8 > aSeq( (sal_Int8*)rToken.getStr(), rToken.getLength() );
264
265 return getEntity().mxTokenHandler->getTokenFromUTF8( aSeq );
266 }
267
GetToken(const sal_Char * pToken,sal_Int32 nLen)268 sal_Int32 FastSaxParser::GetToken( const sal_Char* pToken, sal_Int32 nLen /* = 0 */ )
269 {
270 if( !nLen )
271 nLen = strlen( pToken );
272
273 Sequence< sal_Int8 > aSeq( (sal_Int8*)pToken, nLen );
274
275 return getEntity().mxTokenHandler->getTokenFromUTF8( aSeq );
276 }
277
278 // --------------------------------------------------------------------
279
GetTokenWithPrefix(const OString & rPrefix,const OString & rName)280 sal_Int32 FastSaxParser::GetTokenWithPrefix( const OString& rPrefix, const OString& rName ) throw (SAXException)
281 {
282 sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
283
284 Entity& rEntity = getEntity();
285 sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount;
286 while( nNamespace-- )
287 {
288 if( rEntity.maNamespaceDefines[nNamespace]->maPrefix == rPrefix )
289 {
290 nNamespaceToken = rEntity.maNamespaceDefines[nNamespace]->mnToken;
291 break;
292 }
293
294 if( !nNamespace )
295 throw SAXException(); // prefix that has no defined namespace url
296 }
297
298 if( nNamespaceToken != FastToken::DONTKNOW )
299 {
300 sal_Int32 nNameToken = GetToken( rName.getStr(), rName.getLength() );
301 if( nNameToken != FastToken::DONTKNOW )
302 return nNamespaceToken | nNameToken;
303 }
304
305 return FastToken::DONTKNOW;
306 }
307
GetTokenWithPrefix(const sal_Char * pPrefix,int nPrefixLen,const sal_Char * pName,int nNameLen)308 sal_Int32 FastSaxParser::GetTokenWithPrefix( const sal_Char*pPrefix, int nPrefixLen, const sal_Char* pName, int nNameLen ) throw (SAXException)
309 {
310 sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
311
312 Entity& rEntity = getEntity();
313 sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount;
314 while( nNamespace-- )
315 {
316 const OString& rPrefix( rEntity.maNamespaceDefines[nNamespace]->maPrefix );
317 if( (rPrefix.getLength() == nPrefixLen) &&
318 (strncmp( rPrefix.getStr(), pPrefix, nPrefixLen ) == 0 ) )
319 {
320 nNamespaceToken = rEntity.maNamespaceDefines[nNamespace]->mnToken;
321 break;
322 }
323
324 if( !nNamespace )
325 throw SAXException(); // prefix that has no defined namespace url
326 }
327
328 if( nNamespaceToken != FastToken::DONTKNOW )
329 {
330 sal_Int32 nNameToken = GetToken( pName, nNameLen );
331 if( nNameToken != FastToken::DONTKNOW )
332 return nNamespaceToken | nNameToken;
333 }
334
335 return FastToken::DONTKNOW;
336 }
337
338 // --------------------------------------------------------------------
339
GetNamespaceToken(const OUString & rNamespaceURL)340 sal_Int32 FastSaxParser::GetNamespaceToken( const OUString& rNamespaceURL )
341 {
342 NamespaceMap::iterator aIter( maNamespaceMap.find( rNamespaceURL ) );
343 if( aIter != maNamespaceMap.end() )
344 return (*aIter).second;
345 else
346 return FastToken::DONTKNOW;
347 }
348
349 // --------------------------------------------------------------------
350
GetNamespaceURL(const OString & rPrefix)351 OUString FastSaxParser::GetNamespaceURL( const OString& rPrefix ) throw (SAXException)
352 {
353 Entity& rEntity = getEntity();
354 if( !rEntity.maContextStack.empty() )
355 {
356 sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount;
357 while( nNamespace-- )
358 if( rEntity.maNamespaceDefines[nNamespace]->maPrefix == rPrefix )
359 return rEntity.maNamespaceDefines[nNamespace]->maNamespaceURL;
360 }
361
362 throw SAXException(); // prefix that has no defined namespace url
363 }
364
GetNamespaceURL(const sal_Char * pPrefix,int nPrefixLen)365 OUString FastSaxParser::GetNamespaceURL( const sal_Char*pPrefix, int nPrefixLen ) throw(SAXException)
366 {
367 Entity& rEntity = getEntity();
368 if( pPrefix && !rEntity.maContextStack.empty() )
369 {
370 sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount;
371 while( nNamespace-- )
372 {
373 const OString& rPrefix( rEntity.maNamespaceDefines[nNamespace]->maPrefix );
374 if( (rPrefix.getLength() == nPrefixLen) &&
375 (strncmp( rPrefix.getStr(), pPrefix, nPrefixLen ) == 0 ) )
376 {
377 return rEntity.maNamespaceDefines[nNamespace]->maNamespaceURL;
378 }
379 }
380 }
381
382 throw SAXException(); // prefix that has no defined namespace url
383 }
384
385 // --------------------------------------------------------------------
386
GetTokenWithNamespaceURL(const OUString & rNamespaceURL,const sal_Char * pName,int nNameLen)387 sal_Int32 FastSaxParser::GetTokenWithNamespaceURL( const OUString& rNamespaceURL, const sal_Char* pName, int nNameLen )
388 {
389 sal_Int32 nNamespaceToken = GetNamespaceToken( rNamespaceURL );
390
391 if( nNamespaceToken != FastToken::DONTKNOW )
392 {
393 sal_Int32 nNameToken = GetToken( pName, nNameLen );
394 if( nNameToken != FastToken::DONTKNOW )
395 return nNamespaceToken | nNameToken;
396 }
397
398 return FastToken::DONTKNOW;
399 }
400
401 // --------------------------------------------------------------------
402
splitName(const XML_Char * pwName,const XML_Char * & rpPrefix,sal_Int32 & rPrefixLen,const XML_Char * & rpName,sal_Int32 & rNameLen)403 void FastSaxParser::splitName( const XML_Char *pwName, const XML_Char *&rpPrefix, sal_Int32 &rPrefixLen, const XML_Char *&rpName, sal_Int32 &rNameLen )
404 {
405 XML_Char *p;
406 for( p = const_cast< XML_Char* >( pwName ), rNameLen = 0, rPrefixLen = 0; *p; p++ )
407 {
408 if( *p == ':' )
409 {
410 rPrefixLen = p - pwName;
411 rNameLen = 0;
412 }
413 else
414 {
415 rNameLen++;
416 }
417 }
418 if( rPrefixLen )
419 {
420 rpPrefix = pwName;
421 rpName = &pwName[ rPrefixLen + 1 ];
422 }
423 else
424 {
425 rpPrefix = 0;
426 rpName = pwName;
427 }
428 }
429
430 /***************
431 *
432 * parseStream does Parser-startup initializations. The FastSaxParser::parse() method does
433 * the file-specific initialization work. (During a parser run, external files may be opened)
434 *
435 ****************/
parseStream(const InputSource & maStructSource)436 void FastSaxParser::parseStream( const InputSource& maStructSource) throw (SAXException, IOException, RuntimeException)
437 {
438 // Only one text at one time
439 MutexGuard guard( maMutex );
440
441 Entity entity( maData );
442 entity.maStructSource = maStructSource;
443
444 if( !entity.maStructSource.aInputStream.is() )
445 throw SAXException( OUString( RTL_CONSTASCII_USTRINGPARAM( "No input source" ) ), Reference< XInterface >(), Any() );
446
447 entity.maConverter.setInputStream( entity.maStructSource.aInputStream );
448 if( entity.maStructSource.sEncoding.getLength() )
449 entity.maConverter.setEncoding( OUStringToOString( entity.maStructSource.sEncoding, RTL_TEXTENCODING_ASCII_US ) );
450
451 // create parser with proper encoding
452 entity.mpParser = XML_ParserCreate( 0 );
453 if( !entity.mpParser )
454 throw SAXException( OUString( RTL_CONSTASCII_USTRINGPARAM( "Couldn't create parser" ) ), Reference< XInterface >(), Any() );
455
456 // set all necessary C-Callbacks
457 XML_SetUserData( entity.mpParser, this );
458 XML_SetElementHandler( entity.mpParser, call_callbackStartElement, call_callbackEndElement );
459 XML_SetCharacterDataHandler( entity.mpParser, call_callbackCharacters );
460 XML_SetExternalEntityRefHandler( entity.mpParser, call_callbackExternalEntityRef );
461
462 pushEntity( entity );
463 try
464 {
465 // start the document
466 if( entity.mxDocumentHandler.is() )
467 {
468 Reference< XLocator > xLoc( mxDocumentLocator.get() );
469 entity.mxDocumentHandler->setDocumentLocator( xLoc );
470 entity.mxDocumentHandler->startDocument();
471 }
472
473 parse();
474
475 // finish document
476 if( entity.mxDocumentHandler.is() )
477 {
478 entity.mxDocumentHandler->endDocument();
479 }
480 }
481 catch( SAXException & )
482 {
483 popEntity();
484 XML_ParserFree( entity.mpParser );
485 throw;
486 }
487 catch( IOException & )
488 {
489 popEntity();
490 XML_ParserFree( entity.mpParser );
491 throw;
492 }
493 catch( RuntimeException & )
494 {
495 popEntity();
496 XML_ParserFree( entity.mpParser );
497 throw;
498 }
499
500 popEntity();
501 XML_ParserFree( entity.mpParser );
502 }
503
setFastDocumentHandler(const Reference<XFastDocumentHandler> & Handler)504 void FastSaxParser::setFastDocumentHandler( const Reference< XFastDocumentHandler >& Handler ) throw (RuntimeException)
505 {
506 maData.mxDocumentHandler = Handler;
507 }
508
setTokenHandler(const Reference<XFastTokenHandler> & Handler)509 void SAL_CALL FastSaxParser::setTokenHandler( const Reference< XFastTokenHandler >& Handler ) throw (RuntimeException)
510 {
511 maData.mxTokenHandler = Handler;
512 }
513
registerNamespace(const OUString & NamespaceURL,sal_Int32 NamespaceToken)514 void SAL_CALL FastSaxParser::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken ) throw (IllegalArgumentException, RuntimeException)
515 {
516 if( NamespaceToken >= FastToken::NAMESPACE )
517 {
518 if( GetNamespaceToken( NamespaceURL ) == FastToken::DONTKNOW )
519 {
520 maNamespaceMap[ NamespaceURL ] = NamespaceToken;
521 return;
522 }
523 }
524 throw IllegalArgumentException();
525 }
526
setErrorHandler(const Reference<XErrorHandler> & Handler)527 void FastSaxParser::setErrorHandler(const Reference< XErrorHandler > & Handler) throw (RuntimeException)
528 {
529 maData.mxErrorHandler = Handler;
530 }
531
setEntityResolver(const Reference<XEntityResolver> & Resolver)532 void FastSaxParser::setEntityResolver(const Reference < XEntityResolver > & Resolver) throw (RuntimeException)
533 {
534 maData.mxEntityResolver = Resolver;
535 }
536
setLocale(const Locale & Locale)537 void FastSaxParser::setLocale( const Locale & Locale ) throw (RuntimeException)
538 {
539 maData.maLocale = Locale;
540 }
541
getSupportedServiceNames_Static(void)542 Sequence< OUString > FastSaxParser::getSupportedServiceNames_Static(void)
543 {
544 Sequence<OUString> aRet(1);
545 aRet.getArray()[0] = ::rtl::OUString( RTL_CONSTASCII_USTRINGPARAM(PARSER_SERVICE_NAME) );
546 return aRet;
547 }
548
549 // XServiceInfo
getImplementationName()550 OUString FastSaxParser::getImplementationName() throw (RuntimeException)
551 {
552 return OUString::createFromAscii( PARSER_IMPLEMENTATION_NAME );
553 }
554
555 // XServiceInfo
supportsService(const OUString & ServiceName)556 sal_Bool FastSaxParser::supportsService(const OUString& ServiceName) throw (RuntimeException)
557 {
558 Sequence< OUString > aSNL = getSupportedServiceNames();
559 const OUString * pArray = aSNL.getConstArray();
560
561 for( sal_Int32 i = 0; i < aSNL.getLength(); i++ )
562 if( pArray[i] == ServiceName )
563 return sal_True;
564
565 return sal_False;
566 }
567
568 // XServiceInfo
getSupportedServiceNames(void)569 Sequence< OUString > FastSaxParser::getSupportedServiceNames(void) throw (RuntimeException)
570 {
571
572 Sequence<OUString> seq(1);
573 seq.getArray()[0] = OUString::createFromAscii( PARSER_SERVICE_NAME );
574 return seq;
575 }
576
577
578 /*---------------------------------------
579 *
580 * Helper functions and classes
581 *
582 *-------------------------------------------*/
583
584 namespace {
585
lclGetErrorMessage(XML_Error xmlE,const OUString & sSystemId,sal_Int32 nLine)586 OUString lclGetErrorMessage( XML_Error xmlE, const OUString& sSystemId, sal_Int32 nLine )
587 {
588 const sal_Char* pMessage = "";
589 switch( xmlE )
590 {
591 case XML_ERROR_NONE: pMessage = "No"; break;
592 case XML_ERROR_NO_MEMORY: pMessage = "no memory"; break;
593 case XML_ERROR_SYNTAX: pMessage = "syntax"; break;
594 case XML_ERROR_NO_ELEMENTS: pMessage = "no elements"; break;
595 case XML_ERROR_INVALID_TOKEN: pMessage = "invalid token"; break;
596 case XML_ERROR_UNCLOSED_TOKEN: pMessage = "unclosed token"; break;
597 case XML_ERROR_PARTIAL_CHAR: pMessage = "partial char"; break;
598 case XML_ERROR_TAG_MISMATCH: pMessage = "tag mismatch"; break;
599 case XML_ERROR_DUPLICATE_ATTRIBUTE: pMessage = "duplicate attribute"; break;
600 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT: pMessage = "junk after doc element"; break;
601 case XML_ERROR_PARAM_ENTITY_REF: pMessage = "parameter entity reference"; break;
602 case XML_ERROR_UNDEFINED_ENTITY: pMessage = "undefined entity"; break;
603 case XML_ERROR_RECURSIVE_ENTITY_REF: pMessage = "recursive entity reference"; break;
604 case XML_ERROR_ASYNC_ENTITY: pMessage = "async entity"; break;
605 case XML_ERROR_BAD_CHAR_REF: pMessage = "bad char reference"; break;
606 case XML_ERROR_BINARY_ENTITY_REF: pMessage = "binary entity reference"; break;
607 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF: pMessage = "attribute external entity reference"; break;
608 case XML_ERROR_MISPLACED_XML_PI: pMessage = "misplaced xml processing instruction"; break;
609 case XML_ERROR_UNKNOWN_ENCODING: pMessage = "unknown encoding"; break;
610 case XML_ERROR_INCORRECT_ENCODING: pMessage = "incorrect encoding"; break;
611 case XML_ERROR_UNCLOSED_CDATA_SECTION: pMessage = "unclosed cdata section"; break;
612 case XML_ERROR_EXTERNAL_ENTITY_HANDLING: pMessage = "external entity reference"; break;
613 case XML_ERROR_NOT_STANDALONE: pMessage = "not standalone"; break;
614 default:;
615 }
616
617 OUStringBuffer aBuffer( sal_Unicode( '[' ) );
618 aBuffer.append( sSystemId );
619 aBuffer.appendAscii( RTL_CONSTASCII_STRINGPARAM( " line " ) );
620 aBuffer.append( nLine );
621 aBuffer.appendAscii( RTL_CONSTASCII_STRINGPARAM( "]: " ) );
622 aBuffer.appendAscii( pMessage );
623 aBuffer.appendAscii( RTL_CONSTASCII_STRINGPARAM( " error" ) );
624 return aBuffer.makeStringAndClear();
625 }
626
627 } // namespace
628
629 // starts parsing with actual parser !
parse()630 void FastSaxParser::parse()
631 {
632 const int BUFFER_SIZE = 16 * 1024;
633 Sequence< sal_Int8 > seqOut( BUFFER_SIZE );
634
635 Entity& rEntity = getEntity();
636 int nRead = 0;
637 do
638 {
639 nRead = rEntity.maConverter.readAndConvert( seqOut, BUFFER_SIZE );
640 if( nRead <= 0 )
641 {
642 XML_Parse( rEntity.mpParser, (const char*) seqOut.getConstArray(), 0, 1 );
643 break;
644 }
645
646 bool bContinue = XML_Parse( rEntity.mpParser, (const char*) seqOut.getConstArray(), nRead, 0 ) != 0;
647 // callbacks used inside XML_Parse may have caught an exception
648 if( !bContinue || rEntity.maSavedException.hasValue() )
649 {
650 // Error during parsing !
651 XML_Error xmlE = XML_GetErrorCode( rEntity.mpParser );
652 OUString sSystemId = mxDocumentLocator->getSystemId();
653 sal_Int32 nLine = mxDocumentLocator->getLineNumber();
654
655 SAXParseException aExcept(
656 lclGetErrorMessage( xmlE, sSystemId, nLine ),
657 Reference< XInterface >(),
658 Any( &rEntity.maSavedException, getCppuType( &rEntity.maSavedException ) ),
659 mxDocumentLocator->getPublicId(),
660 mxDocumentLocator->getSystemId(),
661 mxDocumentLocator->getLineNumber(),
662 mxDocumentLocator->getColumnNumber()
663 );
664
665 // error handler is set, it may throw the exception
666 if( rEntity.mxErrorHandler.is() )
667 rEntity.mxErrorHandler->fatalError( Any( aExcept ) );
668
669 // error handler has not thrown, but parsing cannot go on, the
670 // exception MUST be thrown
671 throw aExcept;
672 }
673 }
674 while( nRead > 0 );
675 }
676
677 //------------------------------------------
678 //
679 // The C-Callbacks
680 //
681 //-----------------------------------------
682
683 namespace {
684
685 struct AttributeData
686 {
687 OString maPrefix;
688 OString maName;
689 OString maValue;
690 };
691
692 } // namespace
693
callbackStartElement(const XML_Char * pwName,const XML_Char ** awAttributes)694 void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char** awAttributes )
695 {
696 Reference< XFastContextHandler > xParentContext;
697 Entity& rEntity = getEntity();
698 if( !rEntity.maContextStack.empty() )
699 {
700 xParentContext = rEntity.maContextStack.top()->mxContext;
701 if( !xParentContext.is() )
702 {
703 // we ignore current elements, so no processing needed
704 pushContext();
705 return;
706 }
707 }
708
709 pushContext();
710
711 rEntity.mxAttributes->clear();
712
713 // create attribute map and process namespace instructions
714 int i = 0;
715 sal_Int32 nNameLen, nPrefixLen;
716 const XML_Char *pName;
717 const XML_Char *pPrefix;
718
719 try
720 {
721 /* #158414# Each element may define new namespaces, also for attribues.
722 First, process all namespace attributes and cache other attributes in a
723 vector. Second, process the attributes after namespaces have been
724 initialized. */
725 ::std::vector< AttributeData > aAttribs;
726
727 // #158414# first: get namespaces
728 for( ; awAttributes[i]; i += 2 )
729 {
730 OSL_ASSERT( awAttributes[i+1] );
731
732 splitName( awAttributes[i], pPrefix, nPrefixLen, pName, nNameLen );
733 if( nPrefixLen )
734 {
735 if( (nPrefixLen == 5) && (strncmp( pPrefix, "xmlns", 5 ) == 0) )
736 {
737 DefineNamespace( OString( pName, nNameLen ), awAttributes[i+1] );
738 }
739 else
740 {
741 aAttribs.resize( aAttribs.size() + 1 );
742 aAttribs.back().maPrefix = OString( pPrefix, nPrefixLen );
743 aAttribs.back().maName = OString( pName, nNameLen );
744 aAttribs.back().maValue = OString( awAttributes[i+1] );
745 }
746 }
747 else
748 {
749 if( (nNameLen == 5) && (strcmp( pName, "xmlns" ) == 0) )
750 {
751 // namespace of the element found
752 rEntity.maContextStack.top()->maNamespace = OUString( awAttributes[i+1], strlen( awAttributes[i+1] ), RTL_TEXTENCODING_UTF8 );
753 }
754 else
755 {
756 aAttribs.resize( aAttribs.size() + 1 );
757 aAttribs.back().maName = OString( pName, nNameLen );
758 aAttribs.back().maValue = OString( awAttributes[i+1] );
759 }
760 }
761 }
762
763 // #158414# second: fill attribute list with other attributes
764 for( ::std::vector< AttributeData >::const_iterator aIt = aAttribs.begin(), aEnd = aAttribs.end(); aIt != aEnd; ++aIt )
765 {
766 if( aIt->maPrefix.getLength() > 0 )
767 {
768 sal_Int32 nAttributeToken = GetTokenWithPrefix( aIt->maPrefix, aIt->maName );
769 if( nAttributeToken != FastToken::DONTKNOW )
770 rEntity.mxAttributes->add( nAttributeToken, aIt->maValue );
771 else
772 rEntity.mxAttributes->addUnknown( GetNamespaceURL( aIt->maPrefix ), aIt->maName, aIt->maValue );
773 }
774 else
775 {
776 sal_Int32 nAttributeToken = GetToken( aIt->maName );
777 if( nAttributeToken != FastToken::DONTKNOW )
778 rEntity.mxAttributes->add( nAttributeToken, aIt->maValue );
779 else
780 rEntity.mxAttributes->addUnknown( aIt->maName, aIt->maValue );
781 }
782 }
783
784 sal_Int32 nElementToken;
785 splitName( pwName, pPrefix, nPrefixLen, pName, nNameLen );
786 if( nPrefixLen > 0 )
787 nElementToken = GetTokenWithPrefix( pPrefix, nPrefixLen, pName, nNameLen );
788 else if( rEntity.maContextStack.top()->maNamespace.getLength() > 0 )
789 nElementToken = GetTokenWithNamespaceURL( rEntity.maContextStack.top()->maNamespace, pName, nNameLen );
790 else
791 nElementToken = GetToken( pName );
792 rEntity.maContextStack.top()->mnElementToken = nElementToken;
793
794 Reference< XFastAttributeList > xAttr( rEntity.mxAttributes.get() );
795 Reference< XFastContextHandler > xContext;
796 if( nElementToken == FastToken::DONTKNOW )
797 {
798 if( nPrefixLen > 0 )
799 rEntity.maContextStack.top()->maNamespace = GetNamespaceURL( pPrefix, nPrefixLen );
800
801 const OUString aNamespace( rEntity.maContextStack.top()->maNamespace );
802 const OUString aElementName( pPrefix, nPrefixLen, RTL_TEXTENCODING_UTF8 );
803 rEntity.maContextStack.top()->maElementName = aElementName;
804
805 if( xParentContext.is() )
806 xContext = xParentContext->createUnknownChildContext( aNamespace, aElementName, xAttr );
807 else
808 xContext = rEntity.mxDocumentHandler->createUnknownChildContext( aNamespace, aElementName, xAttr );
809
810 if( xContext.is() )
811 {
812 rEntity.maContextStack.top()->mxContext = xContext;
813 xContext->startUnknownElement( aNamespace, aElementName, xAttr );
814 }
815 }
816 else
817 {
818 if( xParentContext.is() )
819 xContext = xParentContext->createFastChildContext( nElementToken, xAttr );
820 else
821 xContext = rEntity.mxDocumentHandler->createFastChildContext( nElementToken, xAttr );
822
823
824 if( xContext.is() )
825 {
826 rEntity.maContextStack.top()->mxContext = xContext;
827 xContext->startFastElement( nElementToken, xAttr );
828 }
829 }
830 }
831 catch( Exception& e )
832 {
833 rEntity.maSavedException <<= e;
834 }
835 }
836
callbackEndElement(const XML_Char *)837 void FastSaxParser::callbackEndElement( const XML_Char* )
838 {
839 Entity& rEntity = getEntity();
840 OSL_ENSURE( !rEntity.maContextStack.empty(), "FastSaxParser::callbackEndElement - no context" );
841 if( !rEntity.maContextStack.empty() )
842 {
843 SaxContextImplPtr pContext = rEntity.maContextStack.top();
844 const Reference< XFastContextHandler >& xContext( pContext->mxContext );
845 if( xContext.is() ) try
846 {
847 sal_Int32 nElementToken = pContext->mnElementToken;
848 if( nElementToken != FastToken::DONTKNOW )
849 xContext->endFastElement( nElementToken );
850 else
851 xContext->endUnknownElement( pContext->maNamespace, pContext->maElementName );
852 }
853 catch( Exception& e )
854 {
855 rEntity.maSavedException <<= e;
856 }
857
858 popContext();
859 }
860 }
861
862
callbackCharacters(const XML_Char * s,int nLen)863 void FastSaxParser::callbackCharacters( const XML_Char* s, int nLen )
864 {
865 Entity& rEntity = getEntity();
866 const Reference< XFastContextHandler >& xContext( rEntity.maContextStack.top()->mxContext );
867 if( xContext.is() ) try
868 {
869 xContext->characters( OUString( s, nLen, RTL_TEXTENCODING_UTF8 ) );
870 }
871 catch( Exception& e )
872 {
873 rEntity.maSavedException <<= e;
874 }
875 }
876
callbackExternalEntityRef(XML_Parser parser,const XML_Char * context,const XML_Char *,const XML_Char * systemId,const XML_Char * publicId)877 int FastSaxParser::callbackExternalEntityRef( XML_Parser parser,
878 const XML_Char *context, const XML_Char * /*base*/, const XML_Char *systemId, const XML_Char *publicId )
879 {
880 bool bOK = true;
881 InputSource source;
882
883 Entity& rCurrEntity = getEntity();
884 Entity aNewEntity( rCurrEntity );
885
886 if( rCurrEntity.mxEntityResolver.is() ) try
887 {
888 aNewEntity.maStructSource = rCurrEntity.mxEntityResolver->resolveEntity(
889 OUString( publicId, strlen( publicId ), RTL_TEXTENCODING_UTF8 ) ,
890 OUString( systemId, strlen( systemId ), RTL_TEXTENCODING_UTF8 ) );
891 }
892 catch( SAXParseException & e )
893 {
894 rCurrEntity.maSavedException <<= e;
895 bOK = false;
896 }
897 catch( SAXException & e )
898 {
899 rCurrEntity.maSavedException <<= SAXParseException(
900 e.Message, e.Context, e.WrappedException,
901 mxDocumentLocator->getPublicId(),
902 mxDocumentLocator->getSystemId(),
903 mxDocumentLocator->getLineNumber(),
904 mxDocumentLocator->getColumnNumber() );
905 bOK = false;
906 }
907
908 if( aNewEntity.maStructSource.aInputStream.is() )
909 {
910 aNewEntity.mpParser = XML_ExternalEntityParserCreate( parser, context, 0 );
911 if( !aNewEntity.mpParser )
912 {
913 return false;
914 }
915
916 aNewEntity.maConverter.setInputStream( aNewEntity.maStructSource.aInputStream );
917 pushEntity( aNewEntity );
918 try
919 {
920 parse();
921 }
922 catch( SAXParseException & e )
923 {
924 rCurrEntity.maSavedException <<= e;
925 bOK = false;
926 }
927 catch( IOException &e )
928 {
929 SAXException aEx;
930 aEx.WrappedException <<= e;
931 rCurrEntity.maSavedException <<= aEx;
932 bOK = false;
933 }
934 catch( RuntimeException &e )
935 {
936 SAXException aEx;
937 aEx.WrappedException <<= e;
938 rCurrEntity.maSavedException <<= aEx;
939 bOK = false;
940 }
941
942 popEntity();
943 XML_ParserFree( aNewEntity.mpParser );
944 }
945
946 return bOK;
947 }
948
949 } // namespace sax_fastparser
950