1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 //#include <stdlib.h> 25 //#include <sal/alloca.h> 26 27 #include <boost/scoped_ptr.hpp> 28 29 #include <osl/diagnose.h> 30 #include <rtl/ustrbuf.hxx> 31 32 #include <com/sun/star/lang/DisposedException.hpp> 33 #include <com/sun/star/xml/sax/XFastContextHandler.hpp> 34 #include <com/sun/star/xml/sax/SAXParseException.hpp> 35 #include <com/sun/star/xml/sax/FastToken.hpp> 36 37 #include "fastparser.hxx" 38 39 #include <string.h> 40 41 using ::rtl::OString; 42 using ::rtl::OUString; 43 using ::rtl::OUStringBuffer; 44 using namespace ::std; 45 using namespace ::osl; 46 using namespace ::cppu; 47 using namespace ::com::sun::star::uno; 48 using namespace ::com::sun::star::lang; 49 using namespace ::com::sun::star::xml::sax; 50 //using namespace ::com::sun::star::util; 51 using namespace ::com::sun::star::io; 52 53 namespace sax_fastparser { 54 55 // -------------------------------------------------------------------- 56 57 struct SaxContextImpl 58 { 59 Reference< XFastContextHandler > mxContext; 60 sal_uInt32 mnNamespaceCount; 61 sal_Int32 mnElementToken; 62 OUString maNamespace; 63 OUString maElementName; 64 65 SaxContextImpl() { mnNamespaceCount = 0; mnElementToken = 0; } 66 SaxContextImpl( const SaxContextImplPtr& p ) { mnNamespaceCount = p->mnNamespaceCount; mnElementToken = p->mnElementToken; maNamespace = p->maNamespace; } 67 }; 68 69 // -------------------------------------------------------------------- 70 71 struct NamespaceDefine 72 { 73 OString maPrefix; 74 sal_Int32 mnToken; 75 OUString maNamespaceURL; 76 77 NamespaceDefine( const OString& rPrefix, sal_Int32 nToken, const OUString& rNamespaceURL ) : maPrefix( rPrefix ), mnToken( nToken ), maNamespaceURL( rNamespaceURL ) {} 78 }; 79 80 // -------------------------------------------------------------------- 81 // FastLocatorImpl 82 // -------------------------------------------------------------------- 83 84 class FastSaxParser; 85 86 class FastLocatorImpl : public WeakImplHelper1< XLocator > 87 { 88 public: 89 FastLocatorImpl( FastSaxParser *p ) : mpParser(p) {} 90 91 void dispose() { mpParser = 0; } 92 void checkDispose() throw (RuntimeException) { if( !mpParser ) throw DisposedException(); } 93 94 //XLocator 95 virtual sal_Int32 SAL_CALL getColumnNumber(void) throw (RuntimeException); 96 virtual sal_Int32 SAL_CALL getLineNumber(void) throw (RuntimeException); 97 virtual OUString SAL_CALL getPublicId(void) throw (RuntimeException); 98 virtual OUString SAL_CALL getSystemId(void) throw (RuntimeException); 99 100 private: 101 FastSaxParser *mpParser; 102 }; 103 104 // -------------------------------------------------------------------- 105 // FastSaxParser 106 // -------------------------------------------------------------------- 107 108 //--------------------------------------------- 109 // the implementation part 110 //--------------------------------------------- 111 112 extern "C" { 113 114 static void call_callbackStartElement(void *userData, const XML_Char *name , const XML_Char **atts) 115 { 116 FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( userData ); 117 pFastParser->callbackStartElement( name, atts ); 118 } 119 120 static void call_callbackEndElement(void *userData, const XML_Char *name) 121 { 122 FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( userData ); 123 pFastParser->callbackEndElement( name ); 124 } 125 126 static void call_callbackCharacters( void *userData , const XML_Char *s , int nLen ) 127 { 128 FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( userData ); 129 pFastParser->callbackCharacters( s, nLen ); 130 } 131 132 static int call_callbackExternalEntityRef( XML_Parser parser, 133 const XML_Char *openEntityNames, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId ) 134 { 135 FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( XML_GetUserData( parser ) ); 136 return pFastParser->callbackExternalEntityRef( parser, openEntityNames, base, systemId, publicId ); 137 } 138 139 } // extern "C" 140 141 // -------------------------------------------------------------------- 142 // FastLocatorImpl implementation 143 // -------------------------------------------------------------------- 144 145 sal_Int32 SAL_CALL FastLocatorImpl::getColumnNumber(void) throw (RuntimeException) 146 { 147 checkDispose(); 148 return XML_GetCurrentColumnNumber( mpParser->getEntity().mpParser ); 149 } 150 151 // -------------------------------------------------------------------- 152 153 sal_Int32 SAL_CALL FastLocatorImpl::getLineNumber(void) throw (RuntimeException) 154 { 155 checkDispose(); 156 return XML_GetCurrentLineNumber( mpParser->getEntity().mpParser ); 157 } 158 159 // -------------------------------------------------------------------- 160 161 OUString SAL_CALL FastLocatorImpl::getPublicId(void) throw (RuntimeException) 162 { 163 checkDispose(); 164 return mpParser->getEntity().maStructSource.sPublicId; 165 } 166 // -------------------------------------------------------------------- 167 168 OUString SAL_CALL FastLocatorImpl::getSystemId(void) throw (RuntimeException) 169 { 170 checkDispose(); 171 return mpParser->getEntity().maStructSource.sSystemId; 172 } 173 174 // -------------------------------------------------------------------- 175 176 ParserData::ParserData() 177 { 178 } 179 180 ParserData::~ParserData() 181 { 182 } 183 184 // -------------------------------------------------------------------- 185 186 Entity::Entity( const ParserData& rData ) : 187 ParserData( rData ) 188 { 189 // performance-Improvment. Reference is needed when calling the startTag callback. 190 // Handing out the same object with every call is allowed (see sax-specification) 191 mxAttributes.set( new FastAttributeList( mxTokenHandler ) ); 192 } 193 194 Entity::~Entity() 195 { 196 } 197 198 // -------------------------------------------------------------------- 199 // FastSaxParser implementation 200 // -------------------------------------------------------------------- 201 202 FastSaxParser::FastSaxParser() 203 { 204 mxDocumentLocator.set( new FastLocatorImpl( this ) ); 205 } 206 207 // -------------------------------------------------------------------- 208 209 FastSaxParser::~FastSaxParser() 210 { 211 if( mxDocumentLocator.is() ) 212 mxDocumentLocator->dispose(); 213 } 214 215 // -------------------------------------------------------------------- 216 217 void FastSaxParser::pushContext() 218 { 219 Entity& rEntity = getEntity(); 220 if( rEntity.maContextStack.empty() ) 221 { 222 rEntity.maContextStack.push( SaxContextImplPtr( new SaxContextImpl ) ); 223 DefineNamespace( OString("xml"), "http://www.w3.org/XML/1998/namespace"); 224 } 225 else 226 { 227 rEntity.maContextStack.push( SaxContextImplPtr( new SaxContextImpl( rEntity.maContextStack.top() ) ) ); 228 } 229 } 230 231 // -------------------------------------------------------------------- 232 233 void FastSaxParser::popContext() 234 { 235 Entity& rEntity = getEntity(); 236 OSL_ENSURE( !rEntity.maContextStack.empty(), "sax::FastSaxParser::popContext(), pop without push?" ); 237 if( !rEntity.maContextStack.empty() ) 238 rEntity.maContextStack.pop(); 239 } 240 241 // -------------------------------------------------------------------- 242 243 void FastSaxParser::DefineNamespace( const OString& rPrefix, const sal_Char* pNamespaceURL ) 244 { 245 Entity& rEntity = getEntity(); 246 OSL_ENSURE( !rEntity.maContextStack.empty(), "sax::FastSaxParser::DefineNamespace(), I need a context!" ); 247 if( !rEntity.maContextStack.empty() ) 248 { 249 sal_uInt32 nOffset = rEntity.maContextStack.top()->mnNamespaceCount++; 250 251 if( rEntity.maNamespaceDefines.size() <= nOffset ) 252 rEntity.maNamespaceDefines.resize( rEntity.maNamespaceDefines.size() + 64 ); 253 254 const OUString aNamespaceURL( pNamespaceURL, strlen( pNamespaceURL ), RTL_TEXTENCODING_UTF8 ); 255 rEntity.maNamespaceDefines[nOffset].reset( new NamespaceDefine( rPrefix, GetNamespaceToken( aNamespaceURL ), aNamespaceURL ) ); 256 } 257 } 258 259 // -------------------------------------------------------------------- 260 261 sal_Int32 FastSaxParser::GetToken( const OString& rToken ) 262 { 263 Sequence< sal_Int8 > aSeq( (sal_Int8*)rToken.getStr(), rToken.getLength() ); 264 265 return getEntity().mxTokenHandler->getTokenFromUTF8( aSeq ); 266 } 267 268 sal_Int32 FastSaxParser::GetToken( const sal_Char* pToken, sal_Int32 nLen /* = 0 */ ) 269 { 270 if( !nLen ) 271 nLen = strlen( pToken ); 272 273 Sequence< sal_Int8 > aSeq( (sal_Int8*)pToken, nLen ); 274 275 return getEntity().mxTokenHandler->getTokenFromUTF8( aSeq ); 276 } 277 278 // -------------------------------------------------------------------- 279 280 sal_Int32 FastSaxParser::GetTokenWithPrefix( const OString& rPrefix, const OString& rName ) throw (SAXException) 281 { 282 sal_Int32 nNamespaceToken = FastToken::DONTKNOW; 283 284 Entity& rEntity = getEntity(); 285 sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount; 286 while( nNamespace-- ) 287 { 288 if( rEntity.maNamespaceDefines[nNamespace]->maPrefix == rPrefix ) 289 { 290 nNamespaceToken = rEntity.maNamespaceDefines[nNamespace]->mnToken; 291 break; 292 } 293 294 if( !nNamespace ) 295 throw SAXException(); // prefix that has no defined namespace url 296 } 297 298 if( nNamespaceToken != FastToken::DONTKNOW ) 299 { 300 sal_Int32 nNameToken = GetToken( rName.getStr(), rName.getLength() ); 301 if( nNameToken != FastToken::DONTKNOW ) 302 return nNamespaceToken | nNameToken; 303 } 304 305 return FastToken::DONTKNOW; 306 } 307 308 sal_Int32 FastSaxParser::GetTokenWithPrefix( const sal_Char*pPrefix, int nPrefixLen, const sal_Char* pName, int nNameLen ) throw (SAXException) 309 { 310 sal_Int32 nNamespaceToken = FastToken::DONTKNOW; 311 312 Entity& rEntity = getEntity(); 313 sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount; 314 while( nNamespace-- ) 315 { 316 const OString& rPrefix( rEntity.maNamespaceDefines[nNamespace]->maPrefix ); 317 if( (rPrefix.getLength() == nPrefixLen) && 318 (strncmp( rPrefix.getStr(), pPrefix, nPrefixLen ) == 0 ) ) 319 { 320 nNamespaceToken = rEntity.maNamespaceDefines[nNamespace]->mnToken; 321 break; 322 } 323 324 if( !nNamespace ) 325 throw SAXException(); // prefix that has no defined namespace url 326 } 327 328 if( nNamespaceToken != FastToken::DONTKNOW ) 329 { 330 sal_Int32 nNameToken = GetToken( pName, nNameLen ); 331 if( nNameToken != FastToken::DONTKNOW ) 332 return nNamespaceToken | nNameToken; 333 } 334 335 return FastToken::DONTKNOW; 336 } 337 338 // -------------------------------------------------------------------- 339 340 sal_Int32 FastSaxParser::GetNamespaceToken( const OUString& rNamespaceURL ) 341 { 342 NamespaceMap::iterator aIter( maNamespaceMap.find( rNamespaceURL ) ); 343 if( aIter != maNamespaceMap.end() ) 344 return (*aIter).second; 345 else 346 return FastToken::DONTKNOW; 347 } 348 349 // -------------------------------------------------------------------- 350 351 OUString FastSaxParser::GetNamespaceURL( const OString& rPrefix ) throw (SAXException) 352 { 353 Entity& rEntity = getEntity(); 354 if( !rEntity.maContextStack.empty() ) 355 { 356 sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount; 357 while( nNamespace-- ) 358 if( rEntity.maNamespaceDefines[nNamespace]->maPrefix == rPrefix ) 359 return rEntity.maNamespaceDefines[nNamespace]->maNamespaceURL; 360 } 361 362 throw SAXException(); // prefix that has no defined namespace url 363 } 364 365 OUString FastSaxParser::GetNamespaceURL( const sal_Char*pPrefix, int nPrefixLen ) throw(SAXException) 366 { 367 Entity& rEntity = getEntity(); 368 if( pPrefix && !rEntity.maContextStack.empty() ) 369 { 370 sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount; 371 while( nNamespace-- ) 372 { 373 const OString& rPrefix( rEntity.maNamespaceDefines[nNamespace]->maPrefix ); 374 if( (rPrefix.getLength() == nPrefixLen) && 375 (strncmp( rPrefix.getStr(), pPrefix, nPrefixLen ) == 0 ) ) 376 { 377 return rEntity.maNamespaceDefines[nNamespace]->maNamespaceURL; 378 } 379 } 380 } 381 382 throw SAXException(); // prefix that has no defined namespace url 383 } 384 385 // -------------------------------------------------------------------- 386 387 sal_Int32 FastSaxParser::GetTokenWithNamespaceURL( const OUString& rNamespaceURL, const sal_Char* pName, int nNameLen ) 388 { 389 sal_Int32 nNamespaceToken = GetNamespaceToken( rNamespaceURL ); 390 391 if( nNamespaceToken != FastToken::DONTKNOW ) 392 { 393 sal_Int32 nNameToken = GetToken( pName, nNameLen ); 394 if( nNameToken != FastToken::DONTKNOW ) 395 return nNamespaceToken | nNameToken; 396 } 397 398 return FastToken::DONTKNOW; 399 } 400 401 // -------------------------------------------------------------------- 402 403 void FastSaxParser::splitName( const XML_Char *pwName, const XML_Char *&rpPrefix, sal_Int32 &rPrefixLen, const XML_Char *&rpName, sal_Int32 &rNameLen ) 404 { 405 XML_Char *p; 406 for( p = const_cast< XML_Char* >( pwName ), rNameLen = 0, rPrefixLen = 0; *p; p++ ) 407 { 408 if( *p == ':' ) 409 { 410 rPrefixLen = p - pwName; 411 rNameLen = 0; 412 } 413 else 414 { 415 rNameLen++; 416 } 417 } 418 if( rPrefixLen ) 419 { 420 rpPrefix = pwName; 421 rpName = &pwName[ rPrefixLen + 1 ]; 422 } 423 else 424 { 425 rpPrefix = 0; 426 rpName = pwName; 427 } 428 } 429 430 /*************** 431 * 432 * parseStream does Parser-startup initializations. The FastSaxParser::parse() method does 433 * the file-specific initialization work. (During a parser run, external files may be opened) 434 * 435 ****************/ 436 void FastSaxParser::parseStream( const InputSource& maStructSource) throw (SAXException, IOException, RuntimeException) 437 { 438 // Only one text at one time 439 MutexGuard guard( maMutex ); 440 441 Entity entity( maData ); 442 entity.maStructSource = maStructSource; 443 444 if( !entity.maStructSource.aInputStream.is() ) 445 throw SAXException( OUString( RTL_CONSTASCII_USTRINGPARAM( "No input source" ) ), Reference< XInterface >(), Any() ); 446 447 entity.maConverter.setInputStream( entity.maStructSource.aInputStream ); 448 if( entity.maStructSource.sEncoding.getLength() ) 449 entity.maConverter.setEncoding( OUStringToOString( entity.maStructSource.sEncoding, RTL_TEXTENCODING_ASCII_US ) ); 450 451 // create parser with proper encoding 452 entity.mpParser = XML_ParserCreate( 0 ); 453 if( !entity.mpParser ) 454 throw SAXException( OUString( RTL_CONSTASCII_USTRINGPARAM( "Couldn't create parser" ) ), Reference< XInterface >(), Any() ); 455 456 // set all necessary C-Callbacks 457 XML_SetUserData( entity.mpParser, this ); 458 XML_SetElementHandler( entity.mpParser, call_callbackStartElement, call_callbackEndElement ); 459 XML_SetCharacterDataHandler( entity.mpParser, call_callbackCharacters ); 460 XML_SetExternalEntityRefHandler( entity.mpParser, call_callbackExternalEntityRef ); 461 462 pushEntity( entity ); 463 try 464 { 465 // start the document 466 if( entity.mxDocumentHandler.is() ) 467 { 468 Reference< XLocator > xLoc( mxDocumentLocator.get() ); 469 entity.mxDocumentHandler->setDocumentLocator( xLoc ); 470 entity.mxDocumentHandler->startDocument(); 471 } 472 473 parse(); 474 475 // finish document 476 if( entity.mxDocumentHandler.is() ) 477 { 478 entity.mxDocumentHandler->endDocument(); 479 } 480 } 481 catch( SAXException & ) 482 { 483 popEntity(); 484 XML_ParserFree( entity.mpParser ); 485 throw; 486 } 487 catch( IOException & ) 488 { 489 popEntity(); 490 XML_ParserFree( entity.mpParser ); 491 throw; 492 } 493 catch( RuntimeException & ) 494 { 495 popEntity(); 496 XML_ParserFree( entity.mpParser ); 497 throw; 498 } 499 500 popEntity(); 501 XML_ParserFree( entity.mpParser ); 502 } 503 504 void FastSaxParser::setFastDocumentHandler( const Reference< XFastDocumentHandler >& Handler ) throw (RuntimeException) 505 { 506 maData.mxDocumentHandler = Handler; 507 } 508 509 void SAL_CALL FastSaxParser::setTokenHandler( const Reference< XFastTokenHandler >& Handler ) throw (RuntimeException) 510 { 511 maData.mxTokenHandler = Handler; 512 } 513 514 void SAL_CALL FastSaxParser::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken ) throw (IllegalArgumentException, RuntimeException) 515 { 516 if( NamespaceToken >= FastToken::NAMESPACE ) 517 { 518 if( GetNamespaceToken( NamespaceURL ) == FastToken::DONTKNOW ) 519 { 520 maNamespaceMap[ NamespaceURL ] = NamespaceToken; 521 return; 522 } 523 } 524 throw IllegalArgumentException(); 525 } 526 527 void FastSaxParser::setErrorHandler(const Reference< XErrorHandler > & Handler) throw (RuntimeException) 528 { 529 maData.mxErrorHandler = Handler; 530 } 531 532 void FastSaxParser::setEntityResolver(const Reference < XEntityResolver > & Resolver) throw (RuntimeException) 533 { 534 maData.mxEntityResolver = Resolver; 535 } 536 537 void FastSaxParser::setLocale( const Locale & Locale ) throw (RuntimeException) 538 { 539 maData.maLocale = Locale; 540 } 541 542 OUString FastSaxParser::getImplementationName_Static(void) 543 { 544 return OUString::createFromAscii( PARSER_IMPLEMENTATION_NAME ); 545 } 546 547 Sequence< OUString > FastSaxParser::getSupportedServiceNames_Static(void) 548 { 549 Sequence<OUString> aRet(1); 550 aRet.getArray()[0] = ::rtl::OUString( RTL_CONSTASCII_USTRINGPARAM(PARSER_SERVICE_NAME) ); 551 return aRet; 552 } 553 554 // XServiceInfo 555 OUString FastSaxParser::getImplementationName() throw (RuntimeException) 556 { 557 return getImplementationName_Static(); 558 } 559 560 // XServiceInfo 561 sal_Bool FastSaxParser::supportsService(const OUString& ServiceName) throw (RuntimeException) 562 { 563 Sequence< OUString > aSNL = getSupportedServiceNames(); 564 const OUString * pArray = aSNL.getConstArray(); 565 566 for( sal_Int32 i = 0; i < aSNL.getLength(); i++ ) 567 if( pArray[i] == ServiceName ) 568 return sal_True; 569 570 return sal_False; 571 } 572 573 // XServiceInfo 574 Sequence< OUString > FastSaxParser::getSupportedServiceNames(void) throw (RuntimeException) 575 { 576 577 Sequence<OUString> seq(1); 578 seq.getArray()[0] = OUString::createFromAscii( PARSER_SERVICE_NAME ); 579 return seq; 580 } 581 582 583 /*--------------------------------------- 584 * 585 * Helper functions and classes 586 * 587 *-------------------------------------------*/ 588 589 namespace { 590 591 OUString lclGetErrorMessage( XML_Error xmlE, const OUString& sSystemId, sal_Int32 nLine ) 592 { 593 const sal_Char* pMessage = ""; 594 switch( xmlE ) 595 { 596 case XML_ERROR_NONE: pMessage = "No"; break; 597 case XML_ERROR_NO_MEMORY: pMessage = "no memory"; break; 598 case XML_ERROR_SYNTAX: pMessage = "syntax"; break; 599 case XML_ERROR_NO_ELEMENTS: pMessage = "no elements"; break; 600 case XML_ERROR_INVALID_TOKEN: pMessage = "invalid token"; break; 601 case XML_ERROR_UNCLOSED_TOKEN: pMessage = "unclosed token"; break; 602 case XML_ERROR_PARTIAL_CHAR: pMessage = "partial char"; break; 603 case XML_ERROR_TAG_MISMATCH: pMessage = "tag mismatch"; break; 604 case XML_ERROR_DUPLICATE_ATTRIBUTE: pMessage = "duplicate attribute"; break; 605 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT: pMessage = "junk after doc element"; break; 606 case XML_ERROR_PARAM_ENTITY_REF: pMessage = "parameter entity reference"; break; 607 case XML_ERROR_UNDEFINED_ENTITY: pMessage = "undefined entity"; break; 608 case XML_ERROR_RECURSIVE_ENTITY_REF: pMessage = "recursive entity reference"; break; 609 case XML_ERROR_ASYNC_ENTITY: pMessage = "async entity"; break; 610 case XML_ERROR_BAD_CHAR_REF: pMessage = "bad char reference"; break; 611 case XML_ERROR_BINARY_ENTITY_REF: pMessage = "binary entity reference"; break; 612 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF: pMessage = "attribute external entity reference"; break; 613 case XML_ERROR_MISPLACED_XML_PI: pMessage = "misplaced xml processing instruction"; break; 614 case XML_ERROR_UNKNOWN_ENCODING: pMessage = "unknown encoding"; break; 615 case XML_ERROR_INCORRECT_ENCODING: pMessage = "incorrect encoding"; break; 616 case XML_ERROR_UNCLOSED_CDATA_SECTION: pMessage = "unclosed cdata section"; break; 617 case XML_ERROR_EXTERNAL_ENTITY_HANDLING: pMessage = "external entity reference"; break; 618 case XML_ERROR_NOT_STANDALONE: pMessage = "not standalone"; break; 619 default:; 620 } 621 622 OUStringBuffer aBuffer( sal_Unicode( '[' ) ); 623 aBuffer.append( sSystemId ); 624 aBuffer.appendAscii( RTL_CONSTASCII_STRINGPARAM( " line " ) ); 625 aBuffer.append( nLine ); 626 aBuffer.appendAscii( RTL_CONSTASCII_STRINGPARAM( "]: " ) ); 627 aBuffer.appendAscii( pMessage ); 628 aBuffer.appendAscii( RTL_CONSTASCII_STRINGPARAM( " error" ) ); 629 return aBuffer.makeStringAndClear(); 630 } 631 632 } // namespace 633 634 // starts parsing with actual parser ! 635 void FastSaxParser::parse() 636 { 637 const int BUFFER_SIZE = 16 * 1024; 638 Sequence< sal_Int8 > seqOut( BUFFER_SIZE ); 639 640 Entity& rEntity = getEntity(); 641 int nRead = 0; 642 do 643 { 644 nRead = rEntity.maConverter.readAndConvert( seqOut, BUFFER_SIZE ); 645 if( nRead <= 0 ) 646 { 647 XML_Parse( rEntity.mpParser, (const char*) seqOut.getConstArray(), 0, 1 ); 648 break; 649 } 650 651 bool bContinue = XML_Parse( rEntity.mpParser, (const char*) seqOut.getConstArray(), nRead, 0 ) != 0; 652 // callbacks used inside XML_Parse may have caught an exception 653 if( !bContinue || rEntity.maSavedException.hasValue() ) 654 { 655 // Error during parsing ! 656 XML_Error xmlE = XML_GetErrorCode( rEntity.mpParser ); 657 OUString sSystemId = mxDocumentLocator->getSystemId(); 658 sal_Int32 nLine = mxDocumentLocator->getLineNumber(); 659 660 SAXParseException aExcept( 661 lclGetErrorMessage( xmlE, sSystemId, nLine ), 662 Reference< XInterface >(), 663 Any( &rEntity.maSavedException, getCppuType( &rEntity.maSavedException ) ), 664 mxDocumentLocator->getPublicId(), 665 mxDocumentLocator->getSystemId(), 666 mxDocumentLocator->getLineNumber(), 667 mxDocumentLocator->getColumnNumber() 668 ); 669 670 // error handler is set, it may throw the exception 671 if( rEntity.mxErrorHandler.is() ) 672 rEntity.mxErrorHandler->fatalError( Any( aExcept ) ); 673 674 // error handler has not thrown, but parsing cannot go on, the 675 // exception MUST be thrown 676 throw aExcept; 677 } 678 } 679 while( nRead > 0 ); 680 } 681 682 //------------------------------------------ 683 // 684 // The C-Callbacks 685 // 686 //----------------------------------------- 687 688 namespace { 689 690 struct AttributeData 691 { 692 OString maPrefix; 693 OString maName; 694 OString maValue; 695 }; 696 697 } // namespace 698 699 void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char** awAttributes ) 700 { 701 Reference< XFastContextHandler > xParentContext; 702 Entity& rEntity = getEntity(); 703 if( !rEntity.maContextStack.empty() ) 704 { 705 xParentContext = rEntity.maContextStack.top()->mxContext; 706 if( !xParentContext.is() ) 707 { 708 // we ignore current elements, so no processing needed 709 pushContext(); 710 return; 711 } 712 } 713 714 pushContext(); 715 716 rEntity.mxAttributes->clear(); 717 718 // create attribute map and process namespace instructions 719 int i = 0; 720 sal_Int32 nNameLen, nPrefixLen; 721 const XML_Char *pName; 722 const XML_Char *pPrefix; 723 724 try 725 { 726 /* #158414# Each element may define new namespaces, also for attributes. 727 First, process all namespace attributes and cache other attributes in a 728 vector. Second, process the attributes after namespaces have been 729 initialized. */ 730 ::std::vector< AttributeData > aAttribs; 731 732 // #158414# first: get namespaces 733 for( ; awAttributes[i]; i += 2 ) 734 { 735 OSL_ASSERT( awAttributes[i+1] ); 736 737 splitName( awAttributes[i], pPrefix, nPrefixLen, pName, nNameLen ); 738 if( nPrefixLen ) 739 { 740 if( (nPrefixLen == 5) && (strncmp( pPrefix, "xmlns", 5 ) == 0) ) 741 { 742 DefineNamespace( OString( pName, nNameLen ), awAttributes[i+1] ); 743 } 744 else 745 { 746 aAttribs.resize( aAttribs.size() + 1 ); 747 aAttribs.back().maPrefix = OString( pPrefix, nPrefixLen ); 748 aAttribs.back().maName = OString( pName, nNameLen ); 749 aAttribs.back().maValue = OString( awAttributes[i+1] ); 750 } 751 } 752 else 753 { 754 if( (nNameLen == 5) && (strcmp( pName, "xmlns" ) == 0) ) 755 { 756 // namespace of the element found 757 rEntity.maContextStack.top()->maNamespace = OUString( awAttributes[i+1], strlen( awAttributes[i+1] ), RTL_TEXTENCODING_UTF8 ); 758 } 759 else 760 { 761 aAttribs.resize( aAttribs.size() + 1 ); 762 aAttribs.back().maName = OString( pName, nNameLen ); 763 aAttribs.back().maValue = OString( awAttributes[i+1] ); 764 } 765 } 766 } 767 768 // #158414# second: fill attribute list with other attributes 769 for( ::std::vector< AttributeData >::const_iterator aIt = aAttribs.begin(), aEnd = aAttribs.end(); aIt != aEnd; ++aIt ) 770 { 771 if( aIt->maPrefix.getLength() > 0 ) 772 { 773 sal_Int32 nAttributeToken = GetTokenWithPrefix( aIt->maPrefix, aIt->maName ); 774 if( nAttributeToken != FastToken::DONTKNOW ) 775 rEntity.mxAttributes->add( nAttributeToken, aIt->maValue ); 776 else 777 rEntity.mxAttributes->addUnknown( GetNamespaceURL( aIt->maPrefix ), aIt->maName, aIt->maValue ); 778 } 779 else 780 { 781 sal_Int32 nAttributeToken = GetToken( aIt->maName ); 782 if( nAttributeToken != FastToken::DONTKNOW ) 783 rEntity.mxAttributes->add( nAttributeToken, aIt->maValue ); 784 else 785 rEntity.mxAttributes->addUnknown( aIt->maName, aIt->maValue ); 786 } 787 } 788 789 sal_Int32 nElementToken; 790 splitName( pwName, pPrefix, nPrefixLen, pName, nNameLen ); 791 if( nPrefixLen > 0 ) 792 nElementToken = GetTokenWithPrefix( pPrefix, nPrefixLen, pName, nNameLen ); 793 else if( rEntity.maContextStack.top()->maNamespace.getLength() > 0 ) 794 nElementToken = GetTokenWithNamespaceURL( rEntity.maContextStack.top()->maNamespace, pName, nNameLen ); 795 else 796 nElementToken = GetToken( pName ); 797 rEntity.maContextStack.top()->mnElementToken = nElementToken; 798 799 Reference< XFastAttributeList > xAttr( rEntity.mxAttributes.get() ); 800 Reference< XFastContextHandler > xContext; 801 if( nElementToken == FastToken::DONTKNOW ) 802 { 803 if( nPrefixLen > 0 ) 804 rEntity.maContextStack.top()->maNamespace = GetNamespaceURL( pPrefix, nPrefixLen ); 805 806 const OUString aNamespace( rEntity.maContextStack.top()->maNamespace ); 807 const OUString aElementName( pPrefix, nPrefixLen, RTL_TEXTENCODING_UTF8 ); 808 rEntity.maContextStack.top()->maElementName = aElementName; 809 810 if( xParentContext.is() ) 811 xContext = xParentContext->createUnknownChildContext( aNamespace, aElementName, xAttr ); 812 else 813 xContext = rEntity.mxDocumentHandler->createUnknownChildContext( aNamespace, aElementName, xAttr ); 814 815 if( xContext.is() ) 816 { 817 rEntity.maContextStack.top()->mxContext = xContext; 818 xContext->startUnknownElement( aNamespace, aElementName, xAttr ); 819 } 820 } 821 else 822 { 823 if( xParentContext.is() ) 824 xContext = xParentContext->createFastChildContext( nElementToken, xAttr ); 825 else 826 xContext = rEntity.mxDocumentHandler->createFastChildContext( nElementToken, xAttr ); 827 828 829 if( xContext.is() ) 830 { 831 rEntity.maContextStack.top()->mxContext = xContext; 832 xContext->startFastElement( nElementToken, xAttr ); 833 } 834 } 835 } 836 catch( Exception& e ) 837 { 838 rEntity.maSavedException <<= e; 839 } 840 } 841 842 void FastSaxParser::callbackEndElement( const XML_Char* ) 843 { 844 Entity& rEntity = getEntity(); 845 OSL_ENSURE( !rEntity.maContextStack.empty(), "FastSaxParser::callbackEndElement - no context" ); 846 if( !rEntity.maContextStack.empty() ) 847 { 848 SaxContextImplPtr pContext = rEntity.maContextStack.top(); 849 const Reference< XFastContextHandler >& xContext( pContext->mxContext ); 850 if( xContext.is() ) try 851 { 852 sal_Int32 nElementToken = pContext->mnElementToken; 853 if( nElementToken != FastToken::DONTKNOW ) 854 xContext->endFastElement( nElementToken ); 855 else 856 xContext->endUnknownElement( pContext->maNamespace, pContext->maElementName ); 857 } 858 catch( Exception& e ) 859 { 860 rEntity.maSavedException <<= e; 861 } 862 863 popContext(); 864 } 865 } 866 867 868 void FastSaxParser::callbackCharacters( const XML_Char* s, int nLen ) 869 { 870 Entity& rEntity = getEntity(); 871 const Reference< XFastContextHandler >& xContext( rEntity.maContextStack.top()->mxContext ); 872 if( xContext.is() ) try 873 { 874 xContext->characters( OUString( s, nLen, RTL_TEXTENCODING_UTF8 ) ); 875 } 876 catch( Exception& e ) 877 { 878 rEntity.maSavedException <<= e; 879 } 880 } 881 882 int FastSaxParser::callbackExternalEntityRef( XML_Parser parser, 883 const XML_Char *context, const XML_Char * /*base*/, const XML_Char *systemId, const XML_Char *publicId ) 884 { 885 bool bOK = true; 886 InputSource source; 887 888 Entity& rCurrEntity = getEntity(); 889 Entity aNewEntity( rCurrEntity ); 890 891 if( rCurrEntity.mxEntityResolver.is() ) try 892 { 893 aNewEntity.maStructSource = rCurrEntity.mxEntityResolver->resolveEntity( 894 OUString( publicId, strlen( publicId ), RTL_TEXTENCODING_UTF8 ) , 895 OUString( systemId, strlen( systemId ), RTL_TEXTENCODING_UTF8 ) ); 896 } 897 catch( SAXParseException & e ) 898 { 899 rCurrEntity.maSavedException <<= e; 900 bOK = false; 901 } 902 catch( SAXException & e ) 903 { 904 rCurrEntity.maSavedException <<= SAXParseException( 905 e.Message, e.Context, e.WrappedException, 906 mxDocumentLocator->getPublicId(), 907 mxDocumentLocator->getSystemId(), 908 mxDocumentLocator->getLineNumber(), 909 mxDocumentLocator->getColumnNumber() ); 910 bOK = false; 911 } 912 913 if( aNewEntity.maStructSource.aInputStream.is() ) 914 { 915 aNewEntity.mpParser = XML_ExternalEntityParserCreate( parser, context, 0 ); 916 if( !aNewEntity.mpParser ) 917 { 918 return false; 919 } 920 921 aNewEntity.maConverter.setInputStream( aNewEntity.maStructSource.aInputStream ); 922 pushEntity( aNewEntity ); 923 try 924 { 925 parse(); 926 } 927 catch( SAXParseException & e ) 928 { 929 rCurrEntity.maSavedException <<= e; 930 bOK = false; 931 } 932 catch( IOException &e ) 933 { 934 SAXException aEx; 935 aEx.WrappedException <<= e; 936 rCurrEntity.maSavedException <<= aEx; 937 bOK = false; 938 } 939 catch( RuntimeException &e ) 940 { 941 SAXException aEx; 942 aEx.WrappedException <<= e; 943 rCurrEntity.maSavedException <<= aEx; 944 bOK = false; 945 } 946 947 popEntity(); 948 XML_ParserFree( aNewEntity.mpParser ); 949 } 950 951 return bOK; 952 } 953 954 } // namespace sax_fastparser 955