1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 //#include <stdlib.h>
25 //#include <sal/alloca.h>
26 
27 #include <boost/scoped_ptr.hpp>
28 
29 #include <osl/diagnose.h>
30 #include <rtl/ustrbuf.hxx>
31 
32 #include <com/sun/star/lang/DisposedException.hpp>
33 #include <com/sun/star/xml/sax/XFastContextHandler.hpp>
34 #include <com/sun/star/xml/sax/SAXParseException.hpp>
35 #include <com/sun/star/xml/sax/FastToken.hpp>
36 
37 #include "fastparser.hxx"
38 
39 #include <string.h>
40 
41 using ::rtl::OString;
42 using ::rtl::OUString;
43 using ::rtl::OUStringBuffer;
44 using namespace ::std;
45 using namespace ::osl;
46 using namespace ::cppu;
47 using namespace ::com::sun::star::uno;
48 using namespace ::com::sun::star::lang;
49 using namespace ::com::sun::star::xml::sax;
50 //using namespace ::com::sun::star::util;
51 using namespace ::com::sun::star::io;
52 
53 namespace sax_fastparser {
54 
55 // --------------------------------------------------------------------
56 
57 struct SaxContextImpl
58 {
59 	Reference< XFastContextHandler >	mxContext;
60 	sal_uInt32		mnNamespaceCount;
61 	sal_Int32		mnElementToken;
62 	OUString		maNamespace;
63 	OUString		maElementName;
64 
SaxContextImplsax_fastparser::SaxContextImpl65 	SaxContextImpl() { mnNamespaceCount = 0; mnElementToken = 0; }
SaxContextImplsax_fastparser::SaxContextImpl66 	SaxContextImpl( const SaxContextImplPtr& p ) { mnNamespaceCount = p->mnNamespaceCount; mnElementToken = p->mnElementToken; maNamespace = p->maNamespace; }
67 };
68 
69 // --------------------------------------------------------------------
70 
71 struct NamespaceDefine
72 {
73 	OString		maPrefix;
74 	sal_Int32	mnToken;
75 	OUString	maNamespaceURL;
76 
NamespaceDefinesax_fastparser::NamespaceDefine77 	NamespaceDefine( const OString& rPrefix, sal_Int32 nToken, const OUString& rNamespaceURL ) : maPrefix( rPrefix ), mnToken( nToken ), maNamespaceURL( rNamespaceURL ) {}
78 };
79 
80 // --------------------------------------------------------------------
81 // FastLocatorImpl
82 // --------------------------------------------------------------------
83 
84 class FastSaxParser;
85 
86 class FastLocatorImpl : public WeakImplHelper1< XLocator >
87 {
88 public:
FastLocatorImpl(FastSaxParser * p)89 	FastLocatorImpl( FastSaxParser *p ) : mpParser(p) {}
90 
dispose()91 	void dispose() { mpParser = 0; }
checkDispose()92 	void checkDispose() throw (RuntimeException) { if( !mpParser ) throw DisposedException(); }
93 
94 	//XLocator
95     virtual sal_Int32 SAL_CALL getColumnNumber(void) throw (RuntimeException);
96 	virtual sal_Int32 SAL_CALL getLineNumber(void) throw (RuntimeException);
97     virtual OUString SAL_CALL getPublicId(void) throw (RuntimeException);
98     virtual OUString SAL_CALL getSystemId(void) throw (RuntimeException);
99 
100 private:
101 	FastSaxParser *mpParser;
102 };
103 
104 // --------------------------------------------------------------------
105 // FastSaxParser
106 // --------------------------------------------------------------------
107 
108 //---------------------------------------------
109 // the implementation part
110 //---------------------------------------------
111 
112 extern "C" {
113 
call_callbackStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)114 static void call_callbackStartElement(void *userData, const XML_Char *name , const XML_Char **atts)
115 {
116     FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( userData );
117     pFastParser->callbackStartElement( name, atts );
118 }
119 
call_callbackEndElement(void * userData,const XML_Char * name)120 static void call_callbackEndElement(void *userData, const XML_Char *name)
121 {
122     FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( userData );
123     pFastParser->callbackEndElement( name );
124 }
125 
call_callbackCharacters(void * userData,const XML_Char * s,int nLen)126 static void call_callbackCharacters( void *userData , const XML_Char *s , int nLen )
127 {
128     FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( userData );
129     pFastParser->callbackCharacters( s, nLen );
130 }
131 
call_callbackExternalEntityRef(XML_Parser parser,const XML_Char * openEntityNames,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)132 static int call_callbackExternalEntityRef( XML_Parser parser,
133         const XML_Char *openEntityNames, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId )
134 {
135     FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( XML_GetUserData( parser ) );
136     return pFastParser->callbackExternalEntityRef( parser, openEntityNames, base, systemId, publicId );
137 }
138 
139 } // extern "C"
140 
141 // --------------------------------------------------------------------
142 // FastLocatorImpl implementation
143 // --------------------------------------------------------------------
144 
getColumnNumber(void)145 sal_Int32 SAL_CALL FastLocatorImpl::getColumnNumber(void) throw (RuntimeException)
146 {
147 	checkDispose();
148 	return XML_GetCurrentColumnNumber( mpParser->getEntity().mpParser );
149 }
150 
151 // --------------------------------------------------------------------
152 
getLineNumber(void)153 sal_Int32 SAL_CALL FastLocatorImpl::getLineNumber(void) throw (RuntimeException)
154 {
155 	checkDispose();
156 	return XML_GetCurrentLineNumber( mpParser->getEntity().mpParser );
157 }
158 
159 // --------------------------------------------------------------------
160 
getPublicId(void)161 OUString SAL_CALL FastLocatorImpl::getPublicId(void) throw (RuntimeException)
162 {
163 	checkDispose();
164 	return mpParser->getEntity().maStructSource.sPublicId;
165 }
166 // --------------------------------------------------------------------
167 
getSystemId(void)168 OUString SAL_CALL FastLocatorImpl::getSystemId(void) throw (RuntimeException)
169 {
170 	checkDispose();
171 	return mpParser->getEntity().maStructSource.sSystemId;
172 }
173 
174 // --------------------------------------------------------------------
175 
ParserData()176 ParserData::ParserData()
177 {
178 }
179 
~ParserData()180 ParserData::~ParserData()
181 {
182 }
183 
184 // --------------------------------------------------------------------
185 
Entity(const ParserData & rData)186 Entity::Entity( const ParserData& rData ) :
187     ParserData( rData )
188 {
189 	// performance-Improvment. Reference is needed when calling the startTag callback.
190 	// Handing out the same object with every call is allowed (see sax-specification)
191 	mxAttributes.set( new FastAttributeList( mxTokenHandler ) );
192 }
193 
~Entity()194 Entity::~Entity()
195 {
196 }
197 
198 // --------------------------------------------------------------------
199 // FastSaxParser implementation
200 // --------------------------------------------------------------------
201 
FastSaxParser()202 FastSaxParser::FastSaxParser()
203 {
204 	mxDocumentLocator.set( new FastLocatorImpl( this ) );
205 }
206 
207 // --------------------------------------------------------------------
208 
~FastSaxParser()209 FastSaxParser::~FastSaxParser()
210 {
211 	if( mxDocumentLocator.is() )
212 		mxDocumentLocator->dispose();
213 }
214 
215 // --------------------------------------------------------------------
216 
pushContext()217 void FastSaxParser::pushContext()
218 {
219     Entity& rEntity = getEntity();
220 	if( rEntity.maContextStack.empty() )
221 	{
222         rEntity.maContextStack.push( SaxContextImplPtr( new SaxContextImpl ) );
223 		DefineNamespace( OString("xml"), "http://www.w3.org/XML/1998/namespace");
224 	}
225 	else
226 	{
227         rEntity.maContextStack.push( SaxContextImplPtr( new SaxContextImpl( rEntity.maContextStack.top() ) ) );
228 	}
229 }
230 
231 // --------------------------------------------------------------------
232 
popContext()233 void FastSaxParser::popContext()
234 {
235     Entity& rEntity = getEntity();
236 	OSL_ENSURE( !rEntity.maContextStack.empty(), "sax::FastSaxParser::popContext(), pop without push?" );
237 	if( !rEntity.maContextStack.empty() )
238 		rEntity.maContextStack.pop();
239 }
240 
241 // --------------------------------------------------------------------
242 
DefineNamespace(const OString & rPrefix,const sal_Char * pNamespaceURL)243 void FastSaxParser::DefineNamespace( const OString& rPrefix, const sal_Char* pNamespaceURL )
244 {
245     Entity& rEntity = getEntity();
246 	OSL_ENSURE( !rEntity.maContextStack.empty(), "sax::FastSaxParser::DefineNamespace(), I need a context!" );
247 	if( !rEntity.maContextStack.empty() )
248 	{
249 		sal_uInt32 nOffset = rEntity.maContextStack.top()->mnNamespaceCount++;
250 
251 		if( rEntity.maNamespaceDefines.size() <= nOffset )
252 			rEntity.maNamespaceDefines.resize( rEntity.maNamespaceDefines.size() + 64 );
253 
254 		const OUString aNamespaceURL( pNamespaceURL, strlen( pNamespaceURL ), RTL_TEXTENCODING_UTF8 );
255 		rEntity.maNamespaceDefines[nOffset].reset( new NamespaceDefine( rPrefix, GetNamespaceToken( aNamespaceURL ), aNamespaceURL ) );
256 	}
257 }
258 
259 // --------------------------------------------------------------------
260 
GetToken(const OString & rToken)261 sal_Int32 FastSaxParser::GetToken( const OString& rToken )
262 {
263     Sequence< sal_Int8 > aSeq( (sal_Int8*)rToken.getStr(), rToken.getLength() );
264 
265     return getEntity().mxTokenHandler->getTokenFromUTF8( aSeq );
266 }
267 
GetToken(const sal_Char * pToken,sal_Int32 nLen)268 sal_Int32 FastSaxParser::GetToken( const sal_Char* pToken, sal_Int32 nLen /* = 0 */ )
269 {
270 	if( !nLen )
271 		nLen = strlen( pToken );
272 
273 	Sequence< sal_Int8 > aSeq( (sal_Int8*)pToken, nLen );
274 
275 	return getEntity().mxTokenHandler->getTokenFromUTF8( aSeq );
276 }
277 
278 // --------------------------------------------------------------------
279 
GetTokenWithPrefix(const OString & rPrefix,const OString & rName)280 sal_Int32 FastSaxParser::GetTokenWithPrefix( const OString& rPrefix, const OString& rName ) throw (SAXException)
281 {
282     sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
283 
284     Entity& rEntity = getEntity();
285     sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount;
286     while( nNamespace-- )
287     {
288         if( rEntity.maNamespaceDefines[nNamespace]->maPrefix == rPrefix )
289         {
290             nNamespaceToken = rEntity.maNamespaceDefines[nNamespace]->mnToken;
291             break;
292         }
293 
294         if( !nNamespace )
295             throw SAXException(); // prefix that has no defined namespace url
296     }
297 
298     if( nNamespaceToken != FastToken::DONTKNOW )
299     {
300         sal_Int32 nNameToken = GetToken( rName.getStr(), rName.getLength() );
301         if( nNameToken != FastToken::DONTKNOW )
302             return nNamespaceToken | nNameToken;
303     }
304 
305     return FastToken::DONTKNOW;
306 }
307 
GetTokenWithPrefix(const sal_Char * pPrefix,int nPrefixLen,const sal_Char * pName,int nNameLen)308 sal_Int32 FastSaxParser::GetTokenWithPrefix( const sal_Char*pPrefix, int nPrefixLen, const sal_Char* pName, int nNameLen ) throw (SAXException)
309 {
310 	sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
311 
312     Entity& rEntity = getEntity();
313 	sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount;
314 	while( nNamespace-- )
315 	{
316 		const OString& rPrefix( rEntity.maNamespaceDefines[nNamespace]->maPrefix );
317 		if( (rPrefix.getLength() == nPrefixLen) &&
318 			(strncmp( rPrefix.getStr(), pPrefix, nPrefixLen ) == 0 ) )
319 		{
320 			nNamespaceToken = rEntity.maNamespaceDefines[nNamespace]->mnToken;
321 			break;
322 		}
323 
324 		if( !nNamespace )
325 			throw SAXException(); // prefix that has no defined namespace url
326 	}
327 
328 	if( nNamespaceToken != FastToken::DONTKNOW )
329 	{
330 		sal_Int32 nNameToken = GetToken( pName, nNameLen );
331 		if( nNameToken != FastToken::DONTKNOW )
332 			return nNamespaceToken | nNameToken;
333 	}
334 
335 	return FastToken::DONTKNOW;
336 }
337 
338 // --------------------------------------------------------------------
339 
GetNamespaceToken(const OUString & rNamespaceURL)340 sal_Int32 FastSaxParser::GetNamespaceToken( const OUString& rNamespaceURL )
341 {
342 	NamespaceMap::iterator aIter( maNamespaceMap.find( rNamespaceURL ) );
343 	if( aIter != maNamespaceMap.end() )
344 		return (*aIter).second;
345 	else
346 		return FastToken::DONTKNOW;
347 }
348 
349 // --------------------------------------------------------------------
350 
GetNamespaceURL(const OString & rPrefix)351 OUString FastSaxParser::GetNamespaceURL( const OString& rPrefix ) throw (SAXException)
352 {
353     Entity& rEntity = getEntity();
354     if( !rEntity.maContextStack.empty() )
355     {
356         sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount;
357         while( nNamespace-- )
358             if( rEntity.maNamespaceDefines[nNamespace]->maPrefix == rPrefix )
359                 return rEntity.maNamespaceDefines[nNamespace]->maNamespaceURL;
360     }
361 
362     throw SAXException(); // prefix that has no defined namespace url
363 }
364 
GetNamespaceURL(const sal_Char * pPrefix,int nPrefixLen)365 OUString FastSaxParser::GetNamespaceURL( const sal_Char*pPrefix, int nPrefixLen ) throw(SAXException)
366 {
367     Entity& rEntity = getEntity();
368 	if( pPrefix && !rEntity.maContextStack.empty() )
369 	{
370 		sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount;
371 		while( nNamespace-- )
372 		{
373 			const OString& rPrefix( rEntity.maNamespaceDefines[nNamespace]->maPrefix );
374 			if( (rPrefix.getLength() == nPrefixLen) &&
375 				(strncmp( rPrefix.getStr(), pPrefix, nPrefixLen ) == 0 ) )
376 			{
377 				return rEntity.maNamespaceDefines[nNamespace]->maNamespaceURL;
378 			}
379 		}
380 	}
381 
382 	throw SAXException(); // prefix that has no defined namespace url
383 }
384 
385 // --------------------------------------------------------------------
386 
GetTokenWithNamespaceURL(const OUString & rNamespaceURL,const sal_Char * pName,int nNameLen)387 sal_Int32 FastSaxParser::GetTokenWithNamespaceURL( const OUString& rNamespaceURL, const sal_Char* pName, int nNameLen )
388 {
389 	sal_Int32 nNamespaceToken = GetNamespaceToken( rNamespaceURL );
390 
391 	if( nNamespaceToken != FastToken::DONTKNOW )
392 	{
393 		sal_Int32 nNameToken = GetToken( pName, nNameLen );
394 		if( nNameToken != FastToken::DONTKNOW )
395 			return nNamespaceToken | nNameToken;
396 	}
397 
398 	return FastToken::DONTKNOW;
399 }
400 
401 // --------------------------------------------------------------------
402 
splitName(const XML_Char * pwName,const XML_Char * & rpPrefix,sal_Int32 & rPrefixLen,const XML_Char * & rpName,sal_Int32 & rNameLen)403 void FastSaxParser::splitName( const XML_Char *pwName, const XML_Char *&rpPrefix, sal_Int32 &rPrefixLen, const XML_Char *&rpName, sal_Int32 &rNameLen )
404 {
405 	XML_Char *p;
406 	for( p = const_cast< XML_Char* >( pwName ), rNameLen = 0, rPrefixLen = 0; *p; p++ )
407 	{
408 		if( *p == ':' )
409 		{
410 			rPrefixLen = p - pwName;
411 			rNameLen = 0;
412 		}
413 		else
414 		{
415 			rNameLen++;
416 		}
417 	}
418 	if( rPrefixLen )
419 	{
420 		rpPrefix = pwName;
421 		rpName = &pwName[ rPrefixLen + 1 ];
422 	}
423 	else
424 	{
425 		rpPrefix = 0;
426 		rpName = pwName;
427 	}
428 }
429 
430 /***************
431 *
432 * parseStream does Parser-startup initializations. The FastSaxParser::parse() method does
433 * the file-specific initialization work. (During a parser run, external files may be opened)
434 *
435 ****************/
parseStream(const InputSource & maStructSource)436 void FastSaxParser::parseStream( const InputSource& maStructSource)	throw (SAXException, IOException, RuntimeException)
437 {
438 	// Only one text at one time
439 	MutexGuard guard( maMutex );
440 
441 	Entity entity( maData );
442 	entity.maStructSource = maStructSource;
443 
444 	if( !entity.maStructSource.aInputStream.is() )
445 		throw SAXException( OUString( RTL_CONSTASCII_USTRINGPARAM( "No input source" ) ), Reference< XInterface >(), Any() );
446 
447 	entity.maConverter.setInputStream( entity.maStructSource.aInputStream );
448 	if( entity.maStructSource.sEncoding.getLength() )
449 		entity.maConverter.setEncoding(	OUStringToOString( entity.maStructSource.sEncoding, RTL_TEXTENCODING_ASCII_US ) );
450 
451 	// create parser with proper encoding
452 	entity.mpParser = XML_ParserCreate( 0 );
453 	if( !entity.mpParser )
454 		throw SAXException( OUString( RTL_CONSTASCII_USTRINGPARAM( "Couldn't create parser" ) ), Reference< XInterface >(), Any() );
455 
456 	// set all necessary C-Callbacks
457 	XML_SetUserData( entity.mpParser, this );
458 	XML_SetElementHandler( entity.mpParser,	call_callbackStartElement, call_callbackEndElement );
459 	XML_SetCharacterDataHandler( entity.mpParser, call_callbackCharacters );
460 	XML_SetExternalEntityRefHandler( entity.mpParser, call_callbackExternalEntityRef );
461 
462 	pushEntity( entity );
463 	try
464 	{
465 		// start the document
466 		if( entity.mxDocumentHandler.is() )
467 		{
468 			Reference< XLocator > xLoc( mxDocumentLocator.get() );
469 			entity.mxDocumentHandler->setDocumentLocator( xLoc );
470 			entity.mxDocumentHandler->startDocument();
471 		}
472 
473 		parse();
474 
475 		// finish document
476 		if( entity.mxDocumentHandler.is() )
477 		{
478 			entity.mxDocumentHandler->endDocument();
479 		}
480 	}
481 	catch( SAXException & )
482 	{
483 		popEntity();
484 		XML_ParserFree( entity.mpParser );
485   		throw;
486 	}
487 	catch( IOException & )
488 	{
489 		popEntity();
490 		XML_ParserFree( entity.mpParser );
491 		throw;
492 	}
493 	catch( RuntimeException & )
494 	{
495 		popEntity();
496 		XML_ParserFree( entity.mpParser );
497 		throw;
498 	}
499 
500 	popEntity();
501 	XML_ParserFree( entity.mpParser );
502 }
503 
setFastDocumentHandler(const Reference<XFastDocumentHandler> & Handler)504 void FastSaxParser::setFastDocumentHandler( const Reference< XFastDocumentHandler >& Handler ) throw (RuntimeException)
505 {
506 	maData.mxDocumentHandler = Handler;
507 }
508 
setTokenHandler(const Reference<XFastTokenHandler> & Handler)509 void SAL_CALL FastSaxParser::setTokenHandler( const Reference< XFastTokenHandler >& Handler ) throw (RuntimeException)
510 {
511 	maData.mxTokenHandler = Handler;
512 }
513 
registerNamespace(const OUString & NamespaceURL,sal_Int32 NamespaceToken)514 void SAL_CALL FastSaxParser::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken ) throw (IllegalArgumentException, RuntimeException)
515 {
516 	if( NamespaceToken >= FastToken::NAMESPACE )
517 	{
518 		if( GetNamespaceToken( NamespaceURL ) == FastToken::DONTKNOW )
519 		{
520 			maNamespaceMap[ NamespaceURL ] = NamespaceToken;
521 			return;
522 		}
523 	}
524 	throw IllegalArgumentException();
525 }
526 
setErrorHandler(const Reference<XErrorHandler> & Handler)527 void FastSaxParser::setErrorHandler(const Reference< XErrorHandler > & Handler) throw (RuntimeException)
528 {
529 	maData.mxErrorHandler = Handler;
530 }
531 
setEntityResolver(const Reference<XEntityResolver> & Resolver)532 void FastSaxParser::setEntityResolver(const Reference < XEntityResolver > & Resolver) throw (RuntimeException)
533 {
534 	maData.mxEntityResolver = Resolver;
535 }
536 
setLocale(const Locale & Locale)537 void FastSaxParser::setLocale( const Locale & Locale ) throw (RuntimeException)
538 {
539 	maData.maLocale = Locale;
540 }
541 
getSupportedServiceNames_Static(void)542 Sequence< OUString > FastSaxParser::getSupportedServiceNames_Static(void)
543 {
544 	Sequence<OUString> aRet(1);
545 	aRet.getArray()[0] = ::rtl::OUString( RTL_CONSTASCII_USTRINGPARAM(PARSER_SERVICE_NAME) );
546 	return aRet;
547 }
548 
549 // XServiceInfo
getImplementationName()550 OUString FastSaxParser::getImplementationName() throw (RuntimeException)
551 {
552     return OUString::createFromAscii( PARSER_IMPLEMENTATION_NAME );
553 }
554 
555 // XServiceInfo
supportsService(const OUString & ServiceName)556 sal_Bool FastSaxParser::supportsService(const OUString& ServiceName) throw (RuntimeException)
557 {
558     Sequence< OUString > aSNL = getSupportedServiceNames();
559     const OUString * pArray = aSNL.getConstArray();
560 
561     for( sal_Int32 i = 0; i < aSNL.getLength(); i++ )
562         if( pArray[i] == ServiceName )
563             return sal_True;
564 
565     return sal_False;
566 }
567 
568 // XServiceInfo
getSupportedServiceNames(void)569 Sequence< OUString > FastSaxParser::getSupportedServiceNames(void) throw (RuntimeException)
570 {
571 
572     Sequence<OUString> seq(1);
573     seq.getArray()[0] = OUString::createFromAscii( PARSER_SERVICE_NAME );
574     return seq;
575 }
576 
577 
578 /*---------------------------------------
579 *
580 * Helper functions and classes
581 *
582 *-------------------------------------------*/
583 
584 namespace {
585 
lclGetErrorMessage(XML_Error xmlE,const OUString & sSystemId,sal_Int32 nLine)586 OUString lclGetErrorMessage( XML_Error xmlE, const OUString& sSystemId, sal_Int32 nLine )
587 {
588 	const sal_Char* pMessage = "";
589 	switch( xmlE )
590 	{
591         case XML_ERROR_NONE:                            pMessage = "No";                                    break;
592         case XML_ERROR_NO_MEMORY:                       pMessage = "no memory";                             break;
593         case XML_ERROR_SYNTAX:                          pMessage = "syntax";                                break;
594         case XML_ERROR_NO_ELEMENTS:                     pMessage = "no elements";                           break;
595         case XML_ERROR_INVALID_TOKEN:                   pMessage = "invalid token";                         break;
596         case XML_ERROR_UNCLOSED_TOKEN:                  pMessage = "unclosed token";                        break;
597         case XML_ERROR_PARTIAL_CHAR:                    pMessage = "partial char";                          break;
598         case XML_ERROR_TAG_MISMATCH:                    pMessage = "tag mismatch";                          break;
599         case XML_ERROR_DUPLICATE_ATTRIBUTE:             pMessage = "duplicate attribute";                   break;
600         case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:          pMessage = "junk after doc element";                break;
601         case XML_ERROR_PARAM_ENTITY_REF:                pMessage = "parameter entity reference";            break;
602         case XML_ERROR_UNDEFINED_ENTITY:                pMessage = "undefined entity";                      break;
603         case XML_ERROR_RECURSIVE_ENTITY_REF:            pMessage = "recursive entity reference";            break;
604         case XML_ERROR_ASYNC_ENTITY:                    pMessage = "async entity";                          break;
605         case XML_ERROR_BAD_CHAR_REF:                    pMessage = "bad char reference";                    break;
606         case XML_ERROR_BINARY_ENTITY_REF:               pMessage = "binary entity reference";               break;
607         case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:   pMessage = "attribute external entity reference";   break;
608         case XML_ERROR_MISPLACED_XML_PI:                pMessage = "misplaced xml processing instruction";  break;
609         case XML_ERROR_UNKNOWN_ENCODING:                pMessage = "unknown encoding";                      break;
610         case XML_ERROR_INCORRECT_ENCODING:              pMessage = "incorrect encoding";                    break;
611         case XML_ERROR_UNCLOSED_CDATA_SECTION:          pMessage = "unclosed cdata section";                break;
612         case XML_ERROR_EXTERNAL_ENTITY_HANDLING:        pMessage = "external entity reference";             break;
613         case XML_ERROR_NOT_STANDALONE:                  pMessage = "not standalone";                        break;
614         default:;
615     }
616 
617 	OUStringBuffer aBuffer( sal_Unicode( '[' ) );
618 	aBuffer.append( sSystemId );
619 	aBuffer.appendAscii( RTL_CONSTASCII_STRINGPARAM( " line " ) );
620 	aBuffer.append( nLine );
621 	aBuffer.appendAscii( RTL_CONSTASCII_STRINGPARAM( "]: " ) );
622 	aBuffer.appendAscii( pMessage );
623 	aBuffer.appendAscii( RTL_CONSTASCII_STRINGPARAM( " error" ) );
624 	return aBuffer.makeStringAndClear();
625 }
626 
627 } // namespace
628 
629 // starts parsing with actual parser !
parse()630 void FastSaxParser::parse()
631 {
632 	const int BUFFER_SIZE = 16 * 1024;
633 	Sequence< sal_Int8 > seqOut( BUFFER_SIZE );
634 
635     Entity& rEntity = getEntity();
636 	int nRead = 0;
637     do
638 	{
639 		nRead = rEntity.maConverter.readAndConvert( seqOut, BUFFER_SIZE );
640 		if( nRead <= 0 )
641 		{
642 			XML_Parse( rEntity.mpParser, (const char*) seqOut.getConstArray(), 0, 1 );
643 			break;
644 		}
645 
646 		bool bContinue = XML_Parse( rEntity.mpParser, (const char*) seqOut.getConstArray(), nRead, 0 ) != 0;
647 		// callbacks used inside XML_Parse may have caught an exception
648 		if( !bContinue || rEntity.maSavedException.hasValue() )
649 		{
650 			// Error during parsing !
651 			XML_Error xmlE = XML_GetErrorCode( rEntity.mpParser );
652 			OUString sSystemId = mxDocumentLocator->getSystemId();
653 			sal_Int32 nLine = mxDocumentLocator->getLineNumber();
654 
655 			SAXParseException aExcept(
656 				lclGetErrorMessage( xmlE, sSystemId, nLine ),
657 				Reference< XInterface >(),
658 				Any( &rEntity.maSavedException, getCppuType( &rEntity.maSavedException ) ),
659 				mxDocumentLocator->getPublicId(),
660 				mxDocumentLocator->getSystemId(),
661 				mxDocumentLocator->getLineNumber(),
662 				mxDocumentLocator->getColumnNumber()
663 			);
664 
665             // error handler is set, it may throw the exception
666 			if( rEntity.mxErrorHandler.is() )
667 				rEntity.mxErrorHandler->fatalError( Any( aExcept ) );
668 
669 			// error handler has not thrown, but parsing cannot go on, the
670             // exception MUST be thrown
671 			throw aExcept;
672 		}
673 	}
674 	while( nRead > 0 );
675 }
676 
677 //------------------------------------------
678 //
679 // The C-Callbacks
680 //
681 //-----------------------------------------
682 
683 namespace {
684 
685 struct AttributeData
686 {
687     OString             maPrefix;
688     OString             maName;
689     OString             maValue;
690 };
691 
692 } // namespace
693 
callbackStartElement(const XML_Char * pwName,const XML_Char ** awAttributes)694 void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char** awAttributes )
695 {
696 	Reference< XFastContextHandler > xParentContext;
697 	Entity& rEntity = getEntity();
698 	if( !rEntity.maContextStack.empty() )
699 	{
700 		xParentContext = rEntity.maContextStack.top()->mxContext;
701 		if( !xParentContext.is() )
702 		{
703 			// we ignore current elements, so no processing needed
704 			pushContext();
705 			return;
706 		}
707 	}
708 
709 	pushContext();
710 
711 	rEntity.mxAttributes->clear();
712 
713 	// create attribute map and process namespace instructions
714 	int i = 0;
715 	sal_Int32 nNameLen, nPrefixLen;
716 	const XML_Char *pName;
717 	const XML_Char *pPrefix;
718 
719 	try
720 	{
721         /*  #158414# Each element may define new namespaces, also for attribues.
722             First, process all namespace attributes and cache other attributes in a
723             vector. Second, process the attributes after namespaces have been
724             initialized. */
725         ::std::vector< AttributeData > aAttribs;
726 
727         // #158414# first: get namespaces
728     	for( ; awAttributes[i]; i += 2 )
729     	{
730     		OSL_ASSERT( awAttributes[i+1] );
731 
732     		splitName( awAttributes[i], pPrefix, nPrefixLen, pName, nNameLen );
733     		if( nPrefixLen )
734     		{
735     			if( (nPrefixLen == 5) && (strncmp( pPrefix, "xmlns", 5 ) == 0) )
736     			{
737     				DefineNamespace( OString( pName, nNameLen ), awAttributes[i+1] );
738     			}
739     			else
740     			{
741                     aAttribs.resize( aAttribs.size() + 1 );
742                     aAttribs.back().maPrefix = OString( pPrefix, nPrefixLen );
743                     aAttribs.back().maName = OString( pName, nNameLen );
744                     aAttribs.back().maValue = OString( awAttributes[i+1] );
745                 }
746     		}
747     		else
748     		{
749     			if( (nNameLen == 5) && (strcmp( pName, "xmlns" ) == 0) )
750     			{
751     				// namespace of the element found
752     				rEntity.maContextStack.top()->maNamespace = OUString( awAttributes[i+1], strlen( awAttributes[i+1] ), RTL_TEXTENCODING_UTF8 );
753     			}
754     			else
755     			{
756                     aAttribs.resize( aAttribs.size() + 1 );
757                     aAttribs.back().maName = OString( pName, nNameLen );
758                     aAttribs.back().maValue = OString( awAttributes[i+1] );
759     			}
760     		}
761     	}
762 
763         // #158414# second: fill attribute list with other attributes
764         for( ::std::vector< AttributeData >::const_iterator aIt = aAttribs.begin(), aEnd = aAttribs.end(); aIt != aEnd; ++aIt )
765         {
766             if( aIt->maPrefix.getLength() > 0 )
767             {
768                 sal_Int32 nAttributeToken = GetTokenWithPrefix( aIt->maPrefix, aIt->maName );
769                 if( nAttributeToken != FastToken::DONTKNOW )
770                     rEntity.mxAttributes->add( nAttributeToken, aIt->maValue );
771                 else
772                     rEntity.mxAttributes->addUnknown( GetNamespaceURL( aIt->maPrefix ), aIt->maName, aIt->maValue );
773             }
774             else
775             {
776                 sal_Int32 nAttributeToken = GetToken( aIt->maName );
777                 if( nAttributeToken != FastToken::DONTKNOW )
778                     rEntity.mxAttributes->add( nAttributeToken, aIt->maValue );
779                 else
780                     rEntity.mxAttributes->addUnknown( aIt->maName, aIt->maValue );
781             }
782         }
783 
784     	sal_Int32 nElementToken;
785     	splitName( pwName, pPrefix, nPrefixLen, pName, nNameLen );
786     	if( nPrefixLen > 0 )
787     		nElementToken = GetTokenWithPrefix( pPrefix, nPrefixLen, pName, nNameLen );
788     	else if( rEntity.maContextStack.top()->maNamespace.getLength() > 0 )
789     		nElementToken = GetTokenWithNamespaceURL( rEntity.maContextStack.top()->maNamespace, pName, nNameLen );
790     	else
791     		nElementToken = GetToken( pName );
792     	rEntity.maContextStack.top()->mnElementToken = nElementToken;
793 
794 		Reference< XFastAttributeList > xAttr( rEntity.mxAttributes.get() );
795 		Reference< XFastContextHandler > xContext;
796 		if( nElementToken == FastToken::DONTKNOW )
797 		{
798 			if( nPrefixLen > 0 )
799 				rEntity.maContextStack.top()->maNamespace = GetNamespaceURL( pPrefix, nPrefixLen );
800 
801 			const OUString aNamespace( rEntity.maContextStack.top()->maNamespace );
802 			const OUString aElementName( pPrefix, nPrefixLen, RTL_TEXTENCODING_UTF8 );
803 			rEntity.maContextStack.top()->maElementName = aElementName;
804 
805 			if( xParentContext.is() )
806 				xContext = xParentContext->createUnknownChildContext( aNamespace, aElementName, xAttr );
807 			else
808 				xContext = rEntity.mxDocumentHandler->createUnknownChildContext( aNamespace, aElementName, xAttr );
809 
810 			if( xContext.is() )
811 			{
812 				rEntity.maContextStack.top()->mxContext = xContext;
813 				xContext->startUnknownElement( aNamespace, aElementName, xAttr );
814 			}
815 		}
816 		else
817 		{
818 			if( xParentContext.is() )
819 				xContext = xParentContext->createFastChildContext( nElementToken, xAttr );
820 			else
821 				xContext = rEntity.mxDocumentHandler->createFastChildContext( nElementToken, xAttr );
822 
823 
824 			if( xContext.is() )
825 			{
826 				rEntity.maContextStack.top()->mxContext = xContext;
827 				xContext->startFastElement( nElementToken, xAttr );
828 			}
829 		}
830 	}
831 	catch( Exception& e )
832 	{
833 		rEntity.maSavedException <<= e;
834 	}
835 }
836 
callbackEndElement(const XML_Char *)837 void FastSaxParser::callbackEndElement( const XML_Char* )
838 {
839     Entity& rEntity = getEntity();
840     OSL_ENSURE( !rEntity.maContextStack.empty(), "FastSaxParser::callbackEndElement - no context" );
841 	if( !rEntity.maContextStack.empty() )
842 	{
843 		SaxContextImplPtr pContext = rEntity.maContextStack.top();
844 		const Reference< XFastContextHandler >& xContext( pContext->mxContext );
845 		if( xContext.is() ) try
846 		{
847 			sal_Int32 nElementToken = pContext->mnElementToken;
848 			if( nElementToken != FastToken::DONTKNOW )
849 				xContext->endFastElement( nElementToken );
850 			else
851 				xContext->endUnknownElement( pContext->maNamespace, pContext->maElementName );
852 		}
853 		catch( Exception& e )
854 		{
855 			rEntity.maSavedException <<= e;
856 		}
857 
858 		popContext();
859 	}
860 }
861 
862 
callbackCharacters(const XML_Char * s,int nLen)863 void FastSaxParser::callbackCharacters( const XML_Char* s, int nLen )
864 {
865     Entity& rEntity = getEntity();
866 	const Reference< XFastContextHandler >& xContext( rEntity.maContextStack.top()->mxContext );
867 	if( xContext.is() ) try
868 	{
869 		xContext->characters( OUString( s, nLen, RTL_TEXTENCODING_UTF8 ) );
870 	}
871 	catch( Exception& e )
872 	{
873 		rEntity.maSavedException <<= e;
874 	}
875 }
876 
callbackExternalEntityRef(XML_Parser parser,const XML_Char * context,const XML_Char *,const XML_Char * systemId,const XML_Char * publicId)877 int FastSaxParser::callbackExternalEntityRef( XML_Parser parser,
878         const XML_Char *context, const XML_Char * /*base*/, const XML_Char *systemId, const XML_Char *publicId )
879 {
880 	bool bOK = true;
881 	InputSource source;
882 
883     Entity& rCurrEntity = getEntity();
884 	Entity aNewEntity( rCurrEntity );
885 
886 	if( rCurrEntity.mxEntityResolver.is() ) try
887 	{
888     	aNewEntity.maStructSource = rCurrEntity.mxEntityResolver->resolveEntity(
889 			OUString( publicId, strlen( publicId ), RTL_TEXTENCODING_UTF8 ) ,
890 			OUString( systemId, strlen( systemId ), RTL_TEXTENCODING_UTF8 ) );
891     }
892     catch( SAXParseException & e )
893 	{
894     	rCurrEntity.maSavedException <<= e;
895     	bOK = false;
896     }
897     catch( SAXException & e )
898 	{
899     	rCurrEntity.maSavedException <<= SAXParseException(
900 			e.Message, e.Context, e.WrappedException,
901 			mxDocumentLocator->getPublicId(),
902 			mxDocumentLocator->getSystemId(),
903 			mxDocumentLocator->getLineNumber(),
904 			mxDocumentLocator->getColumnNumber() );
905 		bOK = false;
906     }
907 
908 	if( aNewEntity.maStructSource.aInputStream.is() )
909 	{
910 		aNewEntity.mpParser = XML_ExternalEntityParserCreate( parser, context, 0 );
911 		if( !aNewEntity.mpParser )
912 		{
913 			return false;
914 		}
915 
916 		aNewEntity.maConverter.setInputStream( aNewEntity.maStructSource.aInputStream );
917 		pushEntity( aNewEntity );
918 		try
919 		{
920 			parse();
921 		}
922 		catch( SAXParseException & e )
923 		{
924 			rCurrEntity.maSavedException <<= e;
925 			bOK = false;
926 		}
927 		catch( IOException &e )
928 		{
929 			SAXException aEx;
930 			aEx.WrappedException <<= e;
931 			rCurrEntity.maSavedException <<= aEx;
932 			bOK = false;
933 		}
934 		catch( RuntimeException &e )
935 		{
936 			SAXException aEx;
937 			aEx.WrappedException <<= e;
938 			rCurrEntity.maSavedException <<= aEx;
939 			bOK = false;
940 		}
941 
942 		popEntity();
943 		XML_ParserFree( aNewEntity.mpParser );
944 	}
945 
946 	return bOK;
947 }
948 
949 } // namespace sax_fastparser
950