xref: /aoo41x/main/l10ntools/inc/xmlparse.hxx (revision cdf0e10c)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 #ifndef BOOTSTRP_XMLPARSE_HXX
29 #define BOOTSTRP_XMLPARSE_HXX
30 
31 #include <signal.h>
32 #include <expat.h>
33 #include <rtl/ustring.hxx>
34 #include <rtl/ustrbuf.hxx>
35 #include "tools/string.hxx"
36 #include "tools/list.hxx"
37 #define ENABLE_BYTESTRING_STREAM_OPERATORS
38 #include "tools/stream.hxx"
39 #include "tools/isofallback.hxx"
40 #include "export.hxx"
41 #include "xmlutil.hxx"
42 
43 #include <fstream>
44 #include <iostream>
45 
46 class XMLParentNode;
47 class XMLElement;
48 
49 
50 using namespace ::rtl;
51 using namespace std;
52 
53 #include <hash_map> /* std::hashmap*/
54 #include <deque>	/* std::deque*/
55 #include <iterator> /* std::iterator*/
56 #include <list>		/* std::list*/
57 #include <vector>	/* std::vector*/
/* Node type identifiers -- presumably the values returned by the
 * GetNodeType() overrides of the node classes (implementations not shown). */
#define XML_NODE_TYPE_FILE      0x001
#define XML_NODE_TYPE_ELEMENT   0x002
#define XML_NODE_TYPE_DATA      0x003
#define XML_NODE_TYPE_COMMENT   0x004
#define XML_NODE_TYPE_DEFAULT   0x005

/* Number of entries in XMLUtil::isoArray. */
#define MAX_LANGUAGES           99
64 
65 
66 //#define TESTDRIVER		/* use xml2gsi testclass */
67 //-------------------------------------------------------------------------
68 
69 /** Holds data of Attributes
70  */
71 class XMLAttribute : public String
72 {
73 private:
74 	String sValue;
75 
76 public:
77 	/// creates an attribute
78 	XMLAttribute(
79 		const String &rName, 	// attributes name
80 		const String &rValue	// attributes data
81 	)
82 				: String( rName ), sValue( rValue ) {}
83 
84     /// getting value of an attribue
85 	const String &GetValue() { return sValue; }
86 
87     void setValue(const String &rValue){sValue=rValue;}
88 
89 	/// returns true if two attributes are equal and have the same value
90 	sal_Bool IsEqual(
91 		const XMLAttribute &rAttribute	// the attribute which has to be equal
92 	)
93 	{
94 		return (( rAttribute == *this ) && ( rAttribute.sValue == sValue ));
95 	}
96 };
97 
98 DECLARE_LIST( XMLAttributeList, XMLAttribute * )
99 
100 //-------------------------------------------------------------------------
101 
102 /** Virtual base to handle different kinds of XML nodes
103  */
104 class XMLNode
105 {
106 protected:
107 	XMLNode() {}
108 
109 public:
110 	virtual sal_uInt16 GetNodeType() = 0;
111     virtual ~XMLNode() {}
112 };
113 
114 //-------------------------------------------------------------------------
115 
116 /** Virtual base to handle different kinds of child nodes
117  */
118 class XMLChildNode : public XMLNode
119 {
120 private:
121 	XMLParentNode *pParent;
122 
123 protected:
124 	XMLChildNode( XMLParentNode *pPar );
125     XMLChildNode():pParent( NULL ){};
126     XMLChildNode( const XMLChildNode& obj);
127     XMLChildNode& operator=(const XMLChildNode& obj);
128 public:
129 	virtual sal_uInt16 GetNodeType() = 0;
130 
131 	/// returns the parent of this node
132 	XMLParentNode *GetParent() { return pParent; }
133 	virtual ~XMLChildNode(){};
134 };
135 
136 DECLARE_LIST( XMLChildNodeList, XMLChildNode * )
137 
138 //-------------------------------------------------------------------------
139 
140 /** Virtual base to handle different kinds of parent nodes
141  */
142 class XMLData;
143 
144 class XMLParentNode : public XMLChildNode
145 {
146 private:
147 	XMLChildNodeList *pChildList;
148 	static int dbgcnt;
149     //int         nParentPos;
150 protected:
151 	XMLParentNode( XMLParentNode *pPar )
152 				: XMLChildNode( pPar ), pChildList( NULL )
153               {
154 			  }
155 	XMLParentNode(): pChildList(NULL){
156 	}
157     /// Copyconstructor
158     XMLParentNode( const XMLParentNode& );
159 
160     XMLParentNode& operator=(const XMLParentNode& obj);
161     virtual ~XMLParentNode();
162 
163 
164 public:
165 	virtual sal_uInt16 GetNodeType() = 0;
166 
167 	/// returns child list of this node
168 	XMLChildNodeList *GetChildList() { return pChildList; }
169 
170 	/// adds a new child
171 	void AddChild(
172 		XMLChildNode *pChild  	/// the new child
173 	);
174 
175     void AddChild(
176 		XMLChildNode *pChild , int pos 	/// the new child
177 	);
178 
179     virtual int GetPosition( ByteString id );
180     int RemoveChild( XMLElement *pRefElement );
181 	void RemoveAndDeleteAllChilds();
182 
183 	/// returns a child element which matches the given one
184 	XMLElement *GetChildElement(
185 		XMLElement *pRefElement	// the reference elelement
186 	);
187 };
188 
189 //-------------------------------------------------------------------------
190 
191 DECLARE_LIST( XMLStringList, XMLElement* )
192 
193 /// Mapping numeric Language code <-> XML Element
194 typedef std::hash_map< ByteString ,XMLElement* , hashByteString,equalByteString > LangHashMap;
195 
196 /// Mapping XML Element string identifier <-> Language Map
197 typedef std::hash_map<ByteString , LangHashMap* ,
198 					  hashByteString,equalByteString>					XMLHashMap;
199 
200 /// Mapping iso alpha string code <-> iso numeric code
201 typedef std::hash_map<ByteString, int, hashByteString,equalByteString>	HashMap;
202 
203 /// Mapping XML tag names <-> have localizable strings
204 typedef std::hash_map<ByteString , sal_Bool ,
205 					  hashByteString,equalByteString>					TagMap;
206 
207 /** Holds information of a XML file, is root node of tree
208  */
209 
210 
211 class XMLFile : public XMLParentNode
212 {
213 public:
214 	XMLFile() ;
215 	XMLFile(
216 				const String &rFileName // the file name, empty if created from memory stream
217 	);
218     XMLFile( const XMLFile& obj ) ;
219     ~XMLFile();
220 
221     ByteString*	GetGroupID(std::deque<ByteString> &groupid);
222 	void 		Print( XMLNode *pCur = NULL, sal_uInt16 nLevel = 0 );
223 	virtual void SearchL10NElements( XMLParentNode *pCur, int pos = 0 );
224 	void		Extract( XMLFile *pCur = NULL );
225 	void		View();
226 //	void static Signal_handler(int signo);//void*,oslSignalInfo * pInfo);
227 	void		showType(XMLParentNode* node);
228 
229 	XMLHashMap* GetStrings(){return XMLStrings;}
230 	sal_Bool 		Write( ByteString &rFilename );
231 	sal_Bool 		Write( ofstream &rStream , XMLNode *pCur = NULL );
232 
233     bool        CheckExportStatus( XMLParentNode *pCur = NULL );// , int pos = 0 );
234 
235     XMLFile&    operator=(const XMLFile& obj);
236 
237 	virtual sal_uInt16 	GetNodeType();
238 
239 	/// returns file name
240 	const String &GetName() { return sFileName; }
241     void          SetName( const String &rFilename ) { sFileName = rFilename; }
242     void          SetFullName( const String &rFullFilename ) { sFullName = rFullFilename; }
243     const std::vector<ByteString> getOrder(){ return order; }
244 
245 protected:
246 	// writes a string as UTF8 with dos line ends to a given stream
247     void        WriteString( ofstream &rStream, const String &sString );
248 
249     // quotes the given text for writing to a file
250 	void 		QuotHTML( String &rString );
251 
252 	void		InsertL10NElement( XMLElement* pElement);
253 
254 	// DATA
255 	String 		sFileName;
256     String      sFullName;
257 
258 	const ByteString ID,OLDREF,XML_LANG;
259 
260 	TagMap		nodes_localize;
261 	XMLHashMap* XMLStrings;
262 
263     std::vector <ByteString> order;
264 };
265 
266 /// An Utility class for XML
267 /// See RFC 3066 / #i8252# for ISO codes
268 class XMLUtil{
269 
270 public:
271     /// Quot the XML characters and replace \n \t
272     static void         QuotHTML( String &rString );
273 
274     /// UnQuot the XML characters and restore \n \t
275     static void         UnQuotHTML  ( String &rString );
276 
277     /// Return the numeric iso language code
278     //sal_uInt16		        GetLangByIsoLang( const ByteString &rIsoLang );
279 
280     /// Return the alpha strings representation
281     ByteString	        GetIsoLangByIndex( sal_uInt16 nIndex );
282 
283     static XMLUtil&     Instance();
284     ~XMLUtil();
285 
286     void         dump();
287 
288 private:
289     /// Mapping iso alpha string code <-> iso numeric code
290     HashMap      lMap;
291 
292     /// Mapping iso numeric code      <-> iso alpha string code
293     ByteString	 isoArray[MAX_LANGUAGES];
294 
295     static void UnQuotData( String &rString );
296     static void UnQuotTags( String &rString );
297 
298 	XMLUtil();
299 	XMLUtil(const XMLUtil&);
300 
301 };
302 
303 
304 
305 //-------------------------------------------------------------------------
306 
307 /** Hold information of an element node
308  */
309 class XMLElement : public XMLParentNode
310 {
311 private:
312 	String sElementName;
313 	XMLAttributeList *pAttributes;
314 	ByteString 	 project,
315 			     filename,
316 			     id,
317 			     sOldRef,
318 			     resourceType,
319 			     languageId;
320     int          nPos;
321 
322 protected:
323 	void Print(XMLNode *pCur, OUStringBuffer& buffer , bool rootelement);
324 public:
325 	/// create a element node
326 	XMLElement(){}
327     XMLElement(
328 		const String &rName, 	// the element name
329 		XMLParentNode *Parent 	// parent node of this element
330 	):			XMLParentNode( Parent ),
331 				sElementName( rName ),
332 				pAttributes( NULL ),
333 				project(""),
334 				filename(""),
335 				id(""),
336 				sOldRef(""),
337 				resourceType(""),
338 				languageId(""),
339                 nPos(0)
340    				{
341 				}
342 	~XMLElement();
343     XMLElement(const XMLElement&);
344 
345     XMLElement& operator=(const XMLElement& obj);
346 	/// returns node type XML_NODE_ELEMENT
347 	virtual sal_uInt16 GetNodeType();
348 
349 	/// returns element name
350 	const String &GetName() { return sElementName; }
351 
352 	/// returns list of attributes of this element
353 	XMLAttributeList *GetAttributeList() { return pAttributes; }
354 
355 	/// adds a new attribute to this element, typically used by parser
356 	void AddAttribute( const String &rAttribute, const String &rValue );
357 
358     void ChangeLanguageTag( const String &rValue );
359 	// Return a ASCII String representation of this object
360 	OString ToOString();
361 
362 	// Return a Unicode String representation of this object
363 	OUString ToOUString();
364 
365 	bool	Equals(OUString refStr);
366 
367 	/// returns a attribute
368 	XMLAttribute *GetAttribute(
369 		const String &rName	// the attribute name
370 	);
371 	void SetProject         ( ByteString prj        ){ project = prj;        }
372 	void SetFileName        ( ByteString fn         ){ filename = fn;        }
373 	void SetId              ( ByteString theId      ){ id = theId;           }
374 	void SetResourceType    ( ByteString rt         ){ resourceType = rt;    }
375 	void SetLanguageId      ( ByteString lid        ){ languageId = lid;     }
376     void SetPos             ( int nPos_in           ){ nPos = nPos_in;       }
377     void SetOldRef          ( ByteString sOldRef_in ){ sOldRef = sOldRef_in; }
378 
379     virtual int        GetPos()         { return nPos;         }
380     ByteString GetProject()     { return project;      }
381 	ByteString GetFileName()    { return filename;     }
382 	ByteString GetId()          { return id;           }
383 	ByteString GetOldref()      { return sOldRef;      }
384 	ByteString GetResourceType(){ return resourceType; }
385 	ByteString GetLanguageId()  { return languageId;   }
386 
387 
388 };
389 //-------------------------------------------------------------------------
390 
391 
392 /** Holds character data
393  */
394 class XMLData : public XMLChildNode
395 {
396 private:
397 	String sData;
398     bool   isNewCreated;
399 
400 public:
401 	/// create a data node
402 	XMLData(
403 		const String &rData, 	// the initial data
404 		XMLParentNode *Parent	// the parent node of this data, typically a element node
405 	)
406 				: XMLChildNode( Parent ), sData( rData ) , isNewCreated ( false ){}
407 	XMLData(
408 		const String &rData, 	// the initial data
409 		XMLParentNode *Parent,	// the parent node of this data, typically a element node
410         bool newCreated
411     )
412 				: XMLChildNode( Parent ), sData( rData ) , isNewCreated ( newCreated ){}
413 
414     XMLData(const XMLData& obj);
415 
416     XMLData& operator=(const XMLData& obj);
417 	virtual sal_uInt16 GetNodeType();
418 
419 	/// returns the data
420 	const String &GetData() { return sData; }
421 
422     bool isNew() { return isNewCreated; }
423     /// adds new character data to the existing one
424 	void AddData(
425 		const String &rData	// the new data
426 	);
427 
428 
429 
430 };
431 
432 //-------------------------------------------------------------------------
433 
434 /** Holds comments
435  */
436 class XMLComment : public XMLChildNode
437 {
438 private:
439 	String sComment;
440 
441 public:
442 	/// create a comment node
443 	XMLComment(
444 		const String &rComment,	// the comment
445 		XMLParentNode *Parent	// the parent node of this comemnt, typically a element node
446 	)
447 				: XMLChildNode( Parent ), sComment( rComment ) {}
448 
449 	virtual sal_uInt16 GetNodeType();
450 
451     XMLComment( const XMLComment& obj );
452 
453     XMLComment& operator=(const XMLComment& obj);
454 
455     /// returns the comment
456 	const String &GetComment()  { return sComment; }
457 };
458 
459 //-------------------------------------------------------------------------
460 
461 /** Holds additional file content like those for which no handler exists
462  */
463 class XMLDefault : public XMLChildNode
464 {
465 private:
466 	String sDefault;
467 
468 public:
469 	/// create a comment node
470 	XMLDefault(
471 		const String &rDefault,	// the comment
472 		XMLParentNode *Parent	// the parent node of this comemnt, typically a element node
473 	)
474 				: XMLChildNode( Parent ), sDefault( rDefault ) {}
475 
476     XMLDefault(const XMLDefault& obj);
477 
478     XMLDefault& operator=(const XMLDefault& obj);
479 
480     /// returns node type XML_NODE_TYPE_COMMENT
481 	virtual sal_uInt16 GetNodeType();
482 
483 	/// returns the comment
484 	const String &GetDefault()  { return sDefault; }
485 };
486 
487 //-------------------------------------------------------------------------
488 
489 /** struct for error information, used by class SimpleXMLParser
490  */
491 struct XMLError {
492 	XML_Error eCode;	// the error code
493 	sal_uLong nLine;       	// error line number
494 	sal_uLong nColumn;		// error column number
495 	String sMessage;   	// readable error message
496 };
497 
498 //-------------------------------------------------------------------------
499 
500 /** validating xml parser, creates a document tree with xml nodes
501  */
502 
503 
504 class SimpleXMLParser
505 {
506 private:
507 	XML_Parser aParser;
508 	XMLError aErrorInformation;
509 
510 	XMLFile *pXMLFile;
511 	XMLParentNode *pCurNode;
512 	XMLData *pCurData;
513 
514 
515     static void StartElementHandler( void *userData, const XML_Char *name, const XML_Char **atts );
516 	static void EndElementHandler( void *userData, const XML_Char *name );
517 	static void CharacterDataHandler( void *userData, const XML_Char *s, int len );
518 	static void CommentHandler( void *userData, const XML_Char *data );
519 	static void DefaultHandler( void *userData, const XML_Char *s, int len );
520 
521 
522 	void StartElement( const XML_Char *name, const XML_Char **atts );
523 	void EndElement( const XML_Char *name );
524 	void CharacterData( const XML_Char *s, int len );
525 	void Comment( const XML_Char *data );
526 	void Default( const XML_Char *s, int len );
527 
528 
529 public:
530 	/// creates a new parser
531 	SimpleXMLParser();
532 	~SimpleXMLParser();
533 
534 	/// parse a file, returns NULL on criticall errors
535 	XMLFile *Execute(
536         const String &rFullFileName,
537         const String &rFileName,	// the file name
538         XMLFile *pXMLFileIn         // the XMLFile
539 	);
540 
541 	/// parse a memory stream, returns NULL on criticall errors
542 	XMLFile *Execute(
543 		SvMemoryStream *pStream	// the stream
544 	);
545 
546 	/// returns an error struct
547 	const XMLError &GetError() { return aErrorInformation; }
548 };
549 
550 #endif
551