xref: /trunk/main/l10ntools/inc/xmlparse.hxx (revision cdf0e10c4e3984b49a9502b011690b615761d4a3)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 #ifndef BOOTSTRP_XMLPARSE_HXX
29 #define BOOTSTRP_XMLPARSE_HXX
30 
31 #include <signal.h>
32 #include <expat.h>
33 #include <rtl/ustring.hxx>
34 #include <rtl/ustrbuf.hxx>
35 #include "tools/string.hxx"
36 #include "tools/list.hxx"
37 #define ENABLE_BYTESTRING_STREAM_OPERATORS
38 #include "tools/stream.hxx"
39 #include "tools/isofallback.hxx"
40 #include "export.hxx"
41 #include "xmlutil.hxx"
42 
43 #include <fstream>
44 #include <iostream>
45 
46 class XMLParentNode;
47 class XMLElement;
48 
49 
50 using namespace ::rtl;
51 using namespace std;
52 
53 #include <hash_map> /* std::hashmap*/
54 #include <deque>    /* std::deque*/
55 #include <iterator> /* std::iterator*/
56 #include <list>     /* std::list*/
57 #include <vector>   /* std::vector*/
/* Node type codes, one per kind of node declared in this header.
 * Presumably these are the values the GetNodeType() implementations
 * return; the implementations are not part of this header.
 */
#define XML_NODE_TYPE_FILE          0x001   /* XMLFile    */
#define XML_NODE_TYPE_ELEMENT       0x002   /* XMLElement */
#define XML_NODE_TYPE_DATA          0x003   /* XMLData    */
#define XML_NODE_TYPE_COMMENT       0x004   /* XMLComment */
#define XML_NODE_TYPE_DEFAULT       0x005   /* XMLDefault */

/* Number of entries in the language table XMLUtil::isoArray. */
#define MAX_LANGUAGES               99
64 
65 
66 //#define TESTDRIVER        /* use xml2gsi testclass */
67 //-------------------------------------------------------------------------
68 
69 /** Holds data of Attributes
70  */
71 class XMLAttribute : public String
72 {
73 private:
74     String sValue;
75 
76 public:
77     /// creates an attribute
78     XMLAttribute(
79         const String &rName,    // attributes name
80         const String &rValue    // attributes data
81     )
82                 : String( rName ), sValue( rValue ) {}
83 
84     /// getting value of an attribue
85     const String &GetValue() { return sValue; }
86 
87     void setValue(const String &rValue){sValue=rValue;}
88 
89     /// returns true if two attributes are equal and have the same value
90     sal_Bool IsEqual(
91         const XMLAttribute &rAttribute  // the attribute which has to be equal
92     )
93     {
94         return (( rAttribute == *this ) && ( rAttribute.sValue == sValue ));
95     }
96 };
97 
98 DECLARE_LIST( XMLAttributeList, XMLAttribute * )
99 
100 //-------------------------------------------------------------------------
101 
102 /** Virtual base to handle different kinds of XML nodes
103  */
104 class XMLNode
105 {
106 protected:
107     XMLNode() {}
108 
109 public:
110     virtual sal_uInt16 GetNodeType() = 0;
111     virtual ~XMLNode() {}
112 };
113 
114 //-------------------------------------------------------------------------
115 
116 /** Virtual base to handle different kinds of child nodes
117  */
118 class XMLChildNode : public XMLNode
119 {
120 private:
121     XMLParentNode *pParent;
122 
123 protected:
124     XMLChildNode( XMLParentNode *pPar );
125     XMLChildNode():pParent( NULL ){};
126     XMLChildNode( const XMLChildNode& obj);
127     XMLChildNode& operator=(const XMLChildNode& obj);
128 public:
129     virtual sal_uInt16 GetNodeType() = 0;
130 
131     /// returns the parent of this node
132     XMLParentNode *GetParent() { return pParent; }
133     virtual ~XMLChildNode(){};
134 };
135 
136 DECLARE_LIST( XMLChildNodeList, XMLChildNode * )
137 
138 //-------------------------------------------------------------------------
139 
140 /** Virtual base to handle different kinds of parent nodes
141  */
142 class XMLData;
143 
144 class XMLParentNode : public XMLChildNode
145 {
146 private:
147     XMLChildNodeList *pChildList;
148     static int dbgcnt;
149     //int         nParentPos;
150 protected:
151     XMLParentNode( XMLParentNode *pPar )
152                 : XMLChildNode( pPar ), pChildList( NULL )
153               {
154               }
155     XMLParentNode(): pChildList(NULL){
156     }
157     /// Copyconstructor
158     XMLParentNode( const XMLParentNode& );
159 
160     XMLParentNode& operator=(const XMLParentNode& obj);
161     virtual ~XMLParentNode();
162 
163 
164 public:
165     virtual sal_uInt16 GetNodeType() = 0;
166 
167     /// returns child list of this node
168     XMLChildNodeList *GetChildList() { return pChildList; }
169 
170     /// adds a new child
171     void AddChild(
172         XMLChildNode *pChild    /// the new child
173     );
174 
175     void AddChild(
176         XMLChildNode *pChild , int pos  /// the new child
177     );
178 
179     virtual int GetPosition( ByteString id );
180     int RemoveChild( XMLElement *pRefElement );
181     void RemoveAndDeleteAllChilds();
182 
183     /// returns a child element which matches the given one
184     XMLElement *GetChildElement(
185         XMLElement *pRefElement // the reference elelement
186     );
187 };
188 
189 //-------------------------------------------------------------------------
190 
191 DECLARE_LIST( XMLStringList, XMLElement* )
192 
193 /// Mapping numeric Language code <-> XML Element
194 typedef std::hash_map< ByteString ,XMLElement* , hashByteString,equalByteString > LangHashMap;
195 
196 /// Mapping XML Element string identifier <-> Language Map
197 typedef std::hash_map<ByteString , LangHashMap* ,
198                       hashByteString,equalByteString>                   XMLHashMap;
199 
200 /// Mapping iso alpha string code <-> iso numeric code
201 typedef std::hash_map<ByteString, int, hashByteString,equalByteString>  HashMap;
202 
203 /// Mapping XML tag names <-> have localizable strings
204 typedef std::hash_map<ByteString , sal_Bool ,
205                       hashByteString,equalByteString>                   TagMap;
206 
207 /** Holds information of a XML file, is root node of tree
208  */
209 
210 
211 class XMLFile : public XMLParentNode
212 {
213 public:
214     XMLFile() ;
215     XMLFile(
216                 const String &rFileName // the file name, empty if created from memory stream
217     );
218     XMLFile( const XMLFile& obj ) ;
219     ~XMLFile();
220 
221     ByteString* GetGroupID(std::deque<ByteString> &groupid);
222     void        Print( XMLNode *pCur = NULL, sal_uInt16 nLevel = 0 );
223     virtual void SearchL10NElements( XMLParentNode *pCur, int pos = 0 );
224     void        Extract( XMLFile *pCur = NULL );
225     void        View();
226 //  void static Signal_handler(int signo);//void*,oslSignalInfo * pInfo);
227     void        showType(XMLParentNode* node);
228 
229     XMLHashMap* GetStrings(){return XMLStrings;}
230     sal_Bool        Write( ByteString &rFilename );
231     sal_Bool        Write( ofstream &rStream , XMLNode *pCur = NULL );
232 
233     bool        CheckExportStatus( XMLParentNode *pCur = NULL );// , int pos = 0 );
234 
235     XMLFile&    operator=(const XMLFile& obj);
236 
237     virtual sal_uInt16  GetNodeType();
238 
239     /// returns file name
240     const String &GetName() { return sFileName; }
241     void          SetName( const String &rFilename ) { sFileName = rFilename; }
242     void          SetFullName( const String &rFullFilename ) { sFullName = rFullFilename; }
243     const std::vector<ByteString> getOrder(){ return order; }
244 
245 protected:
246     // writes a string as UTF8 with dos line ends to a given stream
247     void        WriteString( ofstream &rStream, const String &sString );
248 
249     // quotes the given text for writing to a file
250     void        QuotHTML( String &rString );
251 
252     void        InsertL10NElement( XMLElement* pElement);
253 
254     // DATA
255     String      sFileName;
256     String      sFullName;
257 
258     const ByteString ID,OLDREF,XML_LANG;
259 
260     TagMap      nodes_localize;
261     XMLHashMap* XMLStrings;
262 
263     std::vector <ByteString> order;
264 };
265 
266 /// An Utility class for XML
267 /// See RFC 3066 / #i8252# for ISO codes
268 class XMLUtil{
269 
270 public:
271     /// Quot the XML characters and replace \n \t
272     static void         QuotHTML( String &rString );
273 
274     /// UnQuot the XML characters and restore \n \t
275     static void         UnQuotHTML  ( String &rString );
276 
277     /// Return the numeric iso language code
278     //sal_uInt16                GetLangByIsoLang( const ByteString &rIsoLang );
279 
280     /// Return the alpha strings representation
281     ByteString          GetIsoLangByIndex( sal_uInt16 nIndex );
282 
283     static XMLUtil&     Instance();
284     ~XMLUtil();
285 
286     void         dump();
287 
288 private:
289     /// Mapping iso alpha string code <-> iso numeric code
290     HashMap      lMap;
291 
292     /// Mapping iso numeric code      <-> iso alpha string code
293     ByteString   isoArray[MAX_LANGUAGES];
294 
295     static void UnQuotData( String &rString );
296     static void UnQuotTags( String &rString );
297 
298     XMLUtil();
299     XMLUtil(const XMLUtil&);
300 
301 };
302 
303 
304 
305 //-------------------------------------------------------------------------
306 
307 /** Hold information of an element node
308  */
309 class XMLElement : public XMLParentNode
310 {
311 private:
312     String sElementName;
313     XMLAttributeList *pAttributes;
314     ByteString   project,
315                  filename,
316                  id,
317                  sOldRef,
318                  resourceType,
319                  languageId;
320     int          nPos;
321 
322 protected:
323     void Print(XMLNode *pCur, OUStringBuffer& buffer , bool rootelement);
324 public:
325     /// create a element node
326     XMLElement(){}
327     XMLElement(
328         const String &rName,    // the element name
329         XMLParentNode *Parent   // parent node of this element
330     ):          XMLParentNode( Parent ),
331                 sElementName( rName ),
332                 pAttributes( NULL ),
333                 project(""),
334                 filename(""),
335                 id(""),
336                 sOldRef(""),
337                 resourceType(""),
338                 languageId(""),
339                 nPos(0)
340                 {
341                 }
342     ~XMLElement();
343     XMLElement(const XMLElement&);
344 
345     XMLElement& operator=(const XMLElement& obj);
346     /// returns node type XML_NODE_ELEMENT
347     virtual sal_uInt16 GetNodeType();
348 
349     /// returns element name
350     const String &GetName() { return sElementName; }
351 
352     /// returns list of attributes of this element
353     XMLAttributeList *GetAttributeList() { return pAttributes; }
354 
355     /// adds a new attribute to this element, typically used by parser
356     void AddAttribute( const String &rAttribute, const String &rValue );
357 
358     void ChangeLanguageTag( const String &rValue );
359     // Return a ASCII String representation of this object
360     OString ToOString();
361 
362     // Return a Unicode String representation of this object
363     OUString ToOUString();
364 
365     bool    Equals(OUString refStr);
366 
367     /// returns a attribute
368     XMLAttribute *GetAttribute(
369         const String &rName // the attribute name
370     );
371     void SetProject         ( ByteString prj        ){ project = prj;        }
372     void SetFileName        ( ByteString fn         ){ filename = fn;        }
373     void SetId              ( ByteString theId      ){ id = theId;           }
374     void SetResourceType    ( ByteString rt         ){ resourceType = rt;    }
375     void SetLanguageId      ( ByteString lid        ){ languageId = lid;     }
376     void SetPos             ( int nPos_in           ){ nPos = nPos_in;       }
377     void SetOldRef          ( ByteString sOldRef_in ){ sOldRef = sOldRef_in; }
378 
379     virtual int        GetPos()         { return nPos;         }
380     ByteString GetProject()     { return project;      }
381     ByteString GetFileName()    { return filename;     }
382     ByteString GetId()          { return id;           }
383     ByteString GetOldref()      { return sOldRef;      }
384     ByteString GetResourceType(){ return resourceType; }
385     ByteString GetLanguageId()  { return languageId;   }
386 
387 
388 };
389 //-------------------------------------------------------------------------
390 
391 
392 /** Holds character data
393  */
394 class XMLData : public XMLChildNode
395 {
396 private:
397     String sData;
398     bool   isNewCreated;
399 
400 public:
401     /// create a data node
402     XMLData(
403         const String &rData,    // the initial data
404         XMLParentNode *Parent   // the parent node of this data, typically a element node
405     )
406                 : XMLChildNode( Parent ), sData( rData ) , isNewCreated ( false ){}
407     XMLData(
408         const String &rData,    // the initial data
409         XMLParentNode *Parent,  // the parent node of this data, typically a element node
410         bool newCreated
411     )
412                 : XMLChildNode( Parent ), sData( rData ) , isNewCreated ( newCreated ){}
413 
414     XMLData(const XMLData& obj);
415 
416     XMLData& operator=(const XMLData& obj);
417     virtual sal_uInt16 GetNodeType();
418 
419     /// returns the data
420     const String &GetData() { return sData; }
421 
422     bool isNew() { return isNewCreated; }
423     /// adds new character data to the existing one
424     void AddData(
425         const String &rData // the new data
426     );
427 
428 
429 
430 };
431 
432 //-------------------------------------------------------------------------
433 
434 /** Holds comments
435  */
436 class XMLComment : public XMLChildNode
437 {
438 private:
439     String sComment;
440 
441 public:
442     /// create a comment node
443     XMLComment(
444         const String &rComment, // the comment
445         XMLParentNode *Parent   // the parent node of this comemnt, typically a element node
446     )
447                 : XMLChildNode( Parent ), sComment( rComment ) {}
448 
449     virtual sal_uInt16 GetNodeType();
450 
451     XMLComment( const XMLComment& obj );
452 
453     XMLComment& operator=(const XMLComment& obj);
454 
455     /// returns the comment
456     const String &GetComment()  { return sComment; }
457 };
458 
459 //-------------------------------------------------------------------------
460 
461 /** Holds additional file content like those for which no handler exists
462  */
463 class XMLDefault : public XMLChildNode
464 {
465 private:
466     String sDefault;
467 
468 public:
469     /// create a comment node
470     XMLDefault(
471         const String &rDefault, // the comment
472         XMLParentNode *Parent   // the parent node of this comemnt, typically a element node
473     )
474                 : XMLChildNode( Parent ), sDefault( rDefault ) {}
475 
476     XMLDefault(const XMLDefault& obj);
477 
478     XMLDefault& operator=(const XMLDefault& obj);
479 
480     /// returns node type XML_NODE_TYPE_COMMENT
481     virtual sal_uInt16 GetNodeType();
482 
483     /// returns the comment
484     const String &GetDefault()  { return sDefault; }
485 };
486 
487 //-------------------------------------------------------------------------
488 
489 /** struct for error information, used by class SimpleXMLParser
490  */
491 struct XMLError {
492     XML_Error eCode;    // the error code
493     sal_uLong nLine;        // error line number
494     sal_uLong nColumn;      // error column number
495     String sMessage;    // readable error message
496 };
497 
498 //-------------------------------------------------------------------------
499 
500 /** validating xml parser, creates a document tree with xml nodes
501  */
502 
503 
504 class SimpleXMLParser
505 {
506 private:
507     XML_Parser aParser;
508     XMLError aErrorInformation;
509 
510     XMLFile *pXMLFile;
511     XMLParentNode *pCurNode;
512     XMLData *pCurData;
513 
514 
515     static void StartElementHandler( void *userData, const XML_Char *name, const XML_Char **atts );
516     static void EndElementHandler( void *userData, const XML_Char *name );
517     static void CharacterDataHandler( void *userData, const XML_Char *s, int len );
518     static void CommentHandler( void *userData, const XML_Char *data );
519     static void DefaultHandler( void *userData, const XML_Char *s, int len );
520 
521 
522     void StartElement( const XML_Char *name, const XML_Char **atts );
523     void EndElement( const XML_Char *name );
524     void CharacterData( const XML_Char *s, int len );
525     void Comment( const XML_Char *data );
526     void Default( const XML_Char *s, int len );
527 
528 
529 public:
530     /// creates a new parser
531     SimpleXMLParser();
532     ~SimpleXMLParser();
533 
534     /// parse a file, returns NULL on criticall errors
535     XMLFile *Execute(
536         const String &rFullFileName,
537         const String &rFileName,    // the file name
538         XMLFile *pXMLFileIn         // the XMLFile
539     );
540 
541     /// parse a memory stream, returns NULL on criticall errors
542     XMLFile *Execute(
543         SvMemoryStream *pStream // the stream
544     );
545 
546     /// returns an error struct
547     const XMLError &GetError() { return aErrorInformation; }
548 };
549 
550 #endif
551