1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 #ifndef BOOTSTRP_XMLPARSE_HXX 29 #define BOOTSTRP_XMLPARSE_HXX 30 31 #include <signal.h> 32 #include <expat.h> 33 #include <rtl/ustring.hxx> 34 #include <rtl/ustrbuf.hxx> 35 #include "tools/string.hxx" 36 #include "tools/list.hxx" 37 #define ENABLE_BYTESTRING_STREAM_OPERATORS 38 #include "tools/stream.hxx" 39 #include "tools/isofallback.hxx" 40 #include "export.hxx" 41 #include "xmlutil.hxx" 42 43 #include <fstream> 44 #include <iostream> 45 46 class XMLParentNode; 47 class XMLElement; 48 49 50 using namespace ::rtl; 51 using namespace std; 52 53 #include <hash_map> /* std::hashmap*/ 54 #include <deque> /* std::deque*/ 55 #include <iterator> /* std::iterator*/ 56 #include <list> /* std::list*/ 57 #include <vector> /* std::vector*/ 58 #define XML_NODE_TYPE_FILE 0x001 59 #define XML_NODE_TYPE_ELEMENT 0x002 60 #define XML_NODE_TYPE_DATA 0x003 61 #define XML_NODE_TYPE_COMMENT 0x004 62 #define XML_NODE_TYPE_DEFAULT 0x005 63 #define MAX_LANGUAGES 99 64 65 66 //#define TESTDRIVER /* use xml2gsi testclass */ 67 //------------------------------------------------------------------------- 68 69 /** Holds data of Attributes 70 */ 71 class XMLAttribute : public String 72 { 73 private: 74 String sValue; 75 76 public: 77 /// creates an attribute 78 XMLAttribute( 79 const String &rName, // attributes name 80 const String &rValue // attributes data 81 ) 82 : String( rName ), sValue( rValue ) {} 83 84 /// getting value of an attribue 85 const String &GetValue() { return sValue; } 86 87 void setValue(const String &rValue){sValue=rValue;} 88 89 /// returns true if two attributes are equal and have the same value 90 sal_Bool IsEqual( 91 const XMLAttribute &rAttribute // the attribute which has to be equal 92 ) 93 { 94 return (( rAttribute == *this ) && ( rAttribute.sValue == sValue )); 95 } 96 }; 97 98 DECLARE_LIST( XMLAttributeList, XMLAttribute * ) 99 100 //------------------------------------------------------------------------- 101 102 /** Virtual base to handle different kinds of XML nodes 103 */ 104 class XMLNode 105 { 106 protected: 107 XMLNode() {} 108 109 public: 110 virtual sal_uInt16 GetNodeType() = 0; 111 virtual ~XMLNode() {} 112 }; 113 114 //------------------------------------------------------------------------- 115 116 /** Virtual base to handle different kinds of child nodes 117 */ 118 class XMLChildNode : public XMLNode 119 { 120 private: 121 XMLParentNode *pParent; 122 123 protected: 124 XMLChildNode( XMLParentNode *pPar ); 125 XMLChildNode():pParent( NULL ){}; 126 XMLChildNode( const XMLChildNode& obj); 127 XMLChildNode& operator=(const XMLChildNode& obj); 128 public: 129 virtual sal_uInt16 GetNodeType() = 0; 130 131 /// returns the parent of this node 132 XMLParentNode *GetParent() { return pParent; } 133 virtual ~XMLChildNode(){}; 134 }; 135 136 DECLARE_LIST( XMLChildNodeList, XMLChildNode * ) 137 138 //------------------------------------------------------------------------- 139 140 /** Virtual base to handle different kinds of parent nodes 141 */ 142 class XMLData; 143 144 class XMLParentNode : public XMLChildNode 145 { 146 private: 147 XMLChildNodeList *pChildList; 148 static int dbgcnt; 149 //int nParentPos; 150 protected: 151 XMLParentNode( XMLParentNode *pPar ) 152 : XMLChildNode( pPar ), pChildList( NULL ) 153 { 154 } 155 XMLParentNode(): pChildList(NULL){ 156 } 157 /// Copyconstructor 158 XMLParentNode( const XMLParentNode& ); 159 160 XMLParentNode& operator=(const XMLParentNode& obj); 161 virtual ~XMLParentNode(); 162 163 164 public: 165 virtual sal_uInt16 GetNodeType() = 0; 166 167 /// returns child list of this node 168 XMLChildNodeList *GetChildList() { return pChildList; } 169 170 /// adds a new child 171 void AddChild( 172 XMLChildNode *pChild /// the new child 173 ); 174 175 void AddChild( 176 XMLChildNode *pChild , int pos /// the new child 177 ); 178 179 virtual int GetPosition( ByteString id ); 180 int RemoveChild( XMLElement *pRefElement ); 181 void RemoveAndDeleteAllChilds(); 182 183 /// returns a child element which matches the given one 184 XMLElement *GetChildElement( 185 XMLElement *pRefElement // the reference elelement 186 ); 187 }; 188 189 //------------------------------------------------------------------------- 190 191 DECLARE_LIST( XMLStringList, XMLElement* ) 192 193 /// Mapping numeric Language code <-> XML Element 194 typedef std::hash_map< ByteString ,XMLElement* , hashByteString,equalByteString > LangHashMap; 195 196 /// Mapping XML Element string identifier <-> Language Map 197 typedef std::hash_map<ByteString , LangHashMap* , 198 hashByteString,equalByteString> XMLHashMap; 199 200 /// Mapping iso alpha string code <-> iso numeric code 201 typedef std::hash_map<ByteString, int, hashByteString,equalByteString> HashMap; 202 203 /// Mapping XML tag names <-> have localizable strings 204 typedef std::hash_map<ByteString , sal_Bool , 205 hashByteString,equalByteString> TagMap; 206 207 /** Holds information of a XML file, is root node of tree 208 */ 209 210 211 class XMLFile : public XMLParentNode 212 { 213 public: 214 XMLFile() ; 215 XMLFile( 216 const String &rFileName // the file name, empty if created from memory stream 217 ); 218 XMLFile( const XMLFile& obj ) ; 219 ~XMLFile(); 220 221 ByteString* GetGroupID(std::deque<ByteString> &groupid); 222 void Print( XMLNode *pCur = NULL, sal_uInt16 nLevel = 0 ); 223 virtual void SearchL10NElements( XMLParentNode *pCur, int pos = 0 ); 224 void Extract( XMLFile *pCur = NULL ); 225 void View(); 226 // void static Signal_handler(int signo);//void*,oslSignalInfo * pInfo); 227 void showType(XMLParentNode* node); 228 229 XMLHashMap* GetStrings(){return XMLStrings;} 230 sal_Bool Write( ByteString &rFilename ); 231 sal_Bool Write( ofstream &rStream , XMLNode *pCur = NULL ); 232 233 bool CheckExportStatus( XMLParentNode *pCur = NULL );// , int pos = 0 ); 234 235 XMLFile& operator=(const XMLFile& obj); 236 237 virtual sal_uInt16 GetNodeType(); 238 239 /// returns file name 240 const String &GetName() { return sFileName; } 241 void SetName( const String &rFilename ) { sFileName = rFilename; } 242 void SetFullName( const String &rFullFilename ) { sFullName = rFullFilename; } 243 const std::vector<ByteString> getOrder(){ return order; } 244 245 protected: 246 // writes a string as UTF8 with dos line ends to a given stream 247 void WriteString( ofstream &rStream, const String &sString ); 248 249 // quotes the given text for writing to a file 250 void QuotHTML( String &rString ); 251 252 void InsertL10NElement( XMLElement* pElement); 253 254 // DATA 255 String sFileName; 256 String sFullName; 257 258 const ByteString ID,OLDREF,XML_LANG; 259 260 TagMap nodes_localize; 261 XMLHashMap* XMLStrings; 262 263 std::vector <ByteString> order; 264 }; 265 266 /// An Utility class for XML 267 /// See RFC 3066 / #i8252# for ISO codes 268 class XMLUtil{ 269 270 public: 271 /// Quot the XML characters and replace \n \t 272 static void QuotHTML( String &rString ); 273 274 /// UnQuot the XML characters and restore \n \t 275 static void UnQuotHTML ( String &rString ); 276 277 /// Return the numeric iso language code 278 //sal_uInt16 GetLangByIsoLang( const ByteString &rIsoLang ); 279 280 /// Return the alpha strings representation 281 ByteString GetIsoLangByIndex( sal_uInt16 nIndex ); 282 283 static XMLUtil& Instance(); 284 ~XMLUtil(); 285 286 void dump(); 287 288 private: 289 /// Mapping iso alpha string code <-> iso numeric code 290 HashMap lMap; 291 292 /// Mapping iso numeric code <-> iso alpha string code 293 ByteString isoArray[MAX_LANGUAGES]; 294 295 static void UnQuotData( String &rString ); 296 static void UnQuotTags( String &rString ); 297 298 XMLUtil(); 299 XMLUtil(const XMLUtil&); 300 301 }; 302 303 304 305 //------------------------------------------------------------------------- 306 307 /** Hold information of an element node 308 */ 309 class XMLElement : public XMLParentNode 310 { 311 private: 312 String sElementName; 313 XMLAttributeList *pAttributes; 314 ByteString project, 315 filename, 316 id, 317 sOldRef, 318 resourceType, 319 languageId; 320 int nPos; 321 322 protected: 323 void Print(XMLNode *pCur, OUStringBuffer& buffer , bool rootelement); 324 public: 325 /// create a element node 326 XMLElement(){} 327 XMLElement( 328 const String &rName, // the element name 329 XMLParentNode *Parent // parent node of this element 330 ): XMLParentNode( Parent ), 331 sElementName( rName ), 332 pAttributes( NULL ), 333 project(""), 334 filename(""), 335 id(""), 336 sOldRef(""), 337 resourceType(""), 338 languageId(""), 339 nPos(0) 340 { 341 } 342 ~XMLElement(); 343 XMLElement(const XMLElement&); 344 345 XMLElement& operator=(const XMLElement& obj); 346 /// returns node type XML_NODE_ELEMENT 347 virtual sal_uInt16 GetNodeType(); 348 349 /// returns element name 350 const String &GetName() { return sElementName; } 351 352 /// returns list of attributes of this element 353 XMLAttributeList *GetAttributeList() { return pAttributes; } 354 355 /// adds a new attribute to this element, typically used by parser 356 void AddAttribute( const String &rAttribute, const String &rValue ); 357 358 void ChangeLanguageTag( const String &rValue ); 359 // Return a ASCII String representation of this object 360 OString ToOString(); 361 362 // Return a Unicode String representation of this object 363 OUString ToOUString(); 364 365 bool Equals(OUString refStr); 366 367 /// returns a attribute 368 XMLAttribute *GetAttribute( 369 const String &rName // the attribute name 370 ); 371 void SetProject ( ByteString prj ){ project = prj; } 372 void SetFileName ( ByteString fn ){ filename = fn; } 373 void SetId ( ByteString theId ){ id = theId; } 374 void SetResourceType ( ByteString rt ){ resourceType = rt; } 375 void SetLanguageId ( ByteString lid ){ languageId = lid; } 376 void SetPos ( int nPos_in ){ nPos = nPos_in; } 377 void SetOldRef ( ByteString sOldRef_in ){ sOldRef = sOldRef_in; } 378 379 virtual int GetPos() { return nPos; } 380 ByteString GetProject() { return project; } 381 ByteString GetFileName() { return filename; } 382 ByteString GetId() { return id; } 383 ByteString GetOldref() { return sOldRef; } 384 ByteString GetResourceType(){ return resourceType; } 385 ByteString GetLanguageId() { return languageId; } 386 387 388 }; 389 //------------------------------------------------------------------------- 390 391 392 /** Holds character data 393 */ 394 class XMLData : public XMLChildNode 395 { 396 private: 397 String sData; 398 bool isNewCreated; 399 400 public: 401 /// create a data node 402 XMLData( 403 const String &rData, // the initial data 404 XMLParentNode *Parent // the parent node of this data, typically a element node 405 ) 406 : XMLChildNode( Parent ), sData( rData ) , isNewCreated ( false ){} 407 XMLData( 408 const String &rData, // the initial data 409 XMLParentNode *Parent, // the parent node of this data, typically a element node 410 bool newCreated 411 ) 412 : XMLChildNode( Parent ), sData( rData ) , isNewCreated ( newCreated ){} 413 414 XMLData(const XMLData& obj); 415 416 XMLData& operator=(const XMLData& obj); 417 virtual sal_uInt16 GetNodeType(); 418 419 /// returns the data 420 const String &GetData() { return sData; } 421 422 bool isNew() { return isNewCreated; } 423 /// adds new character data to the existing one 424 void AddData( 425 const String &rData // the new data 426 ); 427 428 429 430 }; 431 432 //------------------------------------------------------------------------- 433 434 /** Holds comments 435 */ 436 class XMLComment : public XMLChildNode 437 { 438 private: 439 String sComment; 440 441 public: 442 /// create a comment node 443 XMLComment( 444 const String &rComment, // the comment 445 XMLParentNode *Parent // the parent node of this comemnt, typically a element node 446 ) 447 : XMLChildNode( Parent ), sComment( rComment ) {} 448 449 virtual sal_uInt16 GetNodeType(); 450 451 XMLComment( const XMLComment& obj ); 452 453 XMLComment& operator=(const XMLComment& obj); 454 455 /// returns the comment 456 const String &GetComment() { return sComment; } 457 }; 458 459 //------------------------------------------------------------------------- 460 461 /** Holds additional file content like those for which no handler exists 462 */ 463 class XMLDefault : public XMLChildNode 464 { 465 private: 466 String sDefault; 467 468 public: 469 /// create a comment node 470 XMLDefault( 471 const String &rDefault, // the comment 472 XMLParentNode *Parent // the parent node of this comemnt, typically a element node 473 ) 474 : XMLChildNode( Parent ), sDefault( rDefault ) {} 475 476 XMLDefault(const XMLDefault& obj); 477 478 XMLDefault& operator=(const XMLDefault& obj); 479 480 /// returns node type XML_NODE_TYPE_COMMENT 481 virtual sal_uInt16 GetNodeType(); 482 483 /// returns the comment 484 const String &GetDefault() { return sDefault; } 485 }; 486 487 //------------------------------------------------------------------------- 488 489 /** struct for error information, used by class SimpleXMLParser 490 */ 491 struct XMLError { 492 XML_Error eCode; // the error code 493 sal_uLong nLine; // error line number 494 sal_uLong nColumn; // error column number 495 String sMessage; // readable error message 496 }; 497 498 //------------------------------------------------------------------------- 499 500 /** validating xml parser, creates a document tree with xml nodes 501 */ 502 503 504 class SimpleXMLParser 505 { 506 private: 507 XML_Parser aParser; 508 XMLError aErrorInformation; 509 510 XMLFile *pXMLFile; 511 XMLParentNode *pCurNode; 512 XMLData *pCurData; 513 514 515 static void StartElementHandler( void *userData, const XML_Char *name, const XML_Char **atts ); 516 static void EndElementHandler( void *userData, const XML_Char *name ); 517 static void CharacterDataHandler( void *userData, const XML_Char *s, int len ); 518 static void CommentHandler( void *userData, const XML_Char *data ); 519 static void DefaultHandler( void *userData, const XML_Char *s, int len ); 520 521 522 void StartElement( const XML_Char *name, const XML_Char **atts ); 523 void EndElement( const XML_Char *name ); 524 void CharacterData( const XML_Char *s, int len ); 525 void Comment( const XML_Char *data ); 526 void Default( const XML_Char *s, int len ); 527 528 529 public: 530 /// creates a new parser 531 SimpleXMLParser(); 532 ~SimpleXMLParser(); 533 534 /// parse a file, returns NULL on criticall errors 535 XMLFile *Execute( 536 const String &rFullFileName, 537 const String &rFileName, // the file name 538 XMLFile *pXMLFileIn // the XMLFile 539 ); 540 541 /// parse a memory stream, returns NULL on criticall errors 542 XMLFile *Execute( 543 SvMemoryStream *pStream // the stream 544 ); 545 546 /// returns an error struct 547 const XMLError &GetError() { return aErrorInformation; } 548 }; 549 550 #endif 551