1*cdf0e10cSrcweir /************************************************************************* 2*cdf0e10cSrcweir * 3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4*cdf0e10cSrcweir * 5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates. 6*cdf0e10cSrcweir * 7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite 8*cdf0e10cSrcweir * 9*cdf0e10cSrcweir * This file is part of OpenOffice.org. 10*cdf0e10cSrcweir * 11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify 12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3 13*cdf0e10cSrcweir * only, as published by the Free Software Foundation. 14*cdf0e10cSrcweir * 15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful, 16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of 17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details 19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code). 20*cdf0e10cSrcweir * 21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License 22*cdf0e10cSrcweir * version 3 along with OpenOffice.org. If not, see 23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html> 24*cdf0e10cSrcweir * for a copy of the LGPLv3 License. 25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir 28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 29*cdf0e10cSrcweir #include "precompiled_shell.hxx" 30*cdf0e10cSrcweir 31*cdf0e10cSrcweir #ifndef XML_PARSER_HXX_INCLUDED 32*cdf0e10cSrcweir #include "internal/xml_parser.hxx" 33*cdf0e10cSrcweir #endif 34*cdf0e10cSrcweir #include "internal/i_xml_parser_event_handler.hxx" 35*cdf0e10cSrcweir 36*cdf0e10cSrcweir #include <assert.h> 37*cdf0e10cSrcweir 38*cdf0e10cSrcweir namespace /* private */ 39*cdf0e10cSrcweir { 40*cdf0e10cSrcweir 41*cdf0e10cSrcweir //###################################################### 42*cdf0e10cSrcweir /* Extracts the local part of tag without 43*cdf0e10cSrcweir namespace decoration e.g. meta:creator -> creator */ 44*cdf0e10cSrcweir const XML_Char COLON = (XML_Char)':'; 45*cdf0e10cSrcweir 46*cdf0e10cSrcweir const XML_Char* get_local_name(const XML_Char* rawname) 47*cdf0e10cSrcweir { 48*cdf0e10cSrcweir const XML_Char* p = rawname; 49*cdf0e10cSrcweir 50*cdf0e10cSrcweir // go to the end 51*cdf0e10cSrcweir while (*p) p++; 52*cdf0e10cSrcweir 53*cdf0e10cSrcweir // go back until the first ':' 54*cdf0e10cSrcweir while (*p != COLON && p > rawname) 55*cdf0e10cSrcweir p--; 56*cdf0e10cSrcweir 57*cdf0e10cSrcweir // if we are on a colon one step forward 58*cdf0e10cSrcweir if (*p == COLON) 59*cdf0e10cSrcweir p++; 60*cdf0e10cSrcweir 61*cdf0e10cSrcweir return p; 62*cdf0e10cSrcweir } 63*cdf0e10cSrcweir 64*cdf0e10cSrcweir //################################################ 65*cdf0e10cSrcweir inline xml_parser* get_parser_instance(void* data) 66*cdf0e10cSrcweir { 67*cdf0e10cSrcweir return reinterpret_cast<xml_parser*>(XML_GetUserData( 68*cdf0e10cSrcweir reinterpret_cast<XML_Parser>(data))); 69*cdf0e10cSrcweir } 70*cdf0e10cSrcweir 71*cdf0e10cSrcweir //################################################ 72*cdf0e10cSrcweir bool has_only_whitespaces(const XML_Char* s, int len) 73*cdf0e10cSrcweir { 74*cdf0e10cSrcweir const XML_Char* p = s; 75*cdf0e10cSrcweir for (int i = 0; i < len; i++) 76*cdf0e10cSrcweir if (*p++ != ' ') return false; 77*cdf0e10cSrcweir return true; 78*cdf0e10cSrcweir } 79*cdf0e10cSrcweir } 80*cdf0e10cSrcweir 81*cdf0e10cSrcweir //################################################### 82*cdf0e10cSrcweir xml_parser::xml_parser(const XML_Char* EncodingName) : 83*cdf0e10cSrcweir document_handler_(0), 84*cdf0e10cSrcweir xml_parser_(XML_ParserCreate(EncodingName)) 85*cdf0e10cSrcweir { 86*cdf0e10cSrcweir init(); 87*cdf0e10cSrcweir } 88*cdf0e10cSrcweir 89*cdf0e10cSrcweir //################################################### 90*cdf0e10cSrcweir xml_parser::~xml_parser() 91*cdf0e10cSrcweir { 92*cdf0e10cSrcweir XML_ParserFree(xml_parser_); 93*cdf0e10cSrcweir } 94*cdf0e10cSrcweir 95*cdf0e10cSrcweir //################################################### 96*cdf0e10cSrcweir /* Callback functions will be called by the parser on 97*cdf0e10cSrcweir different events */ 98*cdf0e10cSrcweir 99*cdf0e10cSrcweir //################################################### 100*cdf0e10cSrcweir extern "C" 101*cdf0e10cSrcweir { 102*cdf0e10cSrcweir 103*cdf0e10cSrcweir static void xml_start_element_handler(void* UserData, const XML_Char* name, const XML_Char** atts) 104*cdf0e10cSrcweir { 105*cdf0e10cSrcweir assert(UserData != NULL); 106*cdf0e10cSrcweir 107*cdf0e10cSrcweir xml_parser* pImpl = get_parser_instance(UserData); 108*cdf0e10cSrcweir 109*cdf0e10cSrcweir i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler(); 110*cdf0e10cSrcweir if (pDocHdl) 111*cdf0e10cSrcweir { 112*cdf0e10cSrcweir xml_tag_attribute_container_t attributes; 113*cdf0e10cSrcweir 114*cdf0e10cSrcweir int i = 0; 115*cdf0e10cSrcweir 116*cdf0e10cSrcweir while(atts[i]) 117*cdf0e10cSrcweir { 118*cdf0e10cSrcweir attributes[reinterpret_cast<const char_t*>(get_local_name(atts[i]))] = reinterpret_cast<const char_t*>(atts[i+1]); 119*cdf0e10cSrcweir i += 2; // skip to next pair 120*cdf0e10cSrcweir } 121*cdf0e10cSrcweir 122*cdf0e10cSrcweir pDocHdl->start_element( 123*cdf0e10cSrcweir reinterpret_cast<const char_t*>(name), reinterpret_cast<const char_t*>(get_local_name(name)), attributes); 124*cdf0e10cSrcweir } 125*cdf0e10cSrcweir } 126*cdf0e10cSrcweir 127*cdf0e10cSrcweir //################################################### 128*cdf0e10cSrcweir static void xml_end_element_handler(void* UserData, const XML_Char* name) 129*cdf0e10cSrcweir { 130*cdf0e10cSrcweir assert(UserData); 131*cdf0e10cSrcweir 132*cdf0e10cSrcweir xml_parser* pImpl = get_parser_instance(UserData); 133*cdf0e10cSrcweir i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler(); 134*cdf0e10cSrcweir if (pDocHdl) 135*cdf0e10cSrcweir pDocHdl->end_element(reinterpret_cast<const char_t*>(name), reinterpret_cast<const char_t*>(get_local_name(name))); 136*cdf0e10cSrcweir } 137*cdf0e10cSrcweir 138*cdf0e10cSrcweir //################################################### 139*cdf0e10cSrcweir static void xml_character_data_handler(void* UserData, const XML_Char* s, int len) 140*cdf0e10cSrcweir { 141*cdf0e10cSrcweir assert(UserData); 142*cdf0e10cSrcweir 143*cdf0e10cSrcweir xml_parser* pImpl = get_parser_instance(UserData); 144*cdf0e10cSrcweir i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler(); 145*cdf0e10cSrcweir if (pDocHdl) 146*cdf0e10cSrcweir { 147*cdf0e10cSrcweir if (has_only_whitespaces(s,len)) 148*cdf0e10cSrcweir pDocHdl->ignore_whitespace(string_t(reinterpret_cast<const char_t*>(s), len)); 149*cdf0e10cSrcweir else 150*cdf0e10cSrcweir pDocHdl->characters(string_t(reinterpret_cast<const char_t*>(s), len)); 151*cdf0e10cSrcweir } 152*cdf0e10cSrcweir } 153*cdf0e10cSrcweir 154*cdf0e10cSrcweir //################################################### 155*cdf0e10cSrcweir static void xml_comment_handler(void* UserData, const XML_Char* Data) 156*cdf0e10cSrcweir { 157*cdf0e10cSrcweir assert(UserData); 158*cdf0e10cSrcweir 159*cdf0e10cSrcweir xml_parser* pImpl = get_parser_instance(UserData); 160*cdf0e10cSrcweir i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler(); 161*cdf0e10cSrcweir if (pDocHdl) 162*cdf0e10cSrcweir pDocHdl->comment(reinterpret_cast<const char_t*>(Data)); 163*cdf0e10cSrcweir } 164*cdf0e10cSrcweir 165*cdf0e10cSrcweir } // extern "C" 166*cdf0e10cSrcweir 167*cdf0e10cSrcweir //################################################### 168*cdf0e10cSrcweir void xml_parser::init() 169*cdf0e10cSrcweir { 170*cdf0e10cSrcweir XML_SetUserData(xml_parser_, this); 171*cdf0e10cSrcweir 172*cdf0e10cSrcweir // we use the parser as handler argument, 173*cdf0e10cSrcweir // so we could use it if necessary, the 174*cdf0e10cSrcweir // UserData are usable anyway using 175*cdf0e10cSrcweir // XML_GetUserData(...) 176*cdf0e10cSrcweir XML_UseParserAsHandlerArg(xml_parser_); 177*cdf0e10cSrcweir 178*cdf0e10cSrcweir XML_SetElementHandler( 179*cdf0e10cSrcweir xml_parser_, 180*cdf0e10cSrcweir xml_start_element_handler, 181*cdf0e10cSrcweir xml_end_element_handler); 182*cdf0e10cSrcweir 183*cdf0e10cSrcweir XML_SetCharacterDataHandler( 184*cdf0e10cSrcweir xml_parser_, 185*cdf0e10cSrcweir xml_character_data_handler); 186*cdf0e10cSrcweir 187*cdf0e10cSrcweir XML_SetCommentHandler( 188*cdf0e10cSrcweir xml_parser_, 189*cdf0e10cSrcweir xml_comment_handler); 190*cdf0e10cSrcweir } 191*cdf0e10cSrcweir 192*cdf0e10cSrcweir //################################################### 193*cdf0e10cSrcweir void xml_parser::parse(const char* XmlData, size_t Length, bool IsFinal) 194*cdf0e10cSrcweir { 195*cdf0e10cSrcweir if (0 == XML_Parse(xml_parser_, XmlData, Length, IsFinal)) 196*cdf0e10cSrcweir throw xml_parser_exception( 197*cdf0e10cSrcweir (char*)XML_ErrorString(XML_GetErrorCode(xml_parser_)), 198*cdf0e10cSrcweir (int)XML_GetErrorCode(xml_parser_), 199*cdf0e10cSrcweir XML_GetCurrentLineNumber(xml_parser_), 200*cdf0e10cSrcweir XML_GetCurrentColumnNumber(xml_parser_), 201*cdf0e10cSrcweir XML_GetCurrentByteIndex(xml_parser_)); 202*cdf0e10cSrcweir } 203*cdf0e10cSrcweir 204*cdf0e10cSrcweir //################################################### 205*cdf0e10cSrcweir void xml_parser::set_document_handler( 206*cdf0e10cSrcweir i_xml_parser_event_handler* event_handler) 207*cdf0e10cSrcweir { 208*cdf0e10cSrcweir document_handler_ = event_handler; 209*cdf0e10cSrcweir } 210*cdf0e10cSrcweir 211*cdf0e10cSrcweir //################################################### 212*cdf0e10cSrcweir i_xml_parser_event_handler* xml_parser::get_document_handler() const 213*cdf0e10cSrcweir { 214*cdf0e10cSrcweir return document_handler_; 215*cdf0e10cSrcweir } 216