1*f8e2c85aSAndrew Rist /************************************************************** 2cdf0e10cSrcweir * 3*f8e2c85aSAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one 4*f8e2c85aSAndrew Rist * or more contributor license agreements. See the NOTICE file 5*f8e2c85aSAndrew Rist * distributed with this work for additional information 6*f8e2c85aSAndrew Rist * regarding copyright ownership. The ASF licenses this file 7*f8e2c85aSAndrew Rist * to you under the Apache License, Version 2.0 (the 8*f8e2c85aSAndrew Rist * "License"); you may not use this file except in compliance 9*f8e2c85aSAndrew Rist * with the License. You may obtain a copy of the License at 10cdf0e10cSrcweir * 11*f8e2c85aSAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0 12cdf0e10cSrcweir * 13*f8e2c85aSAndrew Rist * Unless required by applicable law or agreed to in writing, 14*f8e2c85aSAndrew Rist * software distributed under the License is distributed on an 15*f8e2c85aSAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16*f8e2c85aSAndrew Rist * KIND, either express or implied. See the License for the 17*f8e2c85aSAndrew Rist * specific language governing permissions and limitations 18*f8e2c85aSAndrew Rist * under the License. 19cdf0e10cSrcweir * 20*f8e2c85aSAndrew Rist *************************************************************/ 21*f8e2c85aSAndrew Rist 22*f8e2c85aSAndrew Rist 23cdf0e10cSrcweir 24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 25cdf0e10cSrcweir #include "precompiled_shell.hxx" 26cdf0e10cSrcweir 27cdf0e10cSrcweir #ifndef XML_PARSER_HXX_INCLUDED 28cdf0e10cSrcweir #include "internal/xml_parser.hxx" 29cdf0e10cSrcweir #endif 30cdf0e10cSrcweir #include "internal/i_xml_parser_event_handler.hxx" 31cdf0e10cSrcweir 32cdf0e10cSrcweir #include <assert.h> 33cdf0e10cSrcweir 34cdf0e10cSrcweir namespace /* private */ 35cdf0e10cSrcweir { 36cdf0e10cSrcweir 37cdf0e10cSrcweir //###################################################### 38cdf0e10cSrcweir /* Extracts the local part of tag without 39cdf0e10cSrcweir namespace decoration e.g. meta:creator -> creator */ 40cdf0e10cSrcweir const XML_Char COLON = (XML_Char)':'; 41cdf0e10cSrcweir 42cdf0e10cSrcweir const XML_Char* get_local_name(const XML_Char* rawname) 43cdf0e10cSrcweir { 44cdf0e10cSrcweir const XML_Char* p = rawname; 45cdf0e10cSrcweir 46cdf0e10cSrcweir // go to the end 47cdf0e10cSrcweir while (*p) p++; 48cdf0e10cSrcweir 49cdf0e10cSrcweir // go back until the first ':' 50cdf0e10cSrcweir while (*p != COLON && p > rawname) 51cdf0e10cSrcweir p--; 52cdf0e10cSrcweir 53cdf0e10cSrcweir // if we are on a colon one step forward 54cdf0e10cSrcweir if (*p == COLON) 55cdf0e10cSrcweir p++; 56cdf0e10cSrcweir 57cdf0e10cSrcweir return p; 58cdf0e10cSrcweir } 59cdf0e10cSrcweir 60cdf0e10cSrcweir //################################################ 61cdf0e10cSrcweir inline xml_parser* get_parser_instance(void* data) 62cdf0e10cSrcweir { 63cdf0e10cSrcweir return reinterpret_cast<xml_parser*>(XML_GetUserData( 64cdf0e10cSrcweir reinterpret_cast<XML_Parser>(data))); 65cdf0e10cSrcweir } 66cdf0e10cSrcweir 67cdf0e10cSrcweir //################################################ 68cdf0e10cSrcweir bool has_only_whitespaces(const XML_Char* s, int len) 69cdf0e10cSrcweir { 70cdf0e10cSrcweir const XML_Char* p = s; 71cdf0e10cSrcweir for (int i = 0; i < len; i++) 72cdf0e10cSrcweir if (*p++ != ' ') return false; 73cdf0e10cSrcweir return true; 74cdf0e10cSrcweir } 75cdf0e10cSrcweir } 76cdf0e10cSrcweir 77cdf0e10cSrcweir //################################################### 78cdf0e10cSrcweir xml_parser::xml_parser(const XML_Char* EncodingName) : 79cdf0e10cSrcweir document_handler_(0), 80cdf0e10cSrcweir xml_parser_(XML_ParserCreate(EncodingName)) 81cdf0e10cSrcweir { 82cdf0e10cSrcweir init(); 83cdf0e10cSrcweir } 84cdf0e10cSrcweir 85cdf0e10cSrcweir //################################################### 86cdf0e10cSrcweir xml_parser::~xml_parser() 87cdf0e10cSrcweir { 88cdf0e10cSrcweir XML_ParserFree(xml_parser_); 89cdf0e10cSrcweir } 90cdf0e10cSrcweir 91cdf0e10cSrcweir //################################################### 92cdf0e10cSrcweir /* Callback functions will be called by the parser on 93cdf0e10cSrcweir different events */ 94cdf0e10cSrcweir 95cdf0e10cSrcweir //################################################### 96cdf0e10cSrcweir extern "C" 97cdf0e10cSrcweir { 98cdf0e10cSrcweir 99cdf0e10cSrcweir static void xml_start_element_handler(void* UserData, const XML_Char* name, const XML_Char** atts) 100cdf0e10cSrcweir { 101cdf0e10cSrcweir assert(UserData != NULL); 102cdf0e10cSrcweir 103cdf0e10cSrcweir xml_parser* pImpl = get_parser_instance(UserData); 104cdf0e10cSrcweir 105cdf0e10cSrcweir i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler(); 106cdf0e10cSrcweir if (pDocHdl) 107cdf0e10cSrcweir { 108cdf0e10cSrcweir xml_tag_attribute_container_t attributes; 109cdf0e10cSrcweir 110cdf0e10cSrcweir int i = 0; 111cdf0e10cSrcweir 112cdf0e10cSrcweir while(atts[i]) 113cdf0e10cSrcweir { 114cdf0e10cSrcweir attributes[reinterpret_cast<const char_t*>(get_local_name(atts[i]))] = reinterpret_cast<const char_t*>(atts[i+1]); 115cdf0e10cSrcweir i += 2; // skip to next pair 116cdf0e10cSrcweir } 117cdf0e10cSrcweir 118cdf0e10cSrcweir pDocHdl->start_element( 119cdf0e10cSrcweir reinterpret_cast<const char_t*>(name), reinterpret_cast<const char_t*>(get_local_name(name)), attributes); 120cdf0e10cSrcweir } 121cdf0e10cSrcweir } 122cdf0e10cSrcweir 123cdf0e10cSrcweir //################################################### 124cdf0e10cSrcweir static void xml_end_element_handler(void* UserData, const XML_Char* name) 125cdf0e10cSrcweir { 126cdf0e10cSrcweir assert(UserData); 127cdf0e10cSrcweir 128cdf0e10cSrcweir xml_parser* pImpl = get_parser_instance(UserData); 129cdf0e10cSrcweir i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler(); 130cdf0e10cSrcweir if (pDocHdl) 131cdf0e10cSrcweir pDocHdl->end_element(reinterpret_cast<const char_t*>(name), reinterpret_cast<const char_t*>(get_local_name(name))); 132cdf0e10cSrcweir } 133cdf0e10cSrcweir 134cdf0e10cSrcweir //################################################### 135cdf0e10cSrcweir static void xml_character_data_handler(void* UserData, const XML_Char* s, int len) 136cdf0e10cSrcweir { 137cdf0e10cSrcweir assert(UserData); 138cdf0e10cSrcweir 139cdf0e10cSrcweir xml_parser* pImpl = get_parser_instance(UserData); 140cdf0e10cSrcweir i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler(); 141cdf0e10cSrcweir if (pDocHdl) 142cdf0e10cSrcweir { 143cdf0e10cSrcweir if (has_only_whitespaces(s,len)) 144cdf0e10cSrcweir pDocHdl->ignore_whitespace(string_t(reinterpret_cast<const char_t*>(s), len)); 145cdf0e10cSrcweir else 146cdf0e10cSrcweir pDocHdl->characters(string_t(reinterpret_cast<const char_t*>(s), len)); 147cdf0e10cSrcweir } 148cdf0e10cSrcweir } 149cdf0e10cSrcweir 150cdf0e10cSrcweir //################################################### 151cdf0e10cSrcweir static void xml_comment_handler(void* UserData, const XML_Char* Data) 152cdf0e10cSrcweir { 153cdf0e10cSrcweir assert(UserData); 154cdf0e10cSrcweir 155cdf0e10cSrcweir xml_parser* pImpl = get_parser_instance(UserData); 156cdf0e10cSrcweir i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler(); 157cdf0e10cSrcweir if (pDocHdl) 158cdf0e10cSrcweir pDocHdl->comment(reinterpret_cast<const char_t*>(Data)); 159cdf0e10cSrcweir } 160cdf0e10cSrcweir 161cdf0e10cSrcweir } // extern "C" 162cdf0e10cSrcweir 163cdf0e10cSrcweir //################################################### 164cdf0e10cSrcweir void xml_parser::init() 165cdf0e10cSrcweir { 166cdf0e10cSrcweir XML_SetUserData(xml_parser_, this); 167cdf0e10cSrcweir 168cdf0e10cSrcweir // we use the parser as handler argument, 169cdf0e10cSrcweir // so we could use it if necessary, the 170cdf0e10cSrcweir // UserData are usable anyway using 171cdf0e10cSrcweir // XML_GetUserData(...) 172cdf0e10cSrcweir XML_UseParserAsHandlerArg(xml_parser_); 173cdf0e10cSrcweir 174cdf0e10cSrcweir XML_SetElementHandler( 175cdf0e10cSrcweir xml_parser_, 176cdf0e10cSrcweir xml_start_element_handler, 177cdf0e10cSrcweir xml_end_element_handler); 178cdf0e10cSrcweir 179cdf0e10cSrcweir XML_SetCharacterDataHandler( 180cdf0e10cSrcweir xml_parser_, 181cdf0e10cSrcweir xml_character_data_handler); 182cdf0e10cSrcweir 183cdf0e10cSrcweir XML_SetCommentHandler( 184cdf0e10cSrcweir xml_parser_, 185cdf0e10cSrcweir xml_comment_handler); 186cdf0e10cSrcweir } 187cdf0e10cSrcweir 188cdf0e10cSrcweir //################################################### 189cdf0e10cSrcweir void xml_parser::parse(const char* XmlData, size_t Length, bool IsFinal) 190cdf0e10cSrcweir { 191cdf0e10cSrcweir if (0 == XML_Parse(xml_parser_, XmlData, Length, IsFinal)) 192cdf0e10cSrcweir throw xml_parser_exception( 193cdf0e10cSrcweir (char*)XML_ErrorString(XML_GetErrorCode(xml_parser_)), 194cdf0e10cSrcweir (int)XML_GetErrorCode(xml_parser_), 195cdf0e10cSrcweir XML_GetCurrentLineNumber(xml_parser_), 196cdf0e10cSrcweir XML_GetCurrentColumnNumber(xml_parser_), 197cdf0e10cSrcweir XML_GetCurrentByteIndex(xml_parser_)); 198cdf0e10cSrcweir } 199cdf0e10cSrcweir 200cdf0e10cSrcweir //################################################### 201cdf0e10cSrcweir void xml_parser::set_document_handler( 202cdf0e10cSrcweir i_xml_parser_event_handler* event_handler) 203cdf0e10cSrcweir { 204cdf0e10cSrcweir document_handler_ = event_handler; 205cdf0e10cSrcweir } 206cdf0e10cSrcweir 207cdf0e10cSrcweir //################################################### 208cdf0e10cSrcweir i_xml_parser_event_handler* xml_parser::get_document_handler() const 209cdf0e10cSrcweir { 210cdf0e10cSrcweir return document_handler_; 211cdf0e10cSrcweir } 212