/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/
21*f8e2c85aSAndrew Rist
22*f8e2c85aSAndrew Rist
23cdf0e10cSrcweir
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_shell.hxx"
26cdf0e10cSrcweir
27cdf0e10cSrcweir #ifndef XML_PARSER_HXX_INCLUDED
28cdf0e10cSrcweir #include "internal/xml_parser.hxx"
29cdf0e10cSrcweir #endif
30cdf0e10cSrcweir #include "internal/i_xml_parser_event_handler.hxx"
31cdf0e10cSrcweir
32cdf0e10cSrcweir #include <assert.h>
33cdf0e10cSrcweir
34cdf0e10cSrcweir namespace /* private */
35cdf0e10cSrcweir {
36cdf0e10cSrcweir
37cdf0e10cSrcweir //######################################################
38cdf0e10cSrcweir /* Extracts the local part of tag without
39cdf0e10cSrcweir namespace decoration e.g. meta:creator -> creator */
40cdf0e10cSrcweir const XML_Char COLON = (XML_Char)':';
41cdf0e10cSrcweir
get_local_name(const XML_Char * rawname)42cdf0e10cSrcweir const XML_Char* get_local_name(const XML_Char* rawname)
43cdf0e10cSrcweir {
44cdf0e10cSrcweir const XML_Char* p = rawname;
45cdf0e10cSrcweir
46cdf0e10cSrcweir // go to the end
47cdf0e10cSrcweir while (*p) p++;
48cdf0e10cSrcweir
49cdf0e10cSrcweir // go back until the first ':'
50cdf0e10cSrcweir while (*p != COLON && p > rawname)
51cdf0e10cSrcweir p--;
52cdf0e10cSrcweir
53cdf0e10cSrcweir // if we are on a colon one step forward
54cdf0e10cSrcweir if (*p == COLON)
55cdf0e10cSrcweir p++;
56cdf0e10cSrcweir
57cdf0e10cSrcweir return p;
58cdf0e10cSrcweir }
59cdf0e10cSrcweir
60cdf0e10cSrcweir //################################################
get_parser_instance(void * data)61cdf0e10cSrcweir inline xml_parser* get_parser_instance(void* data)
62cdf0e10cSrcweir {
63cdf0e10cSrcweir return reinterpret_cast<xml_parser*>(XML_GetUserData(
64cdf0e10cSrcweir reinterpret_cast<XML_Parser>(data)));
65cdf0e10cSrcweir }
66cdf0e10cSrcweir
67cdf0e10cSrcweir //################################################
has_only_whitespaces(const XML_Char * s,int len)68cdf0e10cSrcweir bool has_only_whitespaces(const XML_Char* s, int len)
69cdf0e10cSrcweir {
70cdf0e10cSrcweir const XML_Char* p = s;
71cdf0e10cSrcweir for (int i = 0; i < len; i++)
72cdf0e10cSrcweir if (*p++ != ' ') return false;
73cdf0e10cSrcweir return true;
74cdf0e10cSrcweir }
75cdf0e10cSrcweir }
76cdf0e10cSrcweir
77cdf0e10cSrcweir //###################################################
xml_parser(const XML_Char * EncodingName)78cdf0e10cSrcweir xml_parser::xml_parser(const XML_Char* EncodingName) :
79cdf0e10cSrcweir document_handler_(0),
80cdf0e10cSrcweir xml_parser_(XML_ParserCreate(EncodingName))
81cdf0e10cSrcweir {
82cdf0e10cSrcweir init();
83cdf0e10cSrcweir }
84cdf0e10cSrcweir
85cdf0e10cSrcweir //###################################################
~xml_parser()86cdf0e10cSrcweir xml_parser::~xml_parser()
87cdf0e10cSrcweir {
88cdf0e10cSrcweir XML_ParserFree(xml_parser_);
89cdf0e10cSrcweir }
90cdf0e10cSrcweir
91cdf0e10cSrcweir //###################################################
/* Callback functions invoked by the parser for the
   different parse events */
94cdf0e10cSrcweir
95cdf0e10cSrcweir //###################################################
96cdf0e10cSrcweir extern "C"
97cdf0e10cSrcweir {
98cdf0e10cSrcweir
xml_start_element_handler(void * UserData,const XML_Char * name,const XML_Char ** atts)99cdf0e10cSrcweir static void xml_start_element_handler(void* UserData, const XML_Char* name, const XML_Char** atts)
100cdf0e10cSrcweir {
101cdf0e10cSrcweir assert(UserData != NULL);
102cdf0e10cSrcweir
103cdf0e10cSrcweir xml_parser* pImpl = get_parser_instance(UserData);
104cdf0e10cSrcweir
105cdf0e10cSrcweir i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler();
106cdf0e10cSrcweir if (pDocHdl)
107cdf0e10cSrcweir {
108cdf0e10cSrcweir xml_tag_attribute_container_t attributes;
109cdf0e10cSrcweir
110cdf0e10cSrcweir int i = 0;
111cdf0e10cSrcweir
112cdf0e10cSrcweir while(atts[i])
113cdf0e10cSrcweir {
114cdf0e10cSrcweir attributes[reinterpret_cast<const char_t*>(get_local_name(atts[i]))] = reinterpret_cast<const char_t*>(atts[i+1]);
115cdf0e10cSrcweir i += 2; // skip to next pair
116cdf0e10cSrcweir }
117cdf0e10cSrcweir
118cdf0e10cSrcweir pDocHdl->start_element(
119cdf0e10cSrcweir reinterpret_cast<const char_t*>(name), reinterpret_cast<const char_t*>(get_local_name(name)), attributes);
120cdf0e10cSrcweir }
121cdf0e10cSrcweir }
122cdf0e10cSrcweir
123cdf0e10cSrcweir //###################################################
xml_end_element_handler(void * UserData,const XML_Char * name)124cdf0e10cSrcweir static void xml_end_element_handler(void* UserData, const XML_Char* name)
125cdf0e10cSrcweir {
126cdf0e10cSrcweir assert(UserData);
127cdf0e10cSrcweir
128cdf0e10cSrcweir xml_parser* pImpl = get_parser_instance(UserData);
129cdf0e10cSrcweir i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler();
130cdf0e10cSrcweir if (pDocHdl)
131cdf0e10cSrcweir pDocHdl->end_element(reinterpret_cast<const char_t*>(name), reinterpret_cast<const char_t*>(get_local_name(name)));
132cdf0e10cSrcweir }
133cdf0e10cSrcweir
134cdf0e10cSrcweir //###################################################
xml_character_data_handler(void * UserData,const XML_Char * s,int len)135cdf0e10cSrcweir static void xml_character_data_handler(void* UserData, const XML_Char* s, int len)
136cdf0e10cSrcweir {
137cdf0e10cSrcweir assert(UserData);
138cdf0e10cSrcweir
139cdf0e10cSrcweir xml_parser* pImpl = get_parser_instance(UserData);
140cdf0e10cSrcweir i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler();
141cdf0e10cSrcweir if (pDocHdl)
142cdf0e10cSrcweir {
143cdf0e10cSrcweir if (has_only_whitespaces(s,len))
144cdf0e10cSrcweir pDocHdl->ignore_whitespace(string_t(reinterpret_cast<const char_t*>(s), len));
145cdf0e10cSrcweir else
146cdf0e10cSrcweir pDocHdl->characters(string_t(reinterpret_cast<const char_t*>(s), len));
147cdf0e10cSrcweir }
148cdf0e10cSrcweir }
149cdf0e10cSrcweir
150cdf0e10cSrcweir //###################################################
xml_comment_handler(void * UserData,const XML_Char * Data)151cdf0e10cSrcweir static void xml_comment_handler(void* UserData, const XML_Char* Data)
152cdf0e10cSrcweir {
153cdf0e10cSrcweir assert(UserData);
154cdf0e10cSrcweir
155cdf0e10cSrcweir xml_parser* pImpl = get_parser_instance(UserData);
156cdf0e10cSrcweir i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler();
157cdf0e10cSrcweir if (pDocHdl)
158cdf0e10cSrcweir pDocHdl->comment(reinterpret_cast<const char_t*>(Data));
159cdf0e10cSrcweir }
160cdf0e10cSrcweir
161cdf0e10cSrcweir } // extern "C"
162cdf0e10cSrcweir
163cdf0e10cSrcweir //###################################################
init()164cdf0e10cSrcweir void xml_parser::init()
165cdf0e10cSrcweir {
166cdf0e10cSrcweir XML_SetUserData(xml_parser_, this);
167cdf0e10cSrcweir
168cdf0e10cSrcweir // we use the parser as handler argument,
169cdf0e10cSrcweir // so we could use it if necessary, the
170cdf0e10cSrcweir // UserData are usable anyway using
171cdf0e10cSrcweir // XML_GetUserData(...)
172cdf0e10cSrcweir XML_UseParserAsHandlerArg(xml_parser_);
173cdf0e10cSrcweir
174cdf0e10cSrcweir XML_SetElementHandler(
175cdf0e10cSrcweir xml_parser_,
176cdf0e10cSrcweir xml_start_element_handler,
177cdf0e10cSrcweir xml_end_element_handler);
178cdf0e10cSrcweir
179cdf0e10cSrcweir XML_SetCharacterDataHandler(
180cdf0e10cSrcweir xml_parser_,
181cdf0e10cSrcweir xml_character_data_handler);
182cdf0e10cSrcweir
183cdf0e10cSrcweir XML_SetCommentHandler(
184cdf0e10cSrcweir xml_parser_,
185cdf0e10cSrcweir xml_comment_handler);
186cdf0e10cSrcweir }
187cdf0e10cSrcweir
188cdf0e10cSrcweir //###################################################
parse(const char * XmlData,size_t Length,bool IsFinal)189cdf0e10cSrcweir void xml_parser::parse(const char* XmlData, size_t Length, bool IsFinal)
190cdf0e10cSrcweir {
191cdf0e10cSrcweir if (0 == XML_Parse(xml_parser_, XmlData, Length, IsFinal))
192cdf0e10cSrcweir throw xml_parser_exception(
193cdf0e10cSrcweir (char*)XML_ErrorString(XML_GetErrorCode(xml_parser_)),
194cdf0e10cSrcweir (int)XML_GetErrorCode(xml_parser_),
195cdf0e10cSrcweir XML_GetCurrentLineNumber(xml_parser_),
196cdf0e10cSrcweir XML_GetCurrentColumnNumber(xml_parser_),
197cdf0e10cSrcweir XML_GetCurrentByteIndex(xml_parser_));
198cdf0e10cSrcweir }
199cdf0e10cSrcweir
200cdf0e10cSrcweir //###################################################
set_document_handler(i_xml_parser_event_handler * event_handler)201cdf0e10cSrcweir void xml_parser::set_document_handler(
202cdf0e10cSrcweir i_xml_parser_event_handler* event_handler)
203cdf0e10cSrcweir {
204cdf0e10cSrcweir document_handler_ = event_handler;
205cdf0e10cSrcweir }
206cdf0e10cSrcweir
207cdf0e10cSrcweir //###################################################
get_document_handler() const208cdf0e10cSrcweir i_xml_parser_event_handler* xml_parser::get_document_handler() const
209cdf0e10cSrcweir {
210cdf0e10cSrcweir return document_handler_;
211cdf0e10cSrcweir }
212