xref: /aoo41x/main/shell/source/all/xml_parser.cxx (revision cdf0e10c)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
#include "precompiled_shell.hxx"

#ifndef XML_PARSER_HXX_INCLUDED
#include "internal/xml_parser.hxx"
#endif
#include "internal/i_xml_parser_event_handler.hxx"

#include <assert.h>
#include <limits.h>
37 
38 namespace /* private */
39 {
40 
41 	//######################################################
42 	/*  Extracts the local part of tag without
43 		namespace decoration e.g. meta:creator -> creator */
44 	const XML_Char COLON = (XML_Char)':';
45 
46 	const XML_Char* get_local_name(const XML_Char* rawname)
47 	{
48 		const XML_Char* p = rawname;
49 
50 		// go to the end
51 		while (*p) p++;
52 
53 		// go back until the first ':'
54 		while (*p != COLON && p > rawname)
55 			p--;
56 
57 		// if we are on a colon one step forward
58 		if (*p == COLON)
59 			p++;
60 
61 		return p;
62 	}
63 
64 	//################################################
65 	inline xml_parser* get_parser_instance(void* data)
66 	{
67 		return reinterpret_cast<xml_parser*>(XML_GetUserData(
68 			reinterpret_cast<XML_Parser>(data)));
69 	}
70 
71     //################################################
72     bool has_only_whitespaces(const XML_Char* s, int len)
73     {
74         const XML_Char* p = s;
75         for (int i = 0; i < len; i++)
76             if (*p++ != ' ') return false;
77         return true;
78     }
79 }
80 
81 //###################################################
82 xml_parser::xml_parser(const XML_Char* EncodingName) :
83 	document_handler_(0),
84 	xml_parser_(XML_ParserCreate(EncodingName))
85 {
86 	init();
87 }
88 
89 //###################################################
90 xml_parser::~xml_parser()
91 {
92 	XML_ParserFree(xml_parser_);
93 }
94 
95 //###################################################
96 /* Callback functions will be called by the parser on
97    different events */
98 
99 //###################################################
100 extern "C"
101 {
102 
103 static void xml_start_element_handler(void* UserData, const XML_Char* name, const XML_Char** atts)
104 {
105 	assert(UserData != NULL);
106 
107 	xml_parser* pImpl  = get_parser_instance(UserData);
108 
109     i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler();
110 	if (pDocHdl)
111 	{
112 		xml_tag_attribute_container_t attributes;
113 
114 		int i = 0;
115 
116 		while(atts[i])
117 		{
118 			attributes[reinterpret_cast<const char_t*>(get_local_name(atts[i]))] = reinterpret_cast<const char_t*>(atts[i+1]);
119 			i += 2; // skip to next pair
120 		}
121 
122 		pDocHdl->start_element(
123 			reinterpret_cast<const char_t*>(name), reinterpret_cast<const char_t*>(get_local_name(name)), attributes);
124 	}
125 }
126 
127 //###################################################
128 static void xml_end_element_handler(void* UserData, const XML_Char* name)
129 {
130 	assert(UserData);
131 
132 	xml_parser* pImpl  = get_parser_instance(UserData);
133     i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler();
134 	if (pDocHdl)
135 		pDocHdl->end_element(reinterpret_cast<const char_t*>(name), reinterpret_cast<const char_t*>(get_local_name(name)));
136 }
137 
138 //###################################################
139 static void xml_character_data_handler(void* UserData, const XML_Char* s, int len)
140 {
141 	assert(UserData);
142 
143 	xml_parser* pImpl  = get_parser_instance(UserData);
144     i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler();
145 	if (pDocHdl)
146     {
147         if (has_only_whitespaces(s,len))
148             pDocHdl->ignore_whitespace(string_t(reinterpret_cast<const char_t*>(s), len));
149         else
150             pDocHdl->characters(string_t(reinterpret_cast<const char_t*>(s), len));
151     }
152 }
153 
154 //###################################################
155 static void xml_comment_handler(void* UserData, const XML_Char* Data)
156 {
157 	assert(UserData);
158 
159 	xml_parser* pImpl  = get_parser_instance(UserData);
160     i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler();
161 	if (pDocHdl)
162 		pDocHdl->comment(reinterpret_cast<const char_t*>(Data));
163 }
164 
165 } // extern "C"
166 
167 //###################################################
168 void xml_parser::init()
169 {
170 	XML_SetUserData(xml_parser_, this);
171 
172 	// we use the parser as handler argument,
173 	// so we could use it if necessary, the
174 	// UserData are usable anyway using
175 	// XML_GetUserData(...)
176 	XML_UseParserAsHandlerArg(xml_parser_);
177 
178 	XML_SetElementHandler(
179 		xml_parser_,
180 		xml_start_element_handler,
181 		xml_end_element_handler);
182 
183 	XML_SetCharacterDataHandler(
184 		xml_parser_,
185 		xml_character_data_handler);
186 
187 	XML_SetCommentHandler(
188 		xml_parser_,
189 		xml_comment_handler);
190 }
191 
192 //###################################################
193 void xml_parser::parse(const char* XmlData, size_t Length, bool IsFinal)
194 {
195 	if (0 == XML_Parse(xml_parser_, XmlData, Length, IsFinal))
196 		throw xml_parser_exception(
197 			(char*)XML_ErrorString(XML_GetErrorCode(xml_parser_)),
198 			(int)XML_GetErrorCode(xml_parser_),
199 			XML_GetCurrentLineNumber(xml_parser_),
200 			XML_GetCurrentColumnNumber(xml_parser_),
201 			XML_GetCurrentByteIndex(xml_parser_));
202 }
203 
204 //###################################################
205 void xml_parser::set_document_handler(
206 	i_xml_parser_event_handler* event_handler)
207 {
208 	document_handler_ = event_handler;
209 }
210 
211 //###################################################
212 i_xml_parser_event_handler* xml_parser::get_document_handler() const
213 {
214 	return document_handler_;
215 }
216