xref: /trunk/main/shell/source/all/xml_parser.cxx (revision cdf0e10c4e3984b49a9502b011690b615761d4a3)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
// MARKER(update_precomp.py): autogen include statement, do not remove
#include "precompiled_shell.hxx"

#ifndef XML_PARSER_HXX_INCLUDED
#include "internal/xml_parser.hxx"
#endif
#include "internal/i_xml_parser_event_handler.hxx"

#include <assert.h>
#include <limits.h>
37 
38 namespace /* private */
39 {
40 
41     //######################################################
42     /*  Extracts the local part of tag without
43         namespace decoration e.g. meta:creator -> creator */
44     const XML_Char COLON = (XML_Char)':';
45 
46     const XML_Char* get_local_name(const XML_Char* rawname)
47     {
48         const XML_Char* p = rawname;
49 
50         // go to the end
51         while (*p) p++;
52 
53         // go back until the first ':'
54         while (*p != COLON && p > rawname)
55             p--;
56 
57         // if we are on a colon one step forward
58         if (*p == COLON)
59             p++;
60 
61         return p;
62     }
63 
64     //################################################
65     inline xml_parser* get_parser_instance(void* data)
66     {
67         return reinterpret_cast<xml_parser*>(XML_GetUserData(
68             reinterpret_cast<XML_Parser>(data)));
69     }
70 
71     //################################################
72     bool has_only_whitespaces(const XML_Char* s, int len)
73     {
74         const XML_Char* p = s;
75         for (int i = 0; i < len; i++)
76             if (*p++ != ' ') return false;
77         return true;
78     }
79 }
80 
81 //###################################################
82 xml_parser::xml_parser(const XML_Char* EncodingName) :
83     document_handler_(0),
84     xml_parser_(XML_ParserCreate(EncodingName))
85 {
86     init();
87 }
88 
89 //###################################################
90 xml_parser::~xml_parser()
91 {
92     XML_ParserFree(xml_parser_);
93 }
94 
95 //###################################################
96 /* Callback functions will be called by the parser on
97    different events */
98 
99 //###################################################
100 extern "C"
101 {
102 
103 static void xml_start_element_handler(void* UserData, const XML_Char* name, const XML_Char** atts)
104 {
105     assert(UserData != NULL);
106 
107     xml_parser* pImpl  = get_parser_instance(UserData);
108 
109     i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler();
110     if (pDocHdl)
111     {
112         xml_tag_attribute_container_t attributes;
113 
114         int i = 0;
115 
116         while(atts[i])
117         {
118             attributes[reinterpret_cast<const char_t*>(get_local_name(atts[i]))] = reinterpret_cast<const char_t*>(atts[i+1]);
119             i += 2; // skip to next pair
120         }
121 
122         pDocHdl->start_element(
123             reinterpret_cast<const char_t*>(name), reinterpret_cast<const char_t*>(get_local_name(name)), attributes);
124     }
125 }
126 
127 //###################################################
128 static void xml_end_element_handler(void* UserData, const XML_Char* name)
129 {
130     assert(UserData);
131 
132     xml_parser* pImpl  = get_parser_instance(UserData);
133     i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler();
134     if (pDocHdl)
135         pDocHdl->end_element(reinterpret_cast<const char_t*>(name), reinterpret_cast<const char_t*>(get_local_name(name)));
136 }
137 
138 //###################################################
139 static void xml_character_data_handler(void* UserData, const XML_Char* s, int len)
140 {
141     assert(UserData);
142 
143     xml_parser* pImpl  = get_parser_instance(UserData);
144     i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler();
145     if (pDocHdl)
146     {
147         if (has_only_whitespaces(s,len))
148             pDocHdl->ignore_whitespace(string_t(reinterpret_cast<const char_t*>(s), len));
149         else
150             pDocHdl->characters(string_t(reinterpret_cast<const char_t*>(s), len));
151     }
152 }
153 
154 //###################################################
155 static void xml_comment_handler(void* UserData, const XML_Char* Data)
156 {
157     assert(UserData);
158 
159     xml_parser* pImpl  = get_parser_instance(UserData);
160     i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler();
161     if (pDocHdl)
162         pDocHdl->comment(reinterpret_cast<const char_t*>(Data));
163 }
164 
165 } // extern "C"
166 
167 //###################################################
168 void xml_parser::init()
169 {
170     XML_SetUserData(xml_parser_, this);
171 
172     // we use the parser as handler argument,
173     // so we could use it if necessary, the
174     // UserData are usable anyway using
175     // XML_GetUserData(...)
176     XML_UseParserAsHandlerArg(xml_parser_);
177 
178     XML_SetElementHandler(
179         xml_parser_,
180         xml_start_element_handler,
181         xml_end_element_handler);
182 
183     XML_SetCharacterDataHandler(
184         xml_parser_,
185         xml_character_data_handler);
186 
187     XML_SetCommentHandler(
188         xml_parser_,
189         xml_comment_handler);
190 }
191 
192 //###################################################
193 void xml_parser::parse(const char* XmlData, size_t Length, bool IsFinal)
194 {
195     if (0 == XML_Parse(xml_parser_, XmlData, Length, IsFinal))
196         throw xml_parser_exception(
197             (char*)XML_ErrorString(XML_GetErrorCode(xml_parser_)),
198             (int)XML_GetErrorCode(xml_parser_),
199             XML_GetCurrentLineNumber(xml_parser_),
200             XML_GetCurrentColumnNumber(xml_parser_),
201             XML_GetCurrentByteIndex(xml_parser_));
202 }
203 
204 //###################################################
205 void xml_parser::set_document_handler(
206     i_xml_parser_event_handler* event_handler)
207 {
208     document_handler_ = event_handler;
209 }
210 
211 //###################################################
212 i_xml_parser_event_handler* xml_parser::get_document_handler() const
213 {
214     return document_handler_;
215 }
216