1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 #ifndef INCLUDED_XMLREADER_XMLREADER_HXX
25 #define INCLUDED_XMLREADER_XMLREADER_HXX
26 
27 #include "sal/config.h"
28 
29 #include <stack>
30 #include <vector>
31 
32 #include "boost/noncopyable.hpp"
33 #include "com/sun/star/container/NoSuchElementException.hpp"
34 #include "com/sun/star/uno/RuntimeException.hpp"
35 #include "osl/file.h"
36 #include "rtl/ustring.hxx"
37 #include "sal/types.h"
38 #include "xmlreader/detail/xmlreaderdllapi.hxx"
39 #include "xmlreader/pad.hxx"
40 #include "xmlreader/span.hxx"
41 
42 namespace xmlreader {
43 
44 class OOO_DLLPUBLIC_XMLREADER XmlReader: private boost::noncopyable {
45 public:
46     explicit XmlReader(rtl::OUString const & fileUrl)
47         SAL_THROW((
48             com::sun::star::container::NoSuchElementException,
49             com::sun::star::uno::RuntimeException));
50 
51     ~XmlReader();
52 
53     enum { NAMESPACE_NONE = -2, NAMESPACE_UNKNOWN = -1, NAMESPACE_XML = 0 };
54 
55     enum Text { TEXT_NONE, TEXT_RAW, TEXT_NORMALIZED };
56 
57     enum Result { RESULT_BEGIN, RESULT_END, RESULT_TEXT, RESULT_DONE };
58 
59     int registerNamespaceIri(Span const & iri);
60 
61     // RESULT_BEGIN: data = localName, ns = ns
62     // RESULT_END: data, ns unused
63     // RESULT_TEXT: data = text, ns unused
64     Result nextItem(Text reportText, Span * data, int * nsId);
65 
66     bool nextAttribute(int * nsId, Span * localName);
67 
68     // the span returned by getAttributeValue is only valid until the next call
69     // to nextItem or getAttributeValue
70     Span getAttributeValue(bool fullyNormalize);
71 
72     int getNamespaceId(Span const & prefix) const;
73 
74     rtl::OUString getUrl() const;
75 
76 private:
77     typedef std::vector< Span > NamespaceIris;
78 
79     // If NamespaceData (and similarly ElementData and AttributeData) is made
80     // SAL_DLLPRIVATE, at least gcc 4.2.3 erroneously warns about
81     // "'xmlreader::XmlReader' declared with greater visibility than the type of
82     // its field 'xmlreader::XmlReader::namespaces_'" (and similarly for
83     // elements_ and attributes_):
84 
85     struct NamespaceData {
86         Span prefix;
87         int nsId;
88 
NamespaceDataxmlreader::XmlReader::NamespaceData89         NamespaceData() {}
90 
NamespaceDataxmlreader::XmlReader::NamespaceData91         NamespaceData(Span const & thePrefix, int theNsId):
92             prefix(thePrefix), nsId(theNsId) {}
93     };
94 
95     typedef std::vector< NamespaceData > NamespaceList;
96 
97     struct ElementData {
98         Span name;
99         NamespaceList::size_type inheritedNamespaces;
100         int defaultNamespaceId;
101 
ElementDataxmlreader::XmlReader::ElementData102         ElementData(
103             Span const & theName,
104             NamespaceList::size_type theInheritedNamespaces,
105             int theDefaultNamespaceId):
106             name(theName), inheritedNamespaces(theInheritedNamespaces),
107             defaultNamespaceId(theDefaultNamespaceId)
108         {}
109     };
110 
111     typedef std::stack< ElementData > ElementStack;
112 
113     struct AttributeData {
114         char const * nameBegin;
115         char const * nameEnd;
116         char const * nameColon;
117         char const * valueBegin;
118         char const * valueEnd;
119 
AttributeDataxmlreader::XmlReader::AttributeData120         AttributeData(
121             char const * theNameBegin, char const * theNameEnd,
122             char const * theNameColon, char const * theValueBegin,
123             char const * theValueEnd):
124             nameBegin(theNameBegin), nameEnd(theNameEnd),
125             nameColon(theNameColon), valueBegin(theValueBegin),
126             valueEnd(theValueEnd)
127         {}
128     };
129 
130     typedef std::vector< AttributeData > Attributes;
131 
132     enum State {
133         STATE_CONTENT, STATE_START_TAG, STATE_END_TAG, STATE_EMPTY_ELEMENT_TAG,
134         STATE_DONE };
135 
read()136     SAL_DLLPRIVATE inline char read() { return pos_ == end_ ? '\0' : *pos_++; }
137 
peek()138     SAL_DLLPRIVATE inline char peek() { return pos_ == end_ ? '\0' : *pos_; }
139 
140     SAL_DLLPRIVATE void normalizeLineEnds(Span const & text);
141 
142     SAL_DLLPRIVATE void skipSpace();
143 
144     SAL_DLLPRIVATE bool skipComment();
145 
146     SAL_DLLPRIVATE void skipProcessingInstruction();
147 
148     SAL_DLLPRIVATE void skipDocumentTypeDeclaration();
149 
150     SAL_DLLPRIVATE Span scanCdataSection();
151 
152     SAL_DLLPRIVATE bool scanName(char const ** nameColon);
153 
154     SAL_DLLPRIVATE int scanNamespaceIri(
155         char const * begin, char const * end);
156 
157     SAL_DLLPRIVATE char const * handleReference(
158         char const * position, char const * end);
159 
160     SAL_DLLPRIVATE Span handleAttributeValue(
161         char const * begin, char const * end, bool fullyNormalize);
162 
163     SAL_DLLPRIVATE Result handleStartTag(int * nsId, Span * localName);
164 
165     SAL_DLLPRIVATE Result handleEndTag();
166 
167     SAL_DLLPRIVATE void handleElementEnd();
168 
169     SAL_DLLPRIVATE Result handleSkippedText(Span * data, int * nsId);
170 
171     SAL_DLLPRIVATE Result handleRawText(Span * text);
172 
173     SAL_DLLPRIVATE Result handleNormalizedText(Span * text);
174 
175     SAL_DLLPRIVATE int toNamespaceId(NamespaceIris::size_type pos);
176 
177     rtl::OUString fileUrl_;
178     oslFileHandle fileHandle_;
179     sal_uInt64 fileSize_;
180     void * fileAddress_;
181     NamespaceIris namespaceIris_;
182     NamespaceList namespaces_;
183     ElementStack elements_;
184     char const * pos_;
185     char const * end_;
186     State state_;
187     Attributes attributes_;
188     Attributes::iterator currentAttribute_;
189     bool firstAttribute_;
190     Pad pad_;
191 };
192 
193 }
194 
195 #endif
196