1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 #ifndef INCLUDED_XMLREADER_XMLREADER_HXX 25 #define INCLUDED_XMLREADER_XMLREADER_HXX 26 27 #include "sal/config.h" 28 29 #include <stack> 30 #include <vector> 31 32 #include "boost/noncopyable.hpp" 33 #include "com/sun/star/container/NoSuchElementException.hpp" 34 #include "com/sun/star/uno/RuntimeException.hpp" 35 #include "osl/file.h" 36 #include "rtl/ustring.hxx" 37 #include "sal/types.h" 38 #include "xmlreader/detail/xmlreaderdllapi.hxx" 39 #include "xmlreader/pad.hxx" 40 #include "xmlreader/span.hxx" 41 42 namespace xmlreader { 43 44 class OOO_DLLPUBLIC_XMLREADER XmlReader: private boost::noncopyable { 45 public: 46 explicit XmlReader(rtl::OUString const & fileUrl) 47 SAL_THROW(( 48 com::sun::star::container::NoSuchElementException, 49 com::sun::star::uno::RuntimeException)); 50 51 ~XmlReader(); 52 53 enum { NAMESPACE_NONE = -2, NAMESPACE_UNKNOWN = -1, NAMESPACE_XML = 0 }; 54 55 enum Text { TEXT_NONE, TEXT_RAW, TEXT_NORMALIZED }; 56 57 enum Result { RESULT_BEGIN, RESULT_END, RESULT_TEXT, RESULT_DONE }; 58 59 int registerNamespaceIri(Span const & iri); 60 61 // RESULT_BEGIN: data = localName, ns = ns 62 // RESULT_END: data, ns unused 63 // RESULT_TEXT: data = text, ns unused 64 Result nextItem(Text reportText, Span * data, int * nsId); 65 66 bool nextAttribute(int * nsId, Span * localName); 67 68 // the span returned by getAttributeValue is only valid until the next call 69 // to nextItem or getAttributeValue 70 Span getAttributeValue(bool fullyNormalize); 71 72 int getNamespaceId(Span const & prefix) const; 73 74 rtl::OUString getUrl() const; 75 76 private: 77 typedef std::vector< Span > NamespaceIris; 78 79 // If NamespaceData (and similarly ElementData and AttributeData) is made 80 // SAL_DLLPRIVATE, at least gcc 4.2.3 erroneously warns about 81 // "'xmlreader::XmlReader' declared with greater visibility than the type of 82 // its field 'xmlreader::XmlReader::namespaces_'" (and similarly for 83 // elements_ and attributes_): 84 85 struct NamespaceData { 86 Span prefix; 87 int nsId; 88 NamespaceDataxmlreader::XmlReader::NamespaceData89 NamespaceData() {} 90 NamespaceDataxmlreader::XmlReader::NamespaceData91 NamespaceData(Span const & thePrefix, int theNsId): 92 prefix(thePrefix), nsId(theNsId) {} 93 }; 94 95 typedef std::vector< NamespaceData > NamespaceList; 96 97 struct ElementData { 98 Span name; 99 NamespaceList::size_type inheritedNamespaces; 100 int defaultNamespaceId; 101 ElementDataxmlreader::XmlReader::ElementData102 ElementData( 103 Span const & theName, 104 NamespaceList::size_type theInheritedNamespaces, 105 int theDefaultNamespaceId): 106 name(theName), inheritedNamespaces(theInheritedNamespaces), 107 defaultNamespaceId(theDefaultNamespaceId) 108 {} 109 }; 110 111 typedef std::stack< ElementData > ElementStack; 112 113 struct AttributeData { 114 char const * nameBegin; 115 char const * nameEnd; 116 char const * nameColon; 117 char const * valueBegin; 118 char const * valueEnd; 119 AttributeDataxmlreader::XmlReader::AttributeData120 AttributeData( 121 char const * theNameBegin, char const * theNameEnd, 122 char const * theNameColon, char const * theValueBegin, 123 char const * theValueEnd): 124 nameBegin(theNameBegin), nameEnd(theNameEnd), 125 nameColon(theNameColon), valueBegin(theValueBegin), 126 valueEnd(theValueEnd) 127 {} 128 }; 129 130 typedef std::vector< AttributeData > Attributes; 131 132 enum State { 133 STATE_CONTENT, STATE_START_TAG, STATE_END_TAG, STATE_EMPTY_ELEMENT_TAG, 134 STATE_DONE }; 135 read()136 SAL_DLLPRIVATE inline char read() { return pos_ == end_ ? '\0' : *pos_++; } 137 peek()138 SAL_DLLPRIVATE inline char peek() { return pos_ == end_ ? '\0' : *pos_; } 139 140 SAL_DLLPRIVATE void normalizeLineEnds(Span const & text); 141 142 SAL_DLLPRIVATE void skipSpace(); 143 144 SAL_DLLPRIVATE bool skipComment(); 145 146 SAL_DLLPRIVATE void skipProcessingInstruction(); 147 148 SAL_DLLPRIVATE void skipDocumentTypeDeclaration(); 149 150 SAL_DLLPRIVATE Span scanCdataSection(); 151 152 SAL_DLLPRIVATE bool scanName(char const ** nameColon); 153 154 SAL_DLLPRIVATE int scanNamespaceIri( 155 char const * begin, char const * end); 156 157 SAL_DLLPRIVATE char const * handleReference( 158 char const * position, char const * end); 159 160 SAL_DLLPRIVATE Span handleAttributeValue( 161 char const * begin, char const * end, bool fullyNormalize); 162 163 SAL_DLLPRIVATE Result handleStartTag(int * nsId, Span * localName); 164 165 SAL_DLLPRIVATE Result handleEndTag(); 166 167 SAL_DLLPRIVATE void handleElementEnd(); 168 169 SAL_DLLPRIVATE Result handleSkippedText(Span * data, int * nsId); 170 171 SAL_DLLPRIVATE Result handleRawText(Span * text); 172 173 SAL_DLLPRIVATE Result handleNormalizedText(Span * text); 174 175 SAL_DLLPRIVATE int toNamespaceId(NamespaceIris::size_type pos); 176 177 rtl::OUString fileUrl_; 178 oslFileHandle fileHandle_; 179 sal_uInt64 fileSize_; 180 void * fileAddress_; 181 NamespaceIris namespaceIris_; 182 NamespaceList namespaces_; 183 ElementStack elements_; 184 char const * pos_; 185 char const * end_; 186 State state_; 187 Attributes attributes_; 188 Attributes::iterator currentAttribute_; 189 bool firstAttribute_; 190 Pad pad_; 191 }; 192 193 } 194 195 #endif 196