1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 #include "precompiled_xmloff.hxx"
25
26 #include "RDFaImportHelper.hxx"
27
28 #include <xmloff/xmlimp.hxx>
29 #include <xmloff/nmspmap.hxx>
30
31 #include <comphelper/sequenceasvector.hxx>
32
33 #include <tools/string.hxx> // for GetAbsoluteReference
34
35 #include <com/sun/star/rdf/URI.hpp>
36 #include <com/sun/star/rdf/XDocumentMetadataAccess.hpp>
37 #include <com/sun/star/rdf/XDocumentRepository.hpp>
38
39 #include <rtl/ustring.hxx>
40
41 #include <boost/bind.hpp>
42 #include <boost/iterator_adaptors.hpp>
43 #ifndef BOOST_ITERATOR_ADAPTOR_DWA053000_HPP_ // from iterator_adaptors.hpp
44 // N.B.: the check for the header guard _of a specific version of boost_
45 // is here so this may work on different versions of boost,
46 // which sadly put the goods in different header files
47 #include <boost/iterator/transform_iterator.hpp>
48 #endif
49
50 #include <map>
51 #include <iterator>
52 #include <functional>
53 #include <algorithm>
54
55
56 using namespace ::com::sun::star;
57
58 namespace xmloff {
59
60 /** a bit of context for parsing RDFa attributes */
61 class SAL_DLLPRIVATE RDFaReader
62 {
63 const SvXMLImport & m_rImport;
64
GetImport() const65 const SvXMLImport & GetImport() const { return m_rImport; }
66
67 //FIXME: this is an ugly hack to workaround buggy SvXMLImport::GetAbsolute
GetAbsoluteReference(::rtl::OUString const & i_rURI) const68 ::rtl::OUString GetAbsoluteReference(::rtl::OUString const & i_rURI) const
69 {
70 if (!i_rURI.getLength() || i_rURI[0] == '#')
71 {
72 return GetImport().GetBaseURL() + i_rURI;
73 }
74 else
75 {
76 return GetImport().GetAbsoluteReference(i_rURI);
77 }
78 }
79
80 public:
RDFaReader(SvXMLImport const & i_rImport)81 RDFaReader(SvXMLImport const & i_rImport)
82 : m_rImport(i_rImport)
83 { }
84
85 // returns URI or blank node!
86 ::rtl::OUString ReadCURIE(::rtl::OUString const & i_rCURIE) const;
87
88 std::vector< ::rtl::OUString >
89 ReadCURIEs(::rtl::OUString const & i_rCURIEs) const;
90
91 ::rtl::OUString
92 ReadURIOrSafeCURIE( ::rtl::OUString const & i_rURIOrSafeCURIE) const;
93 };
94
95 /** helper to insert RDFa statements into the RDF repository */
96 class SAL_DLLPRIVATE RDFaInserter
97 {
98 const uno::Reference<uno::XComponentContext> m_xContext;
99 uno::Reference< rdf::XDocumentRepository > m_xRepository;
100
101 typedef ::std::map< ::rtl::OUString, uno::Reference< rdf::XBlankNode > >
102 BlankNodeMap_t;
103
104 BlankNodeMap_t m_BlankNodeMap;
105
106 public:
RDFaInserter(uno::Reference<uno::XComponentContext> const & i_xContext,uno::Reference<rdf::XDocumentRepository> const & i_xRepository)107 RDFaInserter(uno::Reference<uno::XComponentContext> const & i_xContext,
108 uno::Reference< rdf::XDocumentRepository > const & i_xRepository)
109 : m_xContext(i_xContext)
110 , m_xRepository(i_xRepository)
111 {}
112
113 uno::Reference< rdf::XBlankNode >
114 LookupBlankNode(::rtl::OUString const & i_rNodeId );
115
116 uno::Reference< rdf::XURI >
117 MakeURI( ::rtl::OUString const & i_rURI) const;
118
119 uno::Reference< rdf::XResource>
120 MakeResource( ::rtl::OUString const & i_rResource);
121
122 void InsertRDFaEntry(struct RDFaEntry const & i_rEntry);
123 };
124
125 /** store parsed RDFa attributes */
126 struct SAL_DLLPRIVATE ParsedRDFaAttributes
127 {
128 ::rtl::OUString m_About;
129 ::std::vector< ::rtl::OUString > m_Properties;
130 ::rtl::OUString m_Content;
131 ::rtl::OUString m_Datatype;
132
ParsedRDFaAttributesxmloff::ParsedRDFaAttributes133 ParsedRDFaAttributes(
134 ::rtl::OUString const & i_rAbout,
135 ::std::vector< ::rtl::OUString > const & i_rProperties,
136 ::rtl::OUString const & i_rContent,
137 ::rtl::OUString const & i_rDatatype)
138 : m_About(i_rAbout)
139 , m_Properties(i_rProperties)
140 , m_Content(i_rContent)
141 , m_Datatype(i_rDatatype)
142 { }
143 };
144
145 /** store metadatable object and its RDFa attributes */
146 struct SAL_DLLPRIVATE RDFaEntry
147 {
148 uno::Reference<rdf::XMetadatable> m_xObject;
149 ::boost::shared_ptr<ParsedRDFaAttributes> m_pRDFaAttributes;
150
RDFaEntryxmloff::RDFaEntry151 RDFaEntry(uno::Reference<rdf::XMetadatable> const & i_xObject,
152 ::boost::shared_ptr<ParsedRDFaAttributes> const& i_pRDFaAttributes)
153 : m_xObject(i_xObject)
154 , m_pRDFaAttributes(i_pRDFaAttributes)
155 { }
156 };
157
158 ////////////////////////////////////////////////////////////////////////////
159
160
/** Tells whether i_Char is one of tab, line feed, carriage return, space. */
static inline bool isWS(const sal_Unicode i_Char)
{
    switch (i_Char)
    {
        case '\t':
        case '\n':
        case '\r':
        case ' ':
            return true;
        default:
            return false;
    }
}
166
splitAtWS(::rtl::OUString & io_rString)167 static ::rtl::OUString splitAtWS(::rtl::OUString & io_rString)
168 {
169 const sal_Int32 len( io_rString.getLength() );
170 sal_Int32 idxstt(0);
171 while ((idxstt < len) && ( isWS(io_rString[idxstt])))
172 ++idxstt; // skip leading ws
173 sal_Int32 idxend(idxstt);
174 while ((idxend < len) && (!isWS(io_rString[idxend])))
175 ++idxend; // the CURIE
176 const ::rtl::OUString ret(io_rString.copy(idxstt, idxend - idxstt));
177 io_rString = io_rString.copy(idxend); // rest
178 return ret;
179 }
180
181 ::rtl::OUString
ReadCURIE(::rtl::OUString const & i_rCURIE) const182 RDFaReader::ReadCURIE(::rtl::OUString const & i_rCURIE) const
183 {
184 // the RDFa spec says that a prefix is required (it may be empty: ":foo")
185 const sal_Int32 idx( i_rCURIE.indexOf(':') );
186 if (idx >= 0)
187 {
188 ::rtl::OUString Prefix;
189 ::rtl::OUString LocalName;
190 ::rtl::OUString Namespace;
191 sal_uInt16 nKey( GetImport().GetNamespaceMap()._GetKeyByAttrName(
192 i_rCURIE, &Prefix, &LocalName, &Namespace) );
193 if (Prefix.equalsAscii("_"))
194 {
195 // eeek, it's a bnode!
196 // "_" is not a valid URI scheme => we can identify bnodes
197 return i_rCURIE;
198 }
199 else
200 {
201 OSL_ENSURE(XML_NAMESPACE_NONE != nKey, "no namespace?");
202 if ((XML_NAMESPACE_UNKNOWN != nKey) &&
203 (XML_NAMESPACE_XMLNS != nKey))
204 {
205 // N.B.: empty LocalName is valid!
206 const ::rtl::OUString URI(Namespace + LocalName);
207 // return GetImport().GetAbsoluteReference(URI);
208 return GetAbsoluteReference(URI);
209 }
210 else
211 {
212 OSL_TRACE( "ReadCURIE: invalid CURIE: invalid prefix" );
213 }
214 }
215 }
216 else
217 {
218 OSL_TRACE( "ReadCURIE: invalid CURIE: no prefix" );
219 }
220
221 return ::rtl::OUString();
222 }
223
224 ::std::vector< ::rtl::OUString >
ReadCURIEs(::rtl::OUString const & i_rCURIEs) const225 RDFaReader::ReadCURIEs(::rtl::OUString const & i_rCURIEs) const
226 {
227 std::vector< ::rtl::OUString > vec;
228 ::rtl::OUString CURIEs(i_rCURIEs);
229 do {
230 ::rtl::OUString curie( splitAtWS(CURIEs) );
231 if (curie.getLength())
232 {
233 const ::rtl::OUString uri(ReadCURIE(curie));
234 if (uri.getLength())
235 {
236 vec.push_back(uri);
237 }
238 }
239 }
240 while (CURIEs.getLength());
241 if (!vec.size())
242 {
243 OSL_TRACE( "ReadCURIEs: invalid CURIEs" );
244 }
245 return vec;
246 }
247
248 ::rtl::OUString
ReadURIOrSafeCURIE(::rtl::OUString const & i_rURIOrSafeCURIE) const249 RDFaReader::ReadURIOrSafeCURIE(::rtl::OUString const & i_rURIOrSafeCURIE) const
250 {
251 const sal_Int32 len(i_rURIOrSafeCURIE.getLength());
252 if (len && (i_rURIOrSafeCURIE[0] == '['))
253 {
254 if ((len >= 2) && (i_rURIOrSafeCURIE[len - 1] == ']'))
255 {
256 return ReadCURIE(i_rURIOrSafeCURIE.copy(1, len - 2));
257 }
258 else
259 {
260 OSL_TRACE( "ReadURIOrSafeCURIE: invalid SafeCURIE" );
261 return ::rtl::OUString();
262 }
263 }
264 else
265 {
266 if (i_rURIOrSafeCURIE.matchAsciiL("_:", 2)) // blank node
267 {
268 OSL_TRACE( "ReadURIOrSafeCURIE: invalid URI: scheme is _" );
269 return ::rtl::OUString();
270 }
271 else
272 {
273 // return GetImport().GetAbsoluteReference(i_rURIOrSafeCURIE);
274 return GetAbsoluteReference(i_rURIOrSafeCURIE);
275 }
276 }
277 }
278
279 ////////////////////////////////////////////////////////////////////////////
280
281 uno::Reference< rdf::XBlankNode >
LookupBlankNode(::rtl::OUString const & i_rNodeId)282 RDFaInserter::LookupBlankNode(::rtl::OUString const & i_rNodeId )
283 {
284 uno::Reference< rdf::XBlankNode > & rEntry( m_BlankNodeMap[ i_rNodeId ] );
285 if (!rEntry.is())
286 {
287 rEntry = m_xRepository->createBlankNode();
288 }
289 return rEntry;
290 }
291
292 uno::Reference< rdf::XURI >
MakeURI(::rtl::OUString const & i_rURI) const293 RDFaInserter::MakeURI( ::rtl::OUString const & i_rURI) const
294 {
295 if (i_rURI.matchAsciiL("_:", 2)) // blank node
296 {
297 OSL_TRACE("MakeURI: cannot create URI for blank node");
298 return 0;
299 }
300 else
301 {
302 try
303 {
304 return rdf::URI::create( m_xContext, i_rURI );
305 }
306 catch (uno::Exception &)
307 {
308 OSL_ENSURE(false, "MakeURI: cannot create URI");
309 return 0;
310 }
311 }
312 }
313
314 uno::Reference< rdf::XResource>
MakeResource(::rtl::OUString const & i_rResource)315 RDFaInserter::MakeResource( ::rtl::OUString const & i_rResource)
316 {
317 if (i_rResource.matchAsciiL("_:", 2)) // blank node
318 {
319 // we cannot use the blank node label as-is: it must be distinct
320 // from labels in other graphs, so create fresh ones per XML stream
321 // N.B.: content.xml and styles.xml are distinct graphs
322 ::rtl::OUString name( i_rResource.copy(2) );
323 const uno::Reference< rdf::XBlankNode > xBNode( LookupBlankNode(name) );
324 OSL_ENSURE(xBNode.is(), "no blank node?");
325 return uno::Reference<rdf::XResource>( xBNode, uno::UNO_QUERY);
326 }
327 else
328 {
329 return uno::Reference<rdf::XResource>( MakeURI( i_rResource ),
330 uno::UNO_QUERY);
331 }
332 }
333
334 /** i wrote this because c++ implementations cannot agree on which variant
335 of boost::bind and std::mem_fun_ref applied to Reference::is compiles */
336 class ref_is_null :
337 public ::std::unary_function<sal_Bool, const uno::Reference<rdf::XURI> & >
338 {
339 public:
operator ()(const uno::Reference<rdf::XURI> & i_rRef)340 sal_Bool operator() (const uno::Reference<rdf::XURI> & i_rRef)
341 {
342 return !i_rRef.is();
343 }
344 };
345
InsertRDFaEntry(struct RDFaEntry const & i_rEntry)346 void RDFaInserter::InsertRDFaEntry(
347 struct RDFaEntry const & i_rEntry)
348 {
349 OSL_ENSURE(i_rEntry.m_xObject.is(),
350 "InsertRDFaEntry: invalid arg: null object");
351 if (!i_rEntry.m_xObject.is()) return;
352
353 const uno::Reference< rdf::XResource > xSubject(
354 MakeResource( i_rEntry.m_pRDFaAttributes->m_About ) );
355 if (!xSubject.is())
356 {
357 return; // invalid
358 }
359
360 ::comphelper::SequenceAsVector< uno::Reference< rdf::XURI > > predicates;
361
362 predicates.reserve(i_rEntry.m_pRDFaAttributes->m_Properties.size());
363
364 ::std::remove_copy_if(
365 ::boost::make_transform_iterator(
366 i_rEntry.m_pRDFaAttributes->m_Properties.begin(),
367 ::boost::bind(&RDFaInserter::MakeURI, this, _1)),
368 // argh, this must be the same type :(
369 ::boost::make_transform_iterator(
370 i_rEntry.m_pRDFaAttributes->m_Properties.end(),
371 ::boost::bind(&RDFaInserter::MakeURI, this, _1)),
372 ::std::back_inserter(predicates),
373 ref_is_null() );
374 // compiles only on wntmsci12
375 // ::boost::bind( ::std::logical_not<sal_Bool>(), ::boost::bind<sal_Bool>(&uno::Reference<rdf::XURI>::is, _1)));
376 // compiles on unxsoli4, wntsci12, but not unxlngi6
377 // ::boost::bind( ::std::logical_not<sal_Bool>(), ::boost::bind<sal_Bool, com::sun::star::uno::Reference<rdf::XURI> >(&uno::Reference<rdf::XURI>::is, _1)));
378 // compiles on unxsoli4, unxlngi6, but not wntsci12
379 // ::std::not1( ::std::mem_fun_ref(&uno::Reference<rdf::XURI>::is)) );
380
381 if (!predicates.size())
382 {
383 return; // invalid
384 }
385
386 uno::Reference<rdf::XURI> xDatatype;
387 if (i_rEntry.m_pRDFaAttributes->m_Datatype.getLength())
388 {
389 xDatatype = MakeURI( i_rEntry.m_pRDFaAttributes->m_Datatype );
390 }
391
392 try
393 {
394 // N.B.: this will call xMeta->ensureMetadataReference, which is why
395 // this must be done _after_ importing the whole XML file,
396 // to prevent collision between generated ids and ids in the file
397 m_xRepository->setStatementRDFa(xSubject, predicates.getAsConstList(),
398 i_rEntry.m_xObject,
399 i_rEntry.m_pRDFaAttributes->m_Content, xDatatype);
400 }
401 catch (uno::Exception &)
402 {
403 OSL_ENSURE(false, "InsertRDFaEntry: setStatementRDFa failed?");
404 }
405 }
406
407 ////////////////////////////////////////////////////////////////////////////
408
RDFaImportHelper(const SvXMLImport & i_rImport)409 RDFaImportHelper::RDFaImportHelper(const SvXMLImport & i_rImport)
410 : m_rImport(i_rImport)
411 {
412 }
413
~RDFaImportHelper()414 RDFaImportHelper::~RDFaImportHelper()
415 {
416 }
417
418 ::boost::shared_ptr<ParsedRDFaAttributes>
ParseRDFa(::rtl::OUString const & i_rAbout,::rtl::OUString const & i_rProperty,::rtl::OUString const & i_rContent,::rtl::OUString const & i_rDatatype)419 RDFaImportHelper::ParseRDFa(
420 ::rtl::OUString const & i_rAbout,
421 ::rtl::OUString const & i_rProperty,
422 ::rtl::OUString const & i_rContent,
423 ::rtl::OUString const & i_rDatatype)
424 {
425 if (!i_rProperty.getLength())
426 {
427 OSL_TRACE("AddRDFa: invalid input: xhtml:property empty");
428 return ::boost::shared_ptr<ParsedRDFaAttributes>();
429 }
430 // must parse CURIEs here: need namespace declaration context
431 RDFaReader reader(GetImport());
432 const ::rtl::OUString about( reader.ReadURIOrSafeCURIE(i_rAbout) );
433 if (!about.getLength()) {
434 return ::boost::shared_ptr<ParsedRDFaAttributes>();
435 }
436 const ::std::vector< ::rtl::OUString > properties(
437 reader.ReadCURIEs(i_rProperty) );
438 if (!properties.size()) {
439 return ::boost::shared_ptr<ParsedRDFaAttributes>();
440 }
441 const ::rtl::OUString datatype( i_rDatatype.getLength()
442 ? reader.ReadCURIE(i_rDatatype)
443 : ::rtl::OUString() );
444 return ::boost::shared_ptr<ParsedRDFaAttributes>(
445 new ParsedRDFaAttributes(about, properties, i_rContent, datatype));
446 }
447
448 void
AddRDFa(uno::Reference<rdf::XMetadatable> const & i_xObject,::boost::shared_ptr<ParsedRDFaAttributes> & i_pRDFaAttributes)449 RDFaImportHelper::AddRDFa(
450 uno::Reference<rdf::XMetadatable> const & i_xObject,
451 ::boost::shared_ptr<ParsedRDFaAttributes> & i_pRDFaAttributes)
452 {
453 if (!i_xObject.is())
454 {
455 OSL_ENSURE(false, "AddRDFa: invalid arg: null textcontent");
456 return;
457 }
458 if (!i_pRDFaAttributes.get())
459 {
460 OSL_ENSURE(false, "AddRDFa: invalid arg: null RDFa attributes");
461 return;
462 }
463 m_RDFaEntries.push_back(RDFaEntry(i_xObject, i_pRDFaAttributes));
464 }
465
466 void
ParseAndAddRDFa(uno::Reference<rdf::XMetadatable> const & i_xObject,::rtl::OUString const & i_rAbout,::rtl::OUString const & i_rProperty,::rtl::OUString const & i_rContent,::rtl::OUString const & i_rDatatype)467 RDFaImportHelper::ParseAndAddRDFa(
468 uno::Reference<rdf::XMetadatable> const & i_xObject,
469 ::rtl::OUString const & i_rAbout,
470 ::rtl::OUString const & i_rProperty,
471 ::rtl::OUString const & i_rContent,
472 ::rtl::OUString const & i_rDatatype)
473 {
474 ::boost::shared_ptr<ParsedRDFaAttributes> pAttributes(
475 ParseRDFa(i_rAbout, i_rProperty, i_rContent, i_rDatatype) );
476 if (pAttributes.get())
477 {
478 AddRDFa(i_xObject, pAttributes);
479 }
480 }
481
InsertRDFa(uno::Reference<rdf::XRepositorySupplier> const & i_xModel)482 void RDFaImportHelper::InsertRDFa(
483 uno::Reference< rdf::XRepositorySupplier> const & i_xModel)
484 {
485 OSL_ENSURE(i_xModel.is(), "InsertRDFa: invalid arg: model null");
486 if (!i_xModel.is()) return;
487 const uno::Reference< rdf::XDocumentRepository > xRepository(
488 i_xModel->getRDFRepository(), uno::UNO_QUERY);
489 OSL_ENSURE(xRepository.is(), "InsertRDFa: no DocumentRepository?");
490 if (!xRepository.is()) return;
491 RDFaInserter inserter(GetImport().GetComponentContext(), xRepository);
492 ::std::for_each(m_RDFaEntries.begin(), m_RDFaEntries.end(),
493 ::boost::bind(&RDFaInserter::InsertRDFaEntry, &inserter, _1));
494 }
495
496 } // namespace xmloff
497
498