1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 #include "precompiled_xmloff.hxx"
25 
26 #include "RDFaImportHelper.hxx"
27 
28 #include <xmloff/xmlimp.hxx>
29 #include <xmloff/nmspmap.hxx>
30 
31 #include <comphelper/sequenceasvector.hxx>
32 
33 #include <tools/string.hxx> // for GetAbsoluteReference
34 
35 #include <com/sun/star/rdf/URI.hpp>
36 #include <com/sun/star/rdf/XDocumentMetadataAccess.hpp>
37 #include <com/sun/star/rdf/XDocumentRepository.hpp>
38 
39 #include <rtl/ustring.hxx>
40 
41 #include <boost/bind.hpp>
42 #include <boost/iterator_adaptors.hpp>
43 #ifndef BOOST_ITERATOR_ADAPTOR_DWA053000_HPP_ // from iterator_adaptors.hpp
44 // N.B.: the check for the header guard _of a specific version of boost_
45 //       is here so this may work on different versions of boost,
46 //       which sadly put the goods in different header files
47 #include <boost/iterator/transform_iterator.hpp>
48 #endif
49 
50 #include <map>
51 #include <iterator>
52 #include <functional>
53 #include <algorithm>
54 
55 
56 using namespace ::com::sun::star;
57 
58 namespace xmloff {
59 
60 /** a bit of context for parsing RDFa attributes */
61 class SAL_DLLPRIVATE RDFaReader
62 {
63     const SvXMLImport & m_rImport;
64 
65     const SvXMLImport & GetImport() const { return m_rImport; }
66 
67     //FIXME: this is an ugly hack to workaround buggy SvXMLImport::GetAbsolute
68     ::rtl::OUString GetAbsoluteReference(::rtl::OUString const & i_rURI) const
69     {
70         if (!i_rURI.getLength() || i_rURI[0] == '#')
71         {
72             return GetImport().GetBaseURL() + i_rURI;
73         }
74         else
75         {
76             return GetImport().GetAbsoluteReference(i_rURI);
77         }
78     }
79 
80 public:
81     RDFaReader(SvXMLImport const & i_rImport)
82         : m_rImport(i_rImport)
83     { }
84 
85     // returns URI or blank node!
86     ::rtl::OUString ReadCURIE(::rtl::OUString const & i_rCURIE) const;
87 
88     std::vector< ::rtl::OUString >
89     ReadCURIEs(::rtl::OUString const & i_rCURIEs) const;
90 
91     ::rtl::OUString
92     ReadURIOrSafeCURIE( ::rtl::OUString const & i_rURIOrSafeCURIE) const;
93 };
94 
95 /** helper to insert RDFa statements into the RDF repository */
96 class SAL_DLLPRIVATE RDFaInserter
97 {
98     const uno::Reference<uno::XComponentContext> m_xContext;
99     uno::Reference< rdf::XDocumentRepository > m_xRepository;
100 
101     typedef ::std::map< ::rtl::OUString, uno::Reference< rdf::XBlankNode > >
102         BlankNodeMap_t;
103 
104     BlankNodeMap_t m_BlankNodeMap;
105 
106 public:
107     RDFaInserter(uno::Reference<uno::XComponentContext> const & i_xContext,
108             uno::Reference< rdf::XDocumentRepository > const & i_xRepository)
109         : m_xContext(i_xContext)
110         , m_xRepository(i_xRepository)
111     {}
112 
113     uno::Reference< rdf::XBlankNode >
114     LookupBlankNode(::rtl::OUString const & i_rNodeId );
115 
116     uno::Reference< rdf::XURI >
117     MakeURI( ::rtl::OUString const & i_rURI) const;
118 
119     uno::Reference< rdf::XResource>
120     MakeResource( ::rtl::OUString const & i_rResource);
121 
122     void InsertRDFaEntry(struct RDFaEntry const & i_rEntry);
123 };
124 
125 /** store parsed RDFa attributes */
126 struct SAL_DLLPRIVATE ParsedRDFaAttributes
127 {
128     ::rtl::OUString m_About;
129     ::std::vector< ::rtl::OUString > m_Properties;
130     ::rtl::OUString m_Content;
131     ::rtl::OUString m_Datatype;
132 
133     ParsedRDFaAttributes(
134             ::rtl::OUString const & i_rAbout,
135             ::std::vector< ::rtl::OUString > const & i_rProperties,
136             ::rtl::OUString const & i_rContent,
137             ::rtl::OUString const & i_rDatatype)
138         : m_About(i_rAbout)
139         , m_Properties(i_rProperties)
140         , m_Content(i_rContent)
141         , m_Datatype(i_rDatatype)
142     { }
143 };
144 
145 /** store metadatable object and its RDFa attributes */
146 struct SAL_DLLPRIVATE RDFaEntry
147 {
148     uno::Reference<rdf::XMetadatable> m_xObject;
149     ::boost::shared_ptr<ParsedRDFaAttributes> m_pRDFaAttributes;
150 
151     RDFaEntry(uno::Reference<rdf::XMetadatable> const & i_xObject,
152             ::boost::shared_ptr<ParsedRDFaAttributes> const& i_pRDFaAttributes)
153         : m_xObject(i_xObject)
154         , m_pRDFaAttributes(i_pRDFaAttributes)
155     { }
156 };
157 
158 ////////////////////////////////////////////////////////////////////////////
159 
160 
161 static inline bool isWS(const sal_Unicode i_Char)
162 {
163     return ('\t' == i_Char) || ('\n' == i_Char) || ('\r' == i_Char)
164         || (' ' == i_Char);
165 }
166 
167 static ::rtl::OUString splitAtWS(::rtl::OUString & io_rString)
168 {
169     const sal_Int32 len( io_rString.getLength() );
170     sal_Int32 idxstt(0);
171     while ((idxstt < len) && ( isWS(io_rString[idxstt])))
172         ++idxstt; // skip leading ws
173     sal_Int32 idxend(idxstt);
174     while ((idxend < len) && (!isWS(io_rString[idxend])))
175         ++idxend; // the CURIE
176     const ::rtl::OUString ret(io_rString.copy(idxstt, idxend - idxstt));
177     io_rString = io_rString.copy(idxend); // rest
178     return ret;
179 }
180 
181 ::rtl::OUString
182 RDFaReader::ReadCURIE(::rtl::OUString const & i_rCURIE) const
183 {
184     // the RDFa spec says that a prefix is required (it may be empty: ":foo")
185     const sal_Int32 idx( i_rCURIE.indexOf(':') );
186     if (idx >= 0)
187     {
188         ::rtl::OUString Prefix;
189         ::rtl::OUString LocalName;
190         ::rtl::OUString Namespace;
191         sal_uInt16 nKey( GetImport().GetNamespaceMap()._GetKeyByAttrName(
192             i_rCURIE, &Prefix, &LocalName, &Namespace) );
193         if (Prefix.equalsAscii("_"))
194         {
195             // eeek, it's a bnode!
196             // "_" is not a valid URI scheme => we can identify bnodes
197             return i_rCURIE;
198         }
199         else
200         {
201             OSL_ENSURE(XML_NAMESPACE_NONE != nKey, "no namespace?");
202             if ((XML_NAMESPACE_UNKNOWN != nKey) &&
203                 (XML_NAMESPACE_XMLNS   != nKey))
204             {
205                 // N.B.: empty LocalName is valid!
206                 const ::rtl::OUString URI(Namespace + LocalName);
207 //                return GetImport().GetAbsoluteReference(URI);
208                 return GetAbsoluteReference(URI);
209             }
210             else
211             {
212                 OSL_TRACE( "ReadCURIE: invalid CURIE: invalid prefix" );
213                 return ::rtl::OUString();
214             }
215         }
216     }
217     else
218     {
219         OSL_TRACE( "ReadCURIE: invalid CURIE: no prefix" );
220         return ::rtl::OUString();
221     }
222 }
223 
224 ::std::vector< ::rtl::OUString >
225 RDFaReader::ReadCURIEs(::rtl::OUString const & i_rCURIEs) const
226 {
227     std::vector< ::rtl::OUString > vec;
228     ::rtl::OUString CURIEs(i_rCURIEs);
229     do {
230       ::rtl::OUString curie( splitAtWS(CURIEs) );
231       if (curie.getLength())
232       {
233           const ::rtl::OUString uri(ReadCURIE(curie));
234           if (uri.getLength())
235           {
236               vec.push_back(uri);
237           }
238       }
239     }
240     while (CURIEs.getLength());
241     if (!vec.size())
242     {
243         OSL_TRACE( "ReadCURIEs: invalid CURIEs" );
244     }
245     return vec;
246 }
247 
248 ::rtl::OUString
249 RDFaReader::ReadURIOrSafeCURIE(::rtl::OUString const & i_rURIOrSafeCURIE) const
250 {
251     const sal_Int32 len(i_rURIOrSafeCURIE.getLength());
252     if (len && (i_rURIOrSafeCURIE[0] == '['))
253     {
254         if ((len >= 2) && (i_rURIOrSafeCURIE[len - 1] == ']'))
255         {
256             return ReadCURIE(i_rURIOrSafeCURIE.copy(1, len - 2));
257         }
258         else
259         {
260             OSL_TRACE( "ReadURIOrSafeCURIE: invalid SafeCURIE" );
261             return ::rtl::OUString();
262         }
263     }
264     else
265     {
266         if (i_rURIOrSafeCURIE.matchAsciiL("_:", 2)) // blank node
267         {
268             OSL_TRACE( "ReadURIOrSafeCURIE: invalid URI: scheme is _" );
269             return ::rtl::OUString();
270         }
271         else
272         {
273 //            return GetImport().GetAbsoluteReference(i_rURIOrSafeCURIE);
274             return GetAbsoluteReference(i_rURIOrSafeCURIE);
275         }
276     }
277 }
278 
279 ////////////////////////////////////////////////////////////////////////////
280 
281 uno::Reference< rdf::XBlankNode >
282 RDFaInserter::LookupBlankNode(::rtl::OUString const & i_rNodeId )
283 {
284     uno::Reference< rdf::XBlankNode > & rEntry( m_BlankNodeMap[ i_rNodeId ] );
285     if (!rEntry.is())
286     {
287         rEntry = m_xRepository->createBlankNode();
288     }
289     return rEntry;
290 }
291 
292 uno::Reference< rdf::XURI >
293 RDFaInserter::MakeURI( ::rtl::OUString const & i_rURI) const
294 {
295     if (i_rURI.matchAsciiL("_:", 2)) // blank node
296     {
297         OSL_TRACE("MakeURI: cannot create URI for blank node");
298         return 0;
299     }
300     else
301     {
302         try
303         {
304             return rdf::URI::create( m_xContext, i_rURI );
305         }
306         catch (uno::Exception &)
307         {
308             OSL_ENSURE(false, "MakeURI: cannot create URI");
309             return 0;
310         }
311     }
312 }
313 
314 uno::Reference< rdf::XResource>
315 RDFaInserter::MakeResource( ::rtl::OUString const & i_rResource)
316 {
317     if (i_rResource.matchAsciiL("_:", 2)) // blank node
318     {
319         // we cannot use the blank node label as-is: it must be distinct
320         // from labels in other graphs, so create fresh ones per XML stream
321         // N.B.: content.xml and styles.xml are distinct graphs
322         ::rtl::OUString name( i_rResource.copy(2) );
323         const uno::Reference< rdf::XBlankNode > xBNode( LookupBlankNode(name) );
324         OSL_ENSURE(xBNode.is(), "no blank node?");
325         return uno::Reference<rdf::XResource>( xBNode, uno::UNO_QUERY);
326     }
327     else
328     {
329         return uno::Reference<rdf::XResource>( MakeURI( i_rResource ),
330             uno::UNO_QUERY);
331     }
332 }
333 
334 /** i wrote this because c++ implementations cannot agree on which variant
335     of boost::bind and std::mem_fun_ref applied to Reference::is compiles */
336 class ref_is_null :
337     public ::std::unary_function<sal_Bool, const uno::Reference<rdf::XURI> & >
338 {
339 public:
340     sal_Bool operator() (const uno::Reference<rdf::XURI> & i_rRef)
341     {
342         return !i_rRef.is();
343     }
344 };
345 
346 void RDFaInserter::InsertRDFaEntry(
347     struct RDFaEntry const & i_rEntry)
348 {
349     OSL_ENSURE(i_rEntry.m_xObject.is(),
350         "InsertRDFaEntry: invalid arg: null object");
351     if (!i_rEntry.m_xObject.is()) return;
352 
353     const uno::Reference< rdf::XResource > xSubject(
354         MakeResource( i_rEntry.m_pRDFaAttributes->m_About ) );
355     if (!xSubject.is())
356     {
357         return; // invalid
358     }
359 
360     ::comphelper::SequenceAsVector< uno::Reference< rdf::XURI > > predicates;
361 
362     predicates.reserve(i_rEntry.m_pRDFaAttributes->m_Properties.size());
363 
364     ::std::remove_copy_if(
365         ::boost::make_transform_iterator(
366             i_rEntry.m_pRDFaAttributes->m_Properties.begin(),
367             ::boost::bind(&RDFaInserter::MakeURI, this, _1)),
368         // argh, this must be the same type :(
369         ::boost::make_transform_iterator(
370             i_rEntry.m_pRDFaAttributes->m_Properties.end(),
371             ::boost::bind(&RDFaInserter::MakeURI, this, _1)),
372         ::std::back_inserter(predicates),
373         ref_is_null() );
374         // compiles only on wntmsci12
375 //        ::boost::bind( ::std::logical_not<sal_Bool>(), ::boost::bind<sal_Bool>(&uno::Reference<rdf::XURI>::is, _1)));
376         // compiles on unxsoli4, wntsci12, but not unxlngi6
377 //        ::boost::bind( ::std::logical_not<sal_Bool>(), ::boost::bind<sal_Bool, com::sun::star::uno::Reference<rdf::XURI> >(&uno::Reference<rdf::XURI>::is, _1)));
378         // compiles on unxsoli4, unxlngi6, but not wntsci12
379 //        ::std::not1( ::std::mem_fun_ref(&uno::Reference<rdf::XURI>::is)) );
380 
381     if (!predicates.size())
382     {
383         return; // invalid
384     }
385 
386     uno::Reference<rdf::XURI> xDatatype;
387     if (i_rEntry.m_pRDFaAttributes->m_Datatype.getLength())
388     {
389         xDatatype = MakeURI( i_rEntry.m_pRDFaAttributes->m_Datatype );
390     }
391 
392     try
393     {
394         // N.B.: this will call xMeta->ensureMetadataReference, which is why
395         // this must be done _after_ importing the whole XML file,
396         // to prevent collision between generated ids and ids in the file
397         m_xRepository->setStatementRDFa(xSubject, predicates.getAsConstList(),
398             i_rEntry.m_xObject,
399             i_rEntry.m_pRDFaAttributes->m_Content, xDatatype);
400     }
401     catch (uno::Exception &)
402     {
403         OSL_ENSURE(false, "InsertRDFaEntry: setStatementRDFa failed?");
404     }
405 }
406 
407 ////////////////////////////////////////////////////////////////////////////
408 
409 RDFaImportHelper::RDFaImportHelper(const SvXMLImport & i_rImport)
410     : m_rImport(i_rImport)
411 {
412 }
413 
414 RDFaImportHelper::~RDFaImportHelper()
415 {
416 }
417 
418 ::boost::shared_ptr<ParsedRDFaAttributes>
419 RDFaImportHelper::ParseRDFa(
420     ::rtl::OUString const & i_rAbout,
421     ::rtl::OUString const & i_rProperty,
422     ::rtl::OUString const & i_rContent,
423     ::rtl::OUString const & i_rDatatype)
424 {
425     if (!i_rProperty.getLength())
426     {
427         OSL_TRACE("AddRDFa: invalid input: xhtml:property empty");
428         return ::boost::shared_ptr<ParsedRDFaAttributes>();
429     }
430     // must parse CURIEs here: need namespace declaration context
431     RDFaReader reader(GetImport());
432     const ::rtl::OUString about( reader.ReadURIOrSafeCURIE(i_rAbout) );
433     if (!about.getLength()) {
434         return ::boost::shared_ptr<ParsedRDFaAttributes>();
435     }
436     const ::std::vector< ::rtl::OUString > properties(
437         reader.ReadCURIEs(i_rProperty) );
438     if (!properties.size()) {
439         return ::boost::shared_ptr<ParsedRDFaAttributes>();
440     }
441     const ::rtl::OUString datatype( i_rDatatype.getLength()
442         ?   reader.ReadCURIE(i_rDatatype)
443         :   ::rtl::OUString() );
444     return ::boost::shared_ptr<ParsedRDFaAttributes>(
445             new ParsedRDFaAttributes(about, properties, i_rContent, datatype));
446 }
447 
448 void
449 RDFaImportHelper::AddRDFa(
450     uno::Reference<rdf::XMetadatable> const & i_xObject,
451     ::boost::shared_ptr<ParsedRDFaAttributes> & i_pRDFaAttributes)
452 {
453     if (!i_xObject.is())
454     {
455         OSL_ENSURE(false, "AddRDFa: invalid arg: null textcontent");
456         return;
457     }
458     if (!i_pRDFaAttributes.get())
459     {
460         OSL_ENSURE(false, "AddRDFa: invalid arg: null RDFa attributes");
461         return;
462     }
463     m_RDFaEntries.push_back(RDFaEntry(i_xObject, i_pRDFaAttributes));
464 }
465 
466 void
467 RDFaImportHelper::ParseAndAddRDFa(
468     uno::Reference<rdf::XMetadatable> const & i_xObject,
469     ::rtl::OUString const & i_rAbout,
470     ::rtl::OUString const & i_rProperty,
471     ::rtl::OUString const & i_rContent,
472     ::rtl::OUString const & i_rDatatype)
473 {
474     ::boost::shared_ptr<ParsedRDFaAttributes> pAttributes(
475         ParseRDFa(i_rAbout, i_rProperty, i_rContent, i_rDatatype) );
476     if (pAttributes.get())
477     {
478         AddRDFa(i_xObject, pAttributes);
479     }
480 }
481 
482 void RDFaImportHelper::InsertRDFa(
483     uno::Reference< rdf::XRepositorySupplier> const & i_xModel)
484 {
485     OSL_ENSURE(i_xModel.is(), "InsertRDFa: invalid arg: model null");
486     if (!i_xModel.is()) return;
487     const uno::Reference< rdf::XDocumentRepository > xRepository(
488         i_xModel->getRDFRepository(), uno::UNO_QUERY);
489     OSL_ENSURE(xRepository.is(), "InsertRDFa: no DocumentRepository?");
490     if (!xRepository.is()) return;
491     RDFaInserter inserter(GetImport().GetComponentContext(), xRepository);
492     ::std::for_each(m_RDFaEntries.begin(), m_RDFaEntries.end(),
493         ::boost::bind(&RDFaInserter::InsertRDFaEntry, &inserter, _1));
494 }
495 
496 } // namespace xmloff
497 
498