1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 #include "precompiled_xmloff.hxx"
29 
30 #include "RDFaImportHelper.hxx"
31 
32 #include <xmloff/xmlimp.hxx>
33 #include <xmloff/nmspmap.hxx>
34 
35 #include <comphelper/sequenceasvector.hxx>
36 
37 #include <tools/string.hxx> // for GetAbsoluteReference
38 
39 #include <com/sun/star/rdf/URI.hpp>
40 #include <com/sun/star/rdf/XDocumentMetadataAccess.hpp>
41 #include <com/sun/star/rdf/XDocumentRepository.hpp>
42 
43 #include <rtl/ustring.hxx>
44 
45 #include <boost/bind.hpp>
46 #include <boost/iterator_adaptors.hpp>
47 #ifndef BOOST_ITERATOR_ADAPTOR_DWA053000_HPP_ // from iterator_adaptors.hpp
48 // N.B.: the check for the header guard _of a specific version of boost_
49 //       is here so this may work on different versions of boost,
50 //       which sadly put the goods in different header files
51 #include <boost/iterator/transform_iterator.hpp>
52 #endif
53 
54 #include <map>
55 #include <iterator>
56 #include <functional>
57 #include <algorithm>
58 
59 
60 using namespace ::com::sun::star;
61 
62 namespace xmloff {
63 
64 /** a bit of context for parsing RDFa attributes */
65 class SAL_DLLPRIVATE RDFaReader
66 {
67     const SvXMLImport & m_rImport;
68 
69     const SvXMLImport & GetImport() const { return m_rImport; }
70 
71     //FIXME: this is an ugly hack to workaround buggy SvXMLImport::GetAbsolute
72     ::rtl::OUString GetAbsoluteReference(::rtl::OUString const & i_rURI) const
73     {
74         if (!i_rURI.getLength() || i_rURI[0] == '#')
75         {
76             return GetImport().GetBaseURL() + i_rURI;
77         }
78         else
79         {
80             return GetImport().GetAbsoluteReference(i_rURI);
81         }
82     }
83 
84 public:
85     RDFaReader(SvXMLImport const & i_rImport)
86         : m_rImport(i_rImport)
87     { }
88 
89     // returns URI or blank node!
90     ::rtl::OUString ReadCURIE(::rtl::OUString const & i_rCURIE) const;
91 
92     std::vector< ::rtl::OUString >
93     ReadCURIEs(::rtl::OUString const & i_rCURIEs) const;
94 
95     ::rtl::OUString
96     ReadURIOrSafeCURIE( ::rtl::OUString const & i_rURIOrSafeCURIE) const;
97 };
98 
99 /** helper to insert RDFa statements into the RDF repository */
100 class SAL_DLLPRIVATE RDFaInserter
101 {
102     const uno::Reference<uno::XComponentContext> m_xContext;
103     uno::Reference< rdf::XDocumentRepository > m_xRepository;
104 
105     typedef ::std::map< ::rtl::OUString, uno::Reference< rdf::XBlankNode > >
106         BlankNodeMap_t;
107 
108     BlankNodeMap_t m_BlankNodeMap;
109 
110 public:
111     RDFaInserter(uno::Reference<uno::XComponentContext> const & i_xContext,
112             uno::Reference< rdf::XDocumentRepository > const & i_xRepository)
113         : m_xContext(i_xContext)
114         , m_xRepository(i_xRepository)
115     {}
116 
117     uno::Reference< rdf::XBlankNode >
118     LookupBlankNode(::rtl::OUString const & i_rNodeId );
119 
120     uno::Reference< rdf::XURI >
121     MakeURI( ::rtl::OUString const & i_rURI) const;
122 
123     uno::Reference< rdf::XResource>
124     MakeResource( ::rtl::OUString const & i_rResource);
125 
126     void InsertRDFaEntry(struct RDFaEntry const & i_rEntry);
127 };
128 
129 /** store parsed RDFa attributes */
130 struct SAL_DLLPRIVATE ParsedRDFaAttributes
131 {
132     ::rtl::OUString m_About;
133     ::std::vector< ::rtl::OUString > m_Properties;
134     ::rtl::OUString m_Content;
135     ::rtl::OUString m_Datatype;
136 
137     ParsedRDFaAttributes(
138             ::rtl::OUString const & i_rAbout,
139             ::std::vector< ::rtl::OUString > const & i_rProperties,
140             ::rtl::OUString const & i_rContent,
141             ::rtl::OUString const & i_rDatatype)
142         : m_About(i_rAbout)
143         , m_Properties(i_rProperties)
144         , m_Content(i_rContent)
145         , m_Datatype(i_rDatatype)
146     { }
147 };
148 
149 /** store metadatable object and its RDFa attributes */
150 struct SAL_DLLPRIVATE RDFaEntry
151 {
152     uno::Reference<rdf::XMetadatable> m_xObject;
153     ::boost::shared_ptr<ParsedRDFaAttributes> m_pRDFaAttributes;
154 
155     RDFaEntry(uno::Reference<rdf::XMetadatable> const & i_xObject,
156             ::boost::shared_ptr<ParsedRDFaAttributes> const& i_pRDFaAttributes)
157         : m_xObject(i_xObject)
158         , m_pRDFaAttributes(i_pRDFaAttributes)
159     { }
160 };
161 
162 ////////////////////////////////////////////////////////////////////////////
163 
164 
165 static inline bool isWS(const sal_Unicode i_Char)
166 {
167     return ('\t' == i_Char) || ('\n' == i_Char) || ('\r' == i_Char)
168         || (' ' == i_Char);
169 }
170 
171 static ::rtl::OUString splitAtWS(::rtl::OUString & io_rString)
172 {
173     const sal_Int32 len( io_rString.getLength() );
174     sal_Int32 idxstt(0);
175     while ((idxstt < len) && ( isWS(io_rString[idxstt])))
176         ++idxstt; // skip leading ws
177     sal_Int32 idxend(idxstt);
178     while ((idxend < len) && (!isWS(io_rString[idxend])))
179         ++idxend; // the CURIE
180     const ::rtl::OUString ret(io_rString.copy(idxstt, idxend - idxstt));
181     io_rString = io_rString.copy(idxend); // rest
182     return ret;
183 }
184 
185 ::rtl::OUString
186 RDFaReader::ReadCURIE(::rtl::OUString const & i_rCURIE) const
187 {
188     // the RDFa spec says that a prefix is required (it may be empty: ":foo")
189     const sal_Int32 idx( i_rCURIE.indexOf(':') );
190     if (idx >= 0)
191     {
192         ::rtl::OUString Prefix;
193         ::rtl::OUString LocalName;
194         ::rtl::OUString Namespace;
195         sal_uInt16 nKey( GetImport().GetNamespaceMap()._GetKeyByAttrName(
196             i_rCURIE, &Prefix, &LocalName, &Namespace) );
197         if (Prefix.equalsAscii("_"))
198         {
199             // eeek, it's a bnode!
200             // "_" is not a valid URI scheme => we can identify bnodes
201             return i_rCURIE;
202         }
203         else
204         {
205             OSL_ENSURE(XML_NAMESPACE_NONE != nKey, "no namespace?");
206             if ((XML_NAMESPACE_UNKNOWN != nKey) &&
207                 (XML_NAMESPACE_XMLNS   != nKey))
208             {
209                 // N.B.: empty LocalName is valid!
210                 const ::rtl::OUString URI(Namespace + LocalName);
211 //                return GetImport().GetAbsoluteReference(URI);
212                 return GetAbsoluteReference(URI);
213             }
214             else
215             {
216                 OSL_TRACE( "ReadCURIE: invalid CURIE: invalid prefix" );
217                 return ::rtl::OUString();
218             }
219         }
220     }
221     else
222     {
223         OSL_TRACE( "ReadCURIE: invalid CURIE: no prefix" );
224         return ::rtl::OUString();
225     }
226 }
227 
228 ::std::vector< ::rtl::OUString >
229 RDFaReader::ReadCURIEs(::rtl::OUString const & i_rCURIEs) const
230 {
231     std::vector< ::rtl::OUString > vec;
232     ::rtl::OUString CURIEs(i_rCURIEs);
233     do {
234       ::rtl::OUString curie( splitAtWS(CURIEs) );
235       if (curie.getLength())
236       {
237           const ::rtl::OUString uri(ReadCURIE(curie));
238           if (uri.getLength())
239           {
240               vec.push_back(uri);
241           }
242       }
243     }
244     while (CURIEs.getLength());
245     if (!vec.size())
246     {
247         OSL_TRACE( "ReadCURIEs: invalid CURIEs" );
248     }
249     return vec;
250 }
251 
252 ::rtl::OUString
253 RDFaReader::ReadURIOrSafeCURIE(::rtl::OUString const & i_rURIOrSafeCURIE) const
254 {
255     const sal_Int32 len(i_rURIOrSafeCURIE.getLength());
256     if (len && (i_rURIOrSafeCURIE[0] == '['))
257     {
258         if ((len >= 2) && (i_rURIOrSafeCURIE[len - 1] == ']'))
259         {
260             return ReadCURIE(i_rURIOrSafeCURIE.copy(1, len - 2));
261         }
262         else
263         {
264             OSL_TRACE( "ReadURIOrSafeCURIE: invalid SafeCURIE" );
265             return ::rtl::OUString();
266         }
267     }
268     else
269     {
270         if (i_rURIOrSafeCURIE.matchAsciiL("_:", 2)) // blank node
271         {
272             OSL_TRACE( "ReadURIOrSafeCURIE: invalid URI: scheme is _" );
273             return ::rtl::OUString();
274         }
275         else
276         {
277 //            return GetImport().GetAbsoluteReference(i_rURIOrSafeCURIE);
278             return GetAbsoluteReference(i_rURIOrSafeCURIE);
279         }
280     }
281 }
282 
283 ////////////////////////////////////////////////////////////////////////////
284 
285 uno::Reference< rdf::XBlankNode >
286 RDFaInserter::LookupBlankNode(::rtl::OUString const & i_rNodeId )
287 {
288     uno::Reference< rdf::XBlankNode > & rEntry( m_BlankNodeMap[ i_rNodeId ] );
289     if (!rEntry.is())
290     {
291         rEntry = m_xRepository->createBlankNode();
292     }
293     return rEntry;
294 }
295 
296 uno::Reference< rdf::XURI >
297 RDFaInserter::MakeURI( ::rtl::OUString const & i_rURI) const
298 {
299     if (i_rURI.matchAsciiL("_:", 2)) // blank node
300     {
301         OSL_TRACE("MakeURI: cannot create URI for blank node");
302         return 0;
303     }
304     else
305     {
306         try
307         {
308             return rdf::URI::create( m_xContext, i_rURI );
309         }
310         catch (uno::Exception &)
311         {
312             OSL_ENSURE(false, "MakeURI: cannot create URI");
313             return 0;
314         }
315     }
316 }
317 
318 uno::Reference< rdf::XResource>
319 RDFaInserter::MakeResource( ::rtl::OUString const & i_rResource)
320 {
321     if (i_rResource.matchAsciiL("_:", 2)) // blank node
322     {
323         // we cannot use the blank node label as-is: it must be distinct
324         // from labels in other graphs, so create fresh ones per XML stream
325         // N.B.: content.xml and styles.xml are distinct graphs
326         ::rtl::OUString name( i_rResource.copy(2) );
327         const uno::Reference< rdf::XBlankNode > xBNode( LookupBlankNode(name) );
328         OSL_ENSURE(xBNode.is(), "no blank node?");
329         return uno::Reference<rdf::XResource>( xBNode, uno::UNO_QUERY);
330     }
331     else
332     {
333         return uno::Reference<rdf::XResource>( MakeURI( i_rResource ),
334             uno::UNO_QUERY);
335     }
336 }
337 
338 /** i wrote this because c++ implementations cannot agree on which variant
339     of boost::bind and std::mem_fun_ref applied to Reference::is compiles */
340 class ref_is_null :
341     public ::std::unary_function<sal_Bool, const uno::Reference<rdf::XURI> & >
342 {
343 public:
344     sal_Bool operator() (const uno::Reference<rdf::XURI> & i_rRef)
345     {
346         return !i_rRef.is();
347     }
348 };
349 
350 void RDFaInserter::InsertRDFaEntry(
351     struct RDFaEntry const & i_rEntry)
352 {
353     OSL_ENSURE(i_rEntry.m_xObject.is(),
354         "InsertRDFaEntry: invalid arg: null object");
355     if (!i_rEntry.m_xObject.is()) return;
356 
357     const uno::Reference< rdf::XResource > xSubject(
358         MakeResource( i_rEntry.m_pRDFaAttributes->m_About ) );
359     if (!xSubject.is())
360     {
361         return; // invalid
362     }
363 
364     ::comphelper::SequenceAsVector< uno::Reference< rdf::XURI > > predicates;
365 
366     predicates.reserve(i_rEntry.m_pRDFaAttributes->m_Properties.size());
367 
368     ::std::remove_copy_if(
369         ::boost::make_transform_iterator(
370             i_rEntry.m_pRDFaAttributes->m_Properties.begin(),
371             ::boost::bind(&RDFaInserter::MakeURI, this, _1)),
372         // argh, this must be the same type :(
373         ::boost::make_transform_iterator(
374             i_rEntry.m_pRDFaAttributes->m_Properties.end(),
375             ::boost::bind(&RDFaInserter::MakeURI, this, _1)),
376         ::std::back_inserter(predicates),
377         ref_is_null() );
378         // compiles only on wntmsci12
379 //        ::boost::bind( ::std::logical_not<sal_Bool>(), ::boost::bind<sal_Bool>(&uno::Reference<rdf::XURI>::is, _1)));
380         // compiles on unxsoli4, wntsci12, but not unxlngi6
381 //        ::boost::bind( ::std::logical_not<sal_Bool>(), ::boost::bind<sal_Bool, com::sun::star::uno::Reference<rdf::XURI> >(&uno::Reference<rdf::XURI>::is, _1)));
382         // compiles on unxsoli4, unxlngi6, but not wntsci12
383 //        ::std::not1( ::std::mem_fun_ref(&uno::Reference<rdf::XURI>::is)) );
384 
385     if (!predicates.size())
386     {
387         return; // invalid
388     }
389 
390     uno::Reference<rdf::XURI> xDatatype;
391     if (i_rEntry.m_pRDFaAttributes->m_Datatype.getLength())
392     {
393         xDatatype = MakeURI( i_rEntry.m_pRDFaAttributes->m_Datatype );
394     }
395 
396     try
397     {
398         // N.B.: this will call xMeta->ensureMetadataReference, which is why
399         // this must be done _after_ importing the whole XML file,
400         // to prevent collision between generated ids and ids in the file
401         m_xRepository->setStatementRDFa(xSubject, predicates.getAsConstList(),
402             i_rEntry.m_xObject,
403             i_rEntry.m_pRDFaAttributes->m_Content, xDatatype);
404     }
405     catch (uno::Exception &)
406     {
407         OSL_ENSURE(false, "InsertRDFaEntry: setStatementRDFa failed?");
408     }
409 }
410 
411 ////////////////////////////////////////////////////////////////////////////
412 
413 RDFaImportHelper::RDFaImportHelper(const SvXMLImport & i_rImport)
414     : m_rImport(i_rImport)
415 {
416 }
417 
418 RDFaImportHelper::~RDFaImportHelper()
419 {
420 }
421 
422 ::boost::shared_ptr<ParsedRDFaAttributes>
423 RDFaImportHelper::ParseRDFa(
424     ::rtl::OUString const & i_rAbout,
425     ::rtl::OUString const & i_rProperty,
426     ::rtl::OUString const & i_rContent,
427     ::rtl::OUString const & i_rDatatype)
428 {
429     if (!i_rProperty.getLength())
430     {
431         OSL_TRACE("AddRDFa: invalid input: xhtml:property empty");
432         return ::boost::shared_ptr<ParsedRDFaAttributes>();
433     }
434     // must parse CURIEs here: need namespace declaration context
435     RDFaReader reader(GetImport());
436     const ::rtl::OUString about( reader.ReadURIOrSafeCURIE(i_rAbout) );
437     if (!about.getLength()) {
438         return ::boost::shared_ptr<ParsedRDFaAttributes>();
439     }
440     const ::std::vector< ::rtl::OUString > properties(
441         reader.ReadCURIEs(i_rProperty) );
442     if (!properties.size()) {
443         return ::boost::shared_ptr<ParsedRDFaAttributes>();
444     }
445     const ::rtl::OUString datatype( i_rDatatype.getLength()
446         ?   reader.ReadCURIE(i_rDatatype)
447         :   ::rtl::OUString() );
448     return ::boost::shared_ptr<ParsedRDFaAttributes>(
449             new ParsedRDFaAttributes(about, properties, i_rContent, datatype));
450 }
451 
452 void
453 RDFaImportHelper::AddRDFa(
454     uno::Reference<rdf::XMetadatable> const & i_xObject,
455     ::boost::shared_ptr<ParsedRDFaAttributes> & i_pRDFaAttributes)
456 {
457     if (!i_xObject.is())
458     {
459         OSL_ENSURE(false, "AddRDFa: invalid arg: null textcontent");
460         return;
461     }
462     if (!i_pRDFaAttributes.get())
463     {
464         OSL_ENSURE(false, "AddRDFa: invalid arg: null RDFa attributes");
465         return;
466     }
467     m_RDFaEntries.push_back(RDFaEntry(i_xObject, i_pRDFaAttributes));
468 }
469 
470 void
471 RDFaImportHelper::ParseAndAddRDFa(
472     uno::Reference<rdf::XMetadatable> const & i_xObject,
473     ::rtl::OUString const & i_rAbout,
474     ::rtl::OUString const & i_rProperty,
475     ::rtl::OUString const & i_rContent,
476     ::rtl::OUString const & i_rDatatype)
477 {
478     ::boost::shared_ptr<ParsedRDFaAttributes> pAttributes(
479         ParseRDFa(i_rAbout, i_rProperty, i_rContent, i_rDatatype) );
480     if (pAttributes.get())
481     {
482         AddRDFa(i_xObject, pAttributes);
483     }
484 }
485 
486 void RDFaImportHelper::InsertRDFa(
487     uno::Reference< rdf::XRepositorySupplier> const & i_xModel)
488 {
489     OSL_ENSURE(i_xModel.is(), "InsertRDFa: invalid arg: model null");
490     if (!i_xModel.is()) return;
491     const uno::Reference< rdf::XDocumentRepository > xRepository(
492         i_xModel->getRDFRepository(), uno::UNO_QUERY);
493     OSL_ENSURE(xRepository.is(), "InsertRDFa: no DocumentRepository?");
494     if (!xRepository.is()) return;
495     RDFaInserter inserter(GetImport().GetComponentContext(), xRepository);
496     ::std::for_each(m_RDFaEntries.begin(), m_RDFaEntries.end(),
497         ::boost::bind(&RDFaInserter::InsertRDFaEntry, &inserter, _1));
498 }
499 
500 } // namespace xmloff
501 
502