1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 #include "precompiled_xmloff.hxx" 29 30 #include "RDFaImportHelper.hxx" 31 32 #include <xmloff/xmlimp.hxx> 33 #include <xmloff/nmspmap.hxx> 34 35 #include <comphelper/sequenceasvector.hxx> 36 37 #include <tools/string.hxx> // for GetAbsoluteReference 38 39 #include <com/sun/star/rdf/URI.hpp> 40 #include <com/sun/star/rdf/XDocumentMetadataAccess.hpp> 41 #include <com/sun/star/rdf/XDocumentRepository.hpp> 42 43 #include <rtl/ustring.hxx> 44 45 #include <boost/bind.hpp> 46 #include <boost/iterator_adaptors.hpp> 47 #ifndef BOOST_ITERATOR_ADAPTOR_DWA053000_HPP_ // from iterator_adaptors.hpp 48 // N.B.: the check for the header guard _of a specific version of boost_ 49 // is here so this may work on different versions of boost, 50 // which sadly put the goods in different header files 51 #include <boost/iterator/transform_iterator.hpp> 52 #endif 53 54 #include <map> 55 #include <iterator> 56 #include <functional> 57 #include <algorithm> 58 59 60 using namespace ::com::sun::star; 61 62 namespace xmloff { 63 64 /** a bit of context for parsing RDFa attributes */ 65 class SAL_DLLPRIVATE RDFaReader 66 { 67 const SvXMLImport & m_rImport; 68 69 const SvXMLImport & GetImport() const { return m_rImport; } 70 71 //FIXME: this is an ugly hack to workaround buggy SvXMLImport::GetAbsolute 72 ::rtl::OUString GetAbsoluteReference(::rtl::OUString const & i_rURI) const 73 { 74 if (!i_rURI.getLength() || i_rURI[0] == '#') 75 { 76 return GetImport().GetBaseURL() + i_rURI; 77 } 78 else 79 { 80 return GetImport().GetAbsoluteReference(i_rURI); 81 } 82 } 83 84 public: 85 RDFaReader(SvXMLImport const & i_rImport) 86 : m_rImport(i_rImport) 87 { } 88 89 // returns URI or blank node! 90 ::rtl::OUString ReadCURIE(::rtl::OUString const & i_rCURIE) const; 91 92 std::vector< ::rtl::OUString > 93 ReadCURIEs(::rtl::OUString const & i_rCURIEs) const; 94 95 ::rtl::OUString 96 ReadURIOrSafeCURIE( ::rtl::OUString const & i_rURIOrSafeCURIE) const; 97 }; 98 99 /** helper to insert RDFa statements into the RDF repository */ 100 class SAL_DLLPRIVATE RDFaInserter 101 { 102 const uno::Reference<uno::XComponentContext> m_xContext; 103 uno::Reference< rdf::XDocumentRepository > m_xRepository; 104 105 typedef ::std::map< ::rtl::OUString, uno::Reference< rdf::XBlankNode > > 106 BlankNodeMap_t; 107 108 BlankNodeMap_t m_BlankNodeMap; 109 110 public: 111 RDFaInserter(uno::Reference<uno::XComponentContext> const & i_xContext, 112 uno::Reference< rdf::XDocumentRepository > const & i_xRepository) 113 : m_xContext(i_xContext) 114 , m_xRepository(i_xRepository) 115 {} 116 117 uno::Reference< rdf::XBlankNode > 118 LookupBlankNode(::rtl::OUString const & i_rNodeId ); 119 120 uno::Reference< rdf::XURI > 121 MakeURI( ::rtl::OUString const & i_rURI) const; 122 123 uno::Reference< rdf::XResource> 124 MakeResource( ::rtl::OUString const & i_rResource); 125 126 void InsertRDFaEntry(struct RDFaEntry const & i_rEntry); 127 }; 128 129 /** store parsed RDFa attributes */ 130 struct SAL_DLLPRIVATE ParsedRDFaAttributes 131 { 132 ::rtl::OUString m_About; 133 ::std::vector< ::rtl::OUString > m_Properties; 134 ::rtl::OUString m_Content; 135 ::rtl::OUString m_Datatype; 136 137 ParsedRDFaAttributes( 138 ::rtl::OUString const & i_rAbout, 139 ::std::vector< ::rtl::OUString > const & i_rProperties, 140 ::rtl::OUString const & i_rContent, 141 ::rtl::OUString const & i_rDatatype) 142 : m_About(i_rAbout) 143 , m_Properties(i_rProperties) 144 , m_Content(i_rContent) 145 , m_Datatype(i_rDatatype) 146 { } 147 }; 148 149 /** store metadatable object and its RDFa attributes */ 150 struct SAL_DLLPRIVATE RDFaEntry 151 { 152 uno::Reference<rdf::XMetadatable> m_xObject; 153 ::boost::shared_ptr<ParsedRDFaAttributes> m_pRDFaAttributes; 154 155 RDFaEntry(uno::Reference<rdf::XMetadatable> const & i_xObject, 156 ::boost::shared_ptr<ParsedRDFaAttributes> const& i_pRDFaAttributes) 157 : m_xObject(i_xObject) 158 , m_pRDFaAttributes(i_pRDFaAttributes) 159 { } 160 }; 161 162 //////////////////////////////////////////////////////////////////////////// 163 164 165 static inline bool isWS(const sal_Unicode i_Char) 166 { 167 return ('\t' == i_Char) || ('\n' == i_Char) || ('\r' == i_Char) 168 || (' ' == i_Char); 169 } 170 171 static ::rtl::OUString splitAtWS(::rtl::OUString & io_rString) 172 { 173 const sal_Int32 len( io_rString.getLength() ); 174 sal_Int32 idxstt(0); 175 while ((idxstt < len) && ( isWS(io_rString[idxstt]))) 176 ++idxstt; // skip leading ws 177 sal_Int32 idxend(idxstt); 178 while ((idxend < len) && (!isWS(io_rString[idxend]))) 179 ++idxend; // the CURIE 180 const ::rtl::OUString ret(io_rString.copy(idxstt, idxend - idxstt)); 181 io_rString = io_rString.copy(idxend); // rest 182 return ret; 183 } 184 185 ::rtl::OUString 186 RDFaReader::ReadCURIE(::rtl::OUString const & i_rCURIE) const 187 { 188 // the RDFa spec says that a prefix is required (it may be empty: ":foo") 189 const sal_Int32 idx( i_rCURIE.indexOf(':') ); 190 if (idx >= 0) 191 { 192 ::rtl::OUString Prefix; 193 ::rtl::OUString LocalName; 194 ::rtl::OUString Namespace; 195 sal_uInt16 nKey( GetImport().GetNamespaceMap()._GetKeyByAttrName( 196 i_rCURIE, &Prefix, &LocalName, &Namespace) ); 197 if (Prefix.equalsAscii("_")) 198 { 199 // eeek, it's a bnode! 200 // "_" is not a valid URI scheme => we can identify bnodes 201 return i_rCURIE; 202 } 203 else 204 { 205 OSL_ENSURE(XML_NAMESPACE_NONE != nKey, "no namespace?"); 206 if ((XML_NAMESPACE_UNKNOWN != nKey) && 207 (XML_NAMESPACE_XMLNS != nKey)) 208 { 209 // N.B.: empty LocalName is valid! 210 const ::rtl::OUString URI(Namespace + LocalName); 211 // return GetImport().GetAbsoluteReference(URI); 212 return GetAbsoluteReference(URI); 213 } 214 else 215 { 216 OSL_TRACE( "ReadCURIE: invalid CURIE: invalid prefix" ); 217 return ::rtl::OUString(); 218 } 219 } 220 } 221 else 222 { 223 OSL_TRACE( "ReadCURIE: invalid CURIE: no prefix" ); 224 return ::rtl::OUString(); 225 } 226 } 227 228 ::std::vector< ::rtl::OUString > 229 RDFaReader::ReadCURIEs(::rtl::OUString const & i_rCURIEs) const 230 { 231 std::vector< ::rtl::OUString > vec; 232 ::rtl::OUString CURIEs(i_rCURIEs); 233 do { 234 ::rtl::OUString curie( splitAtWS(CURIEs) ); 235 if (curie.getLength()) 236 { 237 const ::rtl::OUString uri(ReadCURIE(curie)); 238 if (uri.getLength()) 239 { 240 vec.push_back(uri); 241 } 242 } 243 } 244 while (CURIEs.getLength()); 245 if (!vec.size()) 246 { 247 OSL_TRACE( "ReadCURIEs: invalid CURIEs" ); 248 } 249 return vec; 250 } 251 252 ::rtl::OUString 253 RDFaReader::ReadURIOrSafeCURIE(::rtl::OUString const & i_rURIOrSafeCURIE) const 254 { 255 const sal_Int32 len(i_rURIOrSafeCURIE.getLength()); 256 if (len && (i_rURIOrSafeCURIE[0] == '[')) 257 { 258 if ((len >= 2) && (i_rURIOrSafeCURIE[len - 1] == ']')) 259 { 260 return ReadCURIE(i_rURIOrSafeCURIE.copy(1, len - 2)); 261 } 262 else 263 { 264 OSL_TRACE( "ReadURIOrSafeCURIE: invalid SafeCURIE" ); 265 return ::rtl::OUString(); 266 } 267 } 268 else 269 { 270 if (i_rURIOrSafeCURIE.matchAsciiL("_:", 2)) // blank node 271 { 272 OSL_TRACE( "ReadURIOrSafeCURIE: invalid URI: scheme is _" ); 273 return ::rtl::OUString(); 274 } 275 else 276 { 277 // return GetImport().GetAbsoluteReference(i_rURIOrSafeCURIE); 278 return GetAbsoluteReference(i_rURIOrSafeCURIE); 279 } 280 } 281 } 282 283 //////////////////////////////////////////////////////////////////////////// 284 285 uno::Reference< rdf::XBlankNode > 286 RDFaInserter::LookupBlankNode(::rtl::OUString const & i_rNodeId ) 287 { 288 uno::Reference< rdf::XBlankNode > & rEntry( m_BlankNodeMap[ i_rNodeId ] ); 289 if (!rEntry.is()) 290 { 291 rEntry = m_xRepository->createBlankNode(); 292 } 293 return rEntry; 294 } 295 296 uno::Reference< rdf::XURI > 297 RDFaInserter::MakeURI( ::rtl::OUString const & i_rURI) const 298 { 299 if (i_rURI.matchAsciiL("_:", 2)) // blank node 300 { 301 OSL_TRACE("MakeURI: cannot create URI for blank node"); 302 return 0; 303 } 304 else 305 { 306 try 307 { 308 return rdf::URI::create( m_xContext, i_rURI ); 309 } 310 catch (uno::Exception &) 311 { 312 OSL_ENSURE(false, "MakeURI: cannot create URI"); 313 return 0; 314 } 315 } 316 } 317 318 uno::Reference< rdf::XResource> 319 RDFaInserter::MakeResource( ::rtl::OUString const & i_rResource) 320 { 321 if (i_rResource.matchAsciiL("_:", 2)) // blank node 322 { 323 // we cannot use the blank node label as-is: it must be distinct 324 // from labels in other graphs, so create fresh ones per XML stream 325 // N.B.: content.xml and styles.xml are distinct graphs 326 ::rtl::OUString name( i_rResource.copy(2) ); 327 const uno::Reference< rdf::XBlankNode > xBNode( LookupBlankNode(name) ); 328 OSL_ENSURE(xBNode.is(), "no blank node?"); 329 return uno::Reference<rdf::XResource>( xBNode, uno::UNO_QUERY); 330 } 331 else 332 { 333 return uno::Reference<rdf::XResource>( MakeURI( i_rResource ), 334 uno::UNO_QUERY); 335 } 336 } 337 338 /** i wrote this because c++ implementations cannot agree on which variant 339 of boost::bind and std::mem_fun_ref applied to Reference::is compiles */ 340 class ref_is_null : 341 public ::std::unary_function<sal_Bool, const uno::Reference<rdf::XURI> & > 342 { 343 public: 344 sal_Bool operator() (const uno::Reference<rdf::XURI> & i_rRef) 345 { 346 return !i_rRef.is(); 347 } 348 }; 349 350 void RDFaInserter::InsertRDFaEntry( 351 struct RDFaEntry const & i_rEntry) 352 { 353 OSL_ENSURE(i_rEntry.m_xObject.is(), 354 "InsertRDFaEntry: invalid arg: null object"); 355 if (!i_rEntry.m_xObject.is()) return; 356 357 const uno::Reference< rdf::XResource > xSubject( 358 MakeResource( i_rEntry.m_pRDFaAttributes->m_About ) ); 359 if (!xSubject.is()) 360 { 361 return; // invalid 362 } 363 364 ::comphelper::SequenceAsVector< uno::Reference< rdf::XURI > > predicates; 365 366 predicates.reserve(i_rEntry.m_pRDFaAttributes->m_Properties.size()); 367 368 ::std::remove_copy_if( 369 ::boost::make_transform_iterator( 370 i_rEntry.m_pRDFaAttributes->m_Properties.begin(), 371 ::boost::bind(&RDFaInserter::MakeURI, this, _1)), 372 // argh, this must be the same type :( 373 ::boost::make_transform_iterator( 374 i_rEntry.m_pRDFaAttributes->m_Properties.end(), 375 ::boost::bind(&RDFaInserter::MakeURI, this, _1)), 376 ::std::back_inserter(predicates), 377 ref_is_null() ); 378 // compiles only on wntmsci12 379 // ::boost::bind( ::std::logical_not<sal_Bool>(), ::boost::bind<sal_Bool>(&uno::Reference<rdf::XURI>::is, _1))); 380 // compiles on unxsoli4, wntsci12, but not unxlngi6 381 // ::boost::bind( ::std::logical_not<sal_Bool>(), ::boost::bind<sal_Bool, com::sun::star::uno::Reference<rdf::XURI> >(&uno::Reference<rdf::XURI>::is, _1))); 382 // compiles on unxsoli4, unxlngi6, but not wntsci12 383 // ::std::not1( ::std::mem_fun_ref(&uno::Reference<rdf::XURI>::is)) ); 384 385 if (!predicates.size()) 386 { 387 return; // invalid 388 } 389 390 uno::Reference<rdf::XURI> xDatatype; 391 if (i_rEntry.m_pRDFaAttributes->m_Datatype.getLength()) 392 { 393 xDatatype = MakeURI( i_rEntry.m_pRDFaAttributes->m_Datatype ); 394 } 395 396 try 397 { 398 // N.B.: this will call xMeta->ensureMetadataReference, which is why 399 // this must be done _after_ importing the whole XML file, 400 // to prevent collision between generated ids and ids in the file 401 m_xRepository->setStatementRDFa(xSubject, predicates.getAsConstList(), 402 i_rEntry.m_xObject, 403 i_rEntry.m_pRDFaAttributes->m_Content, xDatatype); 404 } 405 catch (uno::Exception &) 406 { 407 OSL_ENSURE(false, "InsertRDFaEntry: setStatementRDFa failed?"); 408 } 409 } 410 411 //////////////////////////////////////////////////////////////////////////// 412 413 RDFaImportHelper::RDFaImportHelper(const SvXMLImport & i_rImport) 414 : m_rImport(i_rImport) 415 { 416 } 417 418 RDFaImportHelper::~RDFaImportHelper() 419 { 420 } 421 422 ::boost::shared_ptr<ParsedRDFaAttributes> 423 RDFaImportHelper::ParseRDFa( 424 ::rtl::OUString const & i_rAbout, 425 ::rtl::OUString const & i_rProperty, 426 ::rtl::OUString const & i_rContent, 427 ::rtl::OUString const & i_rDatatype) 428 { 429 if (!i_rProperty.getLength()) 430 { 431 OSL_TRACE("AddRDFa: invalid input: xhtml:property empty"); 432 return ::boost::shared_ptr<ParsedRDFaAttributes>(); 433 } 434 // must parse CURIEs here: need namespace declaration context 435 RDFaReader reader(GetImport()); 436 const ::rtl::OUString about( reader.ReadURIOrSafeCURIE(i_rAbout) ); 437 if (!about.getLength()) { 438 return ::boost::shared_ptr<ParsedRDFaAttributes>(); 439 } 440 const ::std::vector< ::rtl::OUString > properties( 441 reader.ReadCURIEs(i_rProperty) ); 442 if (!properties.size()) { 443 return ::boost::shared_ptr<ParsedRDFaAttributes>(); 444 } 445 const ::rtl::OUString datatype( i_rDatatype.getLength() 446 ? reader.ReadCURIE(i_rDatatype) 447 : ::rtl::OUString() ); 448 return ::boost::shared_ptr<ParsedRDFaAttributes>( 449 new ParsedRDFaAttributes(about, properties, i_rContent, datatype)); 450 } 451 452 void 453 RDFaImportHelper::AddRDFa( 454 uno::Reference<rdf::XMetadatable> const & i_xObject, 455 ::boost::shared_ptr<ParsedRDFaAttributes> & i_pRDFaAttributes) 456 { 457 if (!i_xObject.is()) 458 { 459 OSL_ENSURE(false, "AddRDFa: invalid arg: null textcontent"); 460 return; 461 } 462 if (!i_pRDFaAttributes.get()) 463 { 464 OSL_ENSURE(false, "AddRDFa: invalid arg: null RDFa attributes"); 465 return; 466 } 467 m_RDFaEntries.push_back(RDFaEntry(i_xObject, i_pRDFaAttributes)); 468 } 469 470 void 471 RDFaImportHelper::ParseAndAddRDFa( 472 uno::Reference<rdf::XMetadatable> const & i_xObject, 473 ::rtl::OUString const & i_rAbout, 474 ::rtl::OUString const & i_rProperty, 475 ::rtl::OUString const & i_rContent, 476 ::rtl::OUString const & i_rDatatype) 477 { 478 ::boost::shared_ptr<ParsedRDFaAttributes> pAttributes( 479 ParseRDFa(i_rAbout, i_rProperty, i_rContent, i_rDatatype) ); 480 if (pAttributes.get()) 481 { 482 AddRDFa(i_xObject, pAttributes); 483 } 484 } 485 486 void RDFaImportHelper::InsertRDFa( 487 uno::Reference< rdf::XRepositorySupplier> const & i_xModel) 488 { 489 OSL_ENSURE(i_xModel.is(), "InsertRDFa: invalid arg: model null"); 490 if (!i_xModel.is()) return; 491 const uno::Reference< rdf::XDocumentRepository > xRepository( 492 i_xModel->getRDFRepository(), uno::UNO_QUERY); 493 OSL_ENSURE(xRepository.is(), "InsertRDFa: no DocumentRepository?"); 494 if (!xRepository.is()) return; 495 RDFaInserter inserter(GetImport().GetComponentContext(), xRepository); 496 ::std::for_each(m_RDFaEntries.begin(), m_RDFaEntries.end(), 497 ::boost::bind(&RDFaInserter::InsertRDFaEntry, &inserter, _1)); 498 } 499 500 } // namespace xmloff 501 502