1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 #include "precompiled_xmloff.hxx" 25 26 #include "RDFaImportHelper.hxx" 27 28 #include <xmloff/xmlimp.hxx> 29 #include <xmloff/nmspmap.hxx> 30 31 #include <comphelper/sequenceasvector.hxx> 32 33 #include <tools/string.hxx> // for GetAbsoluteReference 34 35 #include <com/sun/star/rdf/URI.hpp> 36 #include <com/sun/star/rdf/XDocumentMetadataAccess.hpp> 37 #include <com/sun/star/rdf/XDocumentRepository.hpp> 38 39 #include <rtl/ustring.hxx> 40 41 #include <boost/bind.hpp> 42 #include <boost/iterator_adaptors.hpp> 43 #ifndef BOOST_ITERATOR_ADAPTOR_DWA053000_HPP_ // from iterator_adaptors.hpp 44 // N.B.: the check for the header guard _of a specific version of boost_ 45 // is here so this may work on different versions of boost, 46 // which sadly put the goods in different header files 47 #include <boost/iterator/transform_iterator.hpp> 48 #endif 49 50 #include <map> 51 #include <iterator> 52 #include <functional> 53 #include <algorithm> 54 55 56 using namespace ::com::sun::star; 57 58 namespace xmloff { 59 60 /** a bit of context for parsing RDFa attributes */ 61 class SAL_DLLPRIVATE RDFaReader 62 { 63 const SvXMLImport & m_rImport; 64 65 const SvXMLImport & GetImport() const { return m_rImport; } 66 67 //FIXME: this is an ugly hack to workaround buggy SvXMLImport::GetAbsolute 68 ::rtl::OUString GetAbsoluteReference(::rtl::OUString const & i_rURI) const 69 { 70 if (!i_rURI.getLength() || i_rURI[0] == '#') 71 { 72 return GetImport().GetBaseURL() + i_rURI; 73 } 74 else 75 { 76 return GetImport().GetAbsoluteReference(i_rURI); 77 } 78 } 79 80 public: 81 RDFaReader(SvXMLImport const & i_rImport) 82 : m_rImport(i_rImport) 83 { } 84 85 // returns URI or blank node! 86 ::rtl::OUString ReadCURIE(::rtl::OUString const & i_rCURIE) const; 87 88 std::vector< ::rtl::OUString > 89 ReadCURIEs(::rtl::OUString const & i_rCURIEs) const; 90 91 ::rtl::OUString 92 ReadURIOrSafeCURIE( ::rtl::OUString const & i_rURIOrSafeCURIE) const; 93 }; 94 95 /** helper to insert RDFa statements into the RDF repository */ 96 class SAL_DLLPRIVATE RDFaInserter 97 { 98 const uno::Reference<uno::XComponentContext> m_xContext; 99 uno::Reference< rdf::XDocumentRepository > m_xRepository; 100 101 typedef ::std::map< ::rtl::OUString, uno::Reference< rdf::XBlankNode > > 102 BlankNodeMap_t; 103 104 BlankNodeMap_t m_BlankNodeMap; 105 106 public: 107 RDFaInserter(uno::Reference<uno::XComponentContext> const & i_xContext, 108 uno::Reference< rdf::XDocumentRepository > const & i_xRepository) 109 : m_xContext(i_xContext) 110 , m_xRepository(i_xRepository) 111 {} 112 113 uno::Reference< rdf::XBlankNode > 114 LookupBlankNode(::rtl::OUString const & i_rNodeId ); 115 116 uno::Reference< rdf::XURI > 117 MakeURI( ::rtl::OUString const & i_rURI) const; 118 119 uno::Reference< rdf::XResource> 120 MakeResource( ::rtl::OUString const & i_rResource); 121 122 void InsertRDFaEntry(struct RDFaEntry const & i_rEntry); 123 }; 124 125 /** store parsed RDFa attributes */ 126 struct SAL_DLLPRIVATE ParsedRDFaAttributes 127 { 128 ::rtl::OUString m_About; 129 ::std::vector< ::rtl::OUString > m_Properties; 130 ::rtl::OUString m_Content; 131 ::rtl::OUString m_Datatype; 132 133 ParsedRDFaAttributes( 134 ::rtl::OUString const & i_rAbout, 135 ::std::vector< ::rtl::OUString > const & i_rProperties, 136 ::rtl::OUString const & i_rContent, 137 ::rtl::OUString const & i_rDatatype) 138 : m_About(i_rAbout) 139 , m_Properties(i_rProperties) 140 , m_Content(i_rContent) 141 , m_Datatype(i_rDatatype) 142 { } 143 }; 144 145 /** store metadatable object and its RDFa attributes */ 146 struct SAL_DLLPRIVATE RDFaEntry 147 { 148 uno::Reference<rdf::XMetadatable> m_xObject; 149 ::boost::shared_ptr<ParsedRDFaAttributes> m_pRDFaAttributes; 150 151 RDFaEntry(uno::Reference<rdf::XMetadatable> const & i_xObject, 152 ::boost::shared_ptr<ParsedRDFaAttributes> const& i_pRDFaAttributes) 153 : m_xObject(i_xObject) 154 , m_pRDFaAttributes(i_pRDFaAttributes) 155 { } 156 }; 157 158 //////////////////////////////////////////////////////////////////////////// 159 160 161 static inline bool isWS(const sal_Unicode i_Char) 162 { 163 return ('\t' == i_Char) || ('\n' == i_Char) || ('\r' == i_Char) 164 || (' ' == i_Char); 165 } 166 167 static ::rtl::OUString splitAtWS(::rtl::OUString & io_rString) 168 { 169 const sal_Int32 len( io_rString.getLength() ); 170 sal_Int32 idxstt(0); 171 while ((idxstt < len) && ( isWS(io_rString[idxstt]))) 172 ++idxstt; // skip leading ws 173 sal_Int32 idxend(idxstt); 174 while ((idxend < len) && (!isWS(io_rString[idxend]))) 175 ++idxend; // the CURIE 176 const ::rtl::OUString ret(io_rString.copy(idxstt, idxend - idxstt)); 177 io_rString = io_rString.copy(idxend); // rest 178 return ret; 179 } 180 181 ::rtl::OUString 182 RDFaReader::ReadCURIE(::rtl::OUString const & i_rCURIE) const 183 { 184 // the RDFa spec says that a prefix is required (it may be empty: ":foo") 185 const sal_Int32 idx( i_rCURIE.indexOf(':') ); 186 if (idx >= 0) 187 { 188 ::rtl::OUString Prefix; 189 ::rtl::OUString LocalName; 190 ::rtl::OUString Namespace; 191 sal_uInt16 nKey( GetImport().GetNamespaceMap()._GetKeyByAttrName( 192 i_rCURIE, &Prefix, &LocalName, &Namespace) ); 193 if (Prefix.equalsAscii("_")) 194 { 195 // eeek, it's a bnode! 196 // "_" is not a valid URI scheme => we can identify bnodes 197 return i_rCURIE; 198 } 199 else 200 { 201 OSL_ENSURE(XML_NAMESPACE_NONE != nKey, "no namespace?"); 202 if ((XML_NAMESPACE_UNKNOWN != nKey) && 203 (XML_NAMESPACE_XMLNS != nKey)) 204 { 205 // N.B.: empty LocalName is valid! 206 const ::rtl::OUString URI(Namespace + LocalName); 207 // return GetImport().GetAbsoluteReference(URI); 208 return GetAbsoluteReference(URI); 209 } 210 else 211 { 212 OSL_TRACE( "ReadCURIE: invalid CURIE: invalid prefix" ); 213 return ::rtl::OUString(); 214 } 215 } 216 } 217 else 218 { 219 OSL_TRACE( "ReadCURIE: invalid CURIE: no prefix" ); 220 return ::rtl::OUString(); 221 } 222 } 223 224 ::std::vector< ::rtl::OUString > 225 RDFaReader::ReadCURIEs(::rtl::OUString const & i_rCURIEs) const 226 { 227 std::vector< ::rtl::OUString > vec; 228 ::rtl::OUString CURIEs(i_rCURIEs); 229 do { 230 ::rtl::OUString curie( splitAtWS(CURIEs) ); 231 if (curie.getLength()) 232 { 233 const ::rtl::OUString uri(ReadCURIE(curie)); 234 if (uri.getLength()) 235 { 236 vec.push_back(uri); 237 } 238 } 239 } 240 while (CURIEs.getLength()); 241 if (!vec.size()) 242 { 243 OSL_TRACE( "ReadCURIEs: invalid CURIEs" ); 244 } 245 return vec; 246 } 247 248 ::rtl::OUString 249 RDFaReader::ReadURIOrSafeCURIE(::rtl::OUString const & i_rURIOrSafeCURIE) const 250 { 251 const sal_Int32 len(i_rURIOrSafeCURIE.getLength()); 252 if (len && (i_rURIOrSafeCURIE[0] == '[')) 253 { 254 if ((len >= 2) && (i_rURIOrSafeCURIE[len - 1] == ']')) 255 { 256 return ReadCURIE(i_rURIOrSafeCURIE.copy(1, len - 2)); 257 } 258 else 259 { 260 OSL_TRACE( "ReadURIOrSafeCURIE: invalid SafeCURIE" ); 261 return ::rtl::OUString(); 262 } 263 } 264 else 265 { 266 if (i_rURIOrSafeCURIE.matchAsciiL("_:", 2)) // blank node 267 { 268 OSL_TRACE( "ReadURIOrSafeCURIE: invalid URI: scheme is _" ); 269 return ::rtl::OUString(); 270 } 271 else 272 { 273 // return GetImport().GetAbsoluteReference(i_rURIOrSafeCURIE); 274 return GetAbsoluteReference(i_rURIOrSafeCURIE); 275 } 276 } 277 } 278 279 //////////////////////////////////////////////////////////////////////////// 280 281 uno::Reference< rdf::XBlankNode > 282 RDFaInserter::LookupBlankNode(::rtl::OUString const & i_rNodeId ) 283 { 284 uno::Reference< rdf::XBlankNode > & rEntry( m_BlankNodeMap[ i_rNodeId ] ); 285 if (!rEntry.is()) 286 { 287 rEntry = m_xRepository->createBlankNode(); 288 } 289 return rEntry; 290 } 291 292 uno::Reference< rdf::XURI > 293 RDFaInserter::MakeURI( ::rtl::OUString const & i_rURI) const 294 { 295 if (i_rURI.matchAsciiL("_:", 2)) // blank node 296 { 297 OSL_TRACE("MakeURI: cannot create URI for blank node"); 298 return 0; 299 } 300 else 301 { 302 try 303 { 304 return rdf::URI::create( m_xContext, i_rURI ); 305 } 306 catch (uno::Exception &) 307 { 308 OSL_ENSURE(false, "MakeURI: cannot create URI"); 309 return 0; 310 } 311 } 312 } 313 314 uno::Reference< rdf::XResource> 315 RDFaInserter::MakeResource( ::rtl::OUString const & i_rResource) 316 { 317 if (i_rResource.matchAsciiL("_:", 2)) // blank node 318 { 319 // we cannot use the blank node label as-is: it must be distinct 320 // from labels in other graphs, so create fresh ones per XML stream 321 // N.B.: content.xml and styles.xml are distinct graphs 322 ::rtl::OUString name( i_rResource.copy(2) ); 323 const uno::Reference< rdf::XBlankNode > xBNode( LookupBlankNode(name) ); 324 OSL_ENSURE(xBNode.is(), "no blank node?"); 325 return uno::Reference<rdf::XResource>( xBNode, uno::UNO_QUERY); 326 } 327 else 328 { 329 return uno::Reference<rdf::XResource>( MakeURI( i_rResource ), 330 uno::UNO_QUERY); 331 } 332 } 333 334 /** i wrote this because c++ implementations cannot agree on which variant 335 of boost::bind and std::mem_fun_ref applied to Reference::is compiles */ 336 class ref_is_null : 337 public ::std::unary_function<sal_Bool, const uno::Reference<rdf::XURI> & > 338 { 339 public: 340 sal_Bool operator() (const uno::Reference<rdf::XURI> & i_rRef) 341 { 342 return !i_rRef.is(); 343 } 344 }; 345 346 void RDFaInserter::InsertRDFaEntry( 347 struct RDFaEntry const & i_rEntry) 348 { 349 OSL_ENSURE(i_rEntry.m_xObject.is(), 350 "InsertRDFaEntry: invalid arg: null object"); 351 if (!i_rEntry.m_xObject.is()) return; 352 353 const uno::Reference< rdf::XResource > xSubject( 354 MakeResource( i_rEntry.m_pRDFaAttributes->m_About ) ); 355 if (!xSubject.is()) 356 { 357 return; // invalid 358 } 359 360 ::comphelper::SequenceAsVector< uno::Reference< rdf::XURI > > predicates; 361 362 predicates.reserve(i_rEntry.m_pRDFaAttributes->m_Properties.size()); 363 364 ::std::remove_copy_if( 365 ::boost::make_transform_iterator( 366 i_rEntry.m_pRDFaAttributes->m_Properties.begin(), 367 ::boost::bind(&RDFaInserter::MakeURI, this, _1)), 368 // argh, this must be the same type :( 369 ::boost::make_transform_iterator( 370 i_rEntry.m_pRDFaAttributes->m_Properties.end(), 371 ::boost::bind(&RDFaInserter::MakeURI, this, _1)), 372 ::std::back_inserter(predicates), 373 ref_is_null() ); 374 // compiles only on wntmsci12 375 // ::boost::bind( ::std::logical_not<sal_Bool>(), ::boost::bind<sal_Bool>(&uno::Reference<rdf::XURI>::is, _1))); 376 // compiles on unxsoli4, wntsci12, but not unxlngi6 377 // ::boost::bind( ::std::logical_not<sal_Bool>(), ::boost::bind<sal_Bool, com::sun::star::uno::Reference<rdf::XURI> >(&uno::Reference<rdf::XURI>::is, _1))); 378 // compiles on unxsoli4, unxlngi6, but not wntsci12 379 // ::std::not1( ::std::mem_fun_ref(&uno::Reference<rdf::XURI>::is)) ); 380 381 if (!predicates.size()) 382 { 383 return; // invalid 384 } 385 386 uno::Reference<rdf::XURI> xDatatype; 387 if (i_rEntry.m_pRDFaAttributes->m_Datatype.getLength()) 388 { 389 xDatatype = MakeURI( i_rEntry.m_pRDFaAttributes->m_Datatype ); 390 } 391 392 try 393 { 394 // N.B.: this will call xMeta->ensureMetadataReference, which is why 395 // this must be done _after_ importing the whole XML file, 396 // to prevent collision between generated ids and ids in the file 397 m_xRepository->setStatementRDFa(xSubject, predicates.getAsConstList(), 398 i_rEntry.m_xObject, 399 i_rEntry.m_pRDFaAttributes->m_Content, xDatatype); 400 } 401 catch (uno::Exception &) 402 { 403 OSL_ENSURE(false, "InsertRDFaEntry: setStatementRDFa failed?"); 404 } 405 } 406 407 //////////////////////////////////////////////////////////////////////////// 408 409 RDFaImportHelper::RDFaImportHelper(const SvXMLImport & i_rImport) 410 : m_rImport(i_rImport) 411 { 412 } 413 414 RDFaImportHelper::~RDFaImportHelper() 415 { 416 } 417 418 ::boost::shared_ptr<ParsedRDFaAttributes> 419 RDFaImportHelper::ParseRDFa( 420 ::rtl::OUString const & i_rAbout, 421 ::rtl::OUString const & i_rProperty, 422 ::rtl::OUString const & i_rContent, 423 ::rtl::OUString const & i_rDatatype) 424 { 425 if (!i_rProperty.getLength()) 426 { 427 OSL_TRACE("AddRDFa: invalid input: xhtml:property empty"); 428 return ::boost::shared_ptr<ParsedRDFaAttributes>(); 429 } 430 // must parse CURIEs here: need namespace declaration context 431 RDFaReader reader(GetImport()); 432 const ::rtl::OUString about( reader.ReadURIOrSafeCURIE(i_rAbout) ); 433 if (!about.getLength()) { 434 return ::boost::shared_ptr<ParsedRDFaAttributes>(); 435 } 436 const ::std::vector< ::rtl::OUString > properties( 437 reader.ReadCURIEs(i_rProperty) ); 438 if (!properties.size()) { 439 return ::boost::shared_ptr<ParsedRDFaAttributes>(); 440 } 441 const ::rtl::OUString datatype( i_rDatatype.getLength() 442 ? reader.ReadCURIE(i_rDatatype) 443 : ::rtl::OUString() ); 444 return ::boost::shared_ptr<ParsedRDFaAttributes>( 445 new ParsedRDFaAttributes(about, properties, i_rContent, datatype)); 446 } 447 448 void 449 RDFaImportHelper::AddRDFa( 450 uno::Reference<rdf::XMetadatable> const & i_xObject, 451 ::boost::shared_ptr<ParsedRDFaAttributes> & i_pRDFaAttributes) 452 { 453 if (!i_xObject.is()) 454 { 455 OSL_ENSURE(false, "AddRDFa: invalid arg: null textcontent"); 456 return; 457 } 458 if (!i_pRDFaAttributes.get()) 459 { 460 OSL_ENSURE(false, "AddRDFa: invalid arg: null RDFa attributes"); 461 return; 462 } 463 m_RDFaEntries.push_back(RDFaEntry(i_xObject, i_pRDFaAttributes)); 464 } 465 466 void 467 RDFaImportHelper::ParseAndAddRDFa( 468 uno::Reference<rdf::XMetadatable> const & i_xObject, 469 ::rtl::OUString const & i_rAbout, 470 ::rtl::OUString const & i_rProperty, 471 ::rtl::OUString const & i_rContent, 472 ::rtl::OUString const & i_rDatatype) 473 { 474 ::boost::shared_ptr<ParsedRDFaAttributes> pAttributes( 475 ParseRDFa(i_rAbout, i_rProperty, i_rContent, i_rDatatype) ); 476 if (pAttributes.get()) 477 { 478 AddRDFa(i_xObject, pAttributes); 479 } 480 } 481 482 void RDFaImportHelper::InsertRDFa( 483 uno::Reference< rdf::XRepositorySupplier> const & i_xModel) 484 { 485 OSL_ENSURE(i_xModel.is(), "InsertRDFa: invalid arg: model null"); 486 if (!i_xModel.is()) return; 487 const uno::Reference< rdf::XDocumentRepository > xRepository( 488 i_xModel->getRDFRepository(), uno::UNO_QUERY); 489 OSL_ENSURE(xRepository.is(), "InsertRDFa: no DocumentRepository?"); 490 if (!xRepository.is()) return; 491 RDFaInserter inserter(GetImport().GetComponentContext(), xRepository); 492 ::std::for_each(m_RDFaEntries.begin(), m_RDFaEntries.end(), 493 ::boost::bind(&RDFaInserter::InsertRDFaEntry, &inserter, _1)); 494 } 495 496 } // namespace xmloff 497 498