/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/

#include "precompiled_xmloff.hxx"

#include "RDFaImportHelper.hxx"

// NOTE(review): the names of the angle-bracket headers below were restored
// from the identifiers this file uses; confirm them against the build tree.
#include <xmloff/xmlimp.hxx>
#include <xmloff/nmspmap.hxx>

#include <comphelper/sequenceasvector.hxx>

#include <tools/string.hxx> // for GetAbsoluteReference

#include <com/sun/star/rdf/URI.hpp>
#include <com/sun/star/rdf/XDocumentMetadataAccess.hpp>
#include <com/sun/star/rdf/XDocumentRepository.hpp>

#include <rtl/ustring.hxx>

#include <boost/bind.hpp>
#include <boost/iterator_adaptors.hpp>
#ifndef BOOST_ITERATOR_ADAPTOR_DWA053000_HPP_ // from iterator_adaptors.hpp
// N.B.: the check for the header guard _of a specific version of boost_
//       is here so this may work on different versions of boost,
//       which sadly put the goods in different header files
#include <boost/iterator/transform_iterator.hpp>
#endif

#include <map>
#include <iterator>
#include <functional>
#include <algorithm>


using namespace ::com::sun::star;

namespace xmloff {

/** a bit of context for parsing RDFa attributes

    Resolves CURIEs against the namespace map of the import and turns
    relative references into absolute ones.
 */
class SAL_DLLPRIVATE RDFaReader
{
    const SvXMLImport & m_rImport;

    const SvXMLImport & GetImport() const { return m_rImport; }

    //FIXME: this is an ugly hack to workaround buggy SvXMLImport::GetAbsolute
    /** make i_rURI absolute.

        Empty references and pure fragment references ("#foo") are appended
        to the base URL directly; everything else is delegated to the import.
     */
    ::rtl::OUString GetAbsoluteReference(::rtl::OUString const & i_rURI) const
    {
        if (!i_rURI.getLength() || i_rURI[0] == '#')
        {
            return GetImport().GetBaseURL() + i_rURI;
        }
        else
        {
            return GetImport().GetAbsoluteReference(i_rURI);
        }
    }

public:
    RDFaReader(SvXMLImport const & i_rImport)
        : m_rImport(i_rImport)
    { }

    /** parse a single CURIE.
        @returns URI or blank node ("_:..."); empty string if invalid
     */
    ::rtl::OUString ReadCURIE(::rtl::OUString const & i_rCURIE) const;

    /** parse a whitespace-separated list of CURIEs.
        @returns the valid entries only; invalid ones are dropped
     */
    std::vector< ::rtl::OUString >
    ReadCURIEs(::rtl::OUString const & i_rCURIEs) const;

    /** parse either a SafeCURIE ("[prefix:name]") or a plain URI.
        @returns absolute URI or blank node; empty string if invalid
     */
    ::rtl::OUString
    ReadURIOrSafeCURIE( ::rtl::OUString const & i_rURIOrSafeCURIE) const;
};

/** helper to insert RDFa statements into the RDF repository */
class SAL_DLLPRIVATE RDFaInserter
{
    const uno::Reference< uno::XComponentContext > m_xContext;
    uno::Reference< rdf::XDocumentRepository > m_xRepository;

    typedef ::std::map< ::rtl::OUString,
        uno::Reference< rdf::XBlankNode > > BlankNodeMap_t;

    /// blank node label (without "_:") -> blank node created for it
    BlankNodeMap_t m_BlankNodeMap;

public:
    RDFaInserter(uno::Reference< uno::XComponentContext > const & i_xContext,
            uno::Reference< rdf::XDocumentRepository > const & i_xRepository)
        : m_xContext(i_xContext)
        , m_xRepository(i_xRepository)
    {}

    /** get the blank node for a label, creating it on first use */
    uno::Reference< rdf::XBlankNode >
    LookupBlankNode(::rtl::OUString const & i_rNodeId );

    /** create a URI node; returns null for blank nodes and on failure */
    uno::Reference< rdf::XURI >
    MakeURI( ::rtl::OUString const & i_rURI) const;

    /** create a blank node ("_:...") or a URI node; may return null */
    uno::Reference< rdf::XResource>
    MakeResource( ::rtl::OUString const & i_rResource);

    /** insert the statements of one entry into the repository */
    void InsertRDFaEntry(struct RDFaEntry const & i_rEntry);
};

/** store parsed RDFa attributes */
struct SAL_DLLPRIVATE ParsedRDFaAttributes
{
    ::rtl::OUString m_About;
    ::std::vector< ::rtl::OUString > m_Properties;
    ::rtl::OUString m_Content;
    ::rtl::OUString m_Datatype;

    ParsedRDFaAttributes(
            ::rtl::OUString const & i_rAbout,
            ::std::vector< ::rtl::OUString > const & i_rProperties,
            ::rtl::OUString const & i_rContent,
            ::rtl::OUString const & i_rDatatype)
        : m_About(i_rAbout)
        , m_Properties(i_rProperties)
        , m_Content(i_rContent)
        , m_Datatype(i_rDatatype)
    { }
};

/** store metadatable object and its RDFa attributes */
struct SAL_DLLPRIVATE RDFaEntry
{
    // NOTE(review): rdf::XMetadatable restored from the use of m_xObject as
    // the object argument of setStatementRDFa -- confirm against the header.
    uno::Reference< rdf::XMetadatable > m_xObject;
    ::boost::shared_ptr< ParsedRDFaAttributes > m_pRDFaAttributes;

    RDFaEntry(uno::Reference< rdf::XMetadatable > const & i_xObject,
            ::boost::shared_ptr< ParsedRDFaAttributes > const &
                i_pRDFaAttributes)
        : m_xObject(i_xObject)
        , m_pRDFaAttributes(i_pRDFaAttributes)
    { }
};

////////////////////////////////////////////////////////////////////////////


/** is the character one of tab, line feed, carriage return, space? */
static inline bool isWS(const sal_Unicode i_Char)
{
    return ('\t' == i_Char) || ('\n' == i_Char) || ('\r' == i_Char)
        || (' ' == i_Char);
}

/** split off the first whitespace-delimited token.

    @param io_rString   in: the input; out: everything after the token
                        (including the whitespace that terminated it)
    @returns the token, or an empty string if there is none
 */
static ::rtl::OUString splitAtWS(::rtl::OUString & io_rString)
{
    const sal_Int32 len( io_rString.getLength() );
    sal_Int32 idxstt(0);
    while ((idxstt < len) && ( isWS(io_rString[idxstt])))
        ++idxstt; // skip leading ws
    sal_Int32 idxend(idxstt);
    while ((idxend < len) && (!isWS(io_rString[idxend])))
        ++idxend; // the CURIE
    const ::rtl::OUString ret(io_rString.copy(idxstt, idxend - idxstt));
    io_rString = io_rString.copy(idxend); // rest
    return ret;
}

::rtl::OUString
RDFaReader::ReadCURIE(::rtl::OUString const & i_rCURIE) const
{
    // the RDFa spec says that a prefix is required (it may be empty: ":foo")
    const sal_Int32 idx( i_rCURIE.indexOf(':') );
    if (idx >= 0)
    {
        ::rtl::OUString Prefix;
        ::rtl::OUString LocalName;
        ::rtl::OUString Namespace;
        sal_uInt16 nKey( GetImport().GetNamespaceMap()._GetKeyByAttrName(
            i_rCURIE, &Prefix, &LocalName, &Namespace) );
        if (Prefix.equalsAscii("_"))
        {
            // eeek, it's a bnode!
            // "_" is not a valid URI scheme => we can identify bnodes
            return i_rCURIE;
        }
        else
        {
            OSL_ENSURE(XML_NAMESPACE_NONE != nKey, "no namespace?");
            if ((XML_NAMESPACE_UNKNOWN != nKey) &&
                (XML_NAMESPACE_XMLNS   != nKey))
            {
                // N.B.: empty LocalName is valid!
                const ::rtl::OUString URI(Namespace + LocalName);
//                return GetImport().GetAbsoluteReference(URI);
                return GetAbsoluteReference(URI);
            }
            else
            {
                OSL_TRACE( "ReadCURIE: invalid CURIE: invalid prefix" );
            }
        }
    }
    else
    {
        OSL_TRACE( "ReadCURIE: invalid CURIE: no prefix" );
    }
    // every invalid case ends up here
    return ::rtl::OUString();
}

::std::vector< ::rtl::OUString >
RDFaReader::ReadCURIEs(::rtl::OUString const & i_rCURIEs) const
{
    std::vector< ::rtl::OUString > vec;
    ::rtl::OUString CURIEs(i_rCURIEs);
    do {
        // splitAtWS consumes the token from CURIEs, so the loop terminates
        ::rtl::OUString curie( splitAtWS(CURIEs) );
        if (curie.getLength())
        {
            const ::rtl::OUString uri(ReadCURIE(curie));
            if (uri.getLength())
            {
                vec.push_back(uri);
            }
        }
    }
    while (CURIEs.getLength());
    if (vec.empty())
    {
        OSL_TRACE( "ReadCURIEs: invalid CURIEs" );
    }
    return vec;
}

::rtl::OUString
RDFaReader::ReadURIOrSafeCURIE(::rtl::OUString const & i_rURIOrSafeCURIE) const
{
    const sal_Int32 len(i_rURIOrSafeCURIE.getLength());
    if (len && (i_rURIOrSafeCURIE[0] == '['))
    {
        if ((len >= 2) && (i_rURIOrSafeCURIE[len - 1] == ']'))
        {
            // strip the enclosing brackets
            return ReadCURIE(i_rURIOrSafeCURIE.copy(1, len - 2));
        }
        else
        {
            OSL_TRACE( "ReadURIOrSafeCURIE: invalid SafeCURIE" );
            return ::rtl::OUString();
        }
    }
    else
    {
        if (i_rURIOrSafeCURIE.matchAsciiL("_:", 2)) // blank node
        {
            // outside of a SafeCURIE, "_:" would be a URI with scheme "_"
            OSL_TRACE( "ReadURIOrSafeCURIE: invalid URI: scheme is _" );
            return ::rtl::OUString();
        }
        else
        {
//            return GetImport().GetAbsoluteReference(i_rURIOrSafeCURIE);
            return GetAbsoluteReference(i_rURIOrSafeCURIE);
        }
    }
}

////////////////////////////////////////////////////////////////////////////

uno::Reference< rdf::XBlankNode >
RDFaInserter::LookupBlankNode(::rtl::OUString const & i_rNodeId )
{
    // operator[] inserts a null reference for labels not seen before
    uno::Reference< rdf::XBlankNode > & rEntry( m_BlankNodeMap[ i_rNodeId ] );
    if (!rEntry.is())
    {
        rEntry = m_xRepository->createBlankNode();
    }
    return rEntry;
}

uno::Reference< rdf::XURI >
RDFaInserter::MakeURI( ::rtl::OUString const & i_rURI) const
{
    if (i_rURI.matchAsciiL("_:", 2)) // blank node
    {
        OSL_TRACE("MakeURI: cannot create URI for blank node");
        return 0;
    }
    else
    {
        try
        {
            return rdf::URI::create( m_xContext, i_rURI );
        }
        catch (const uno::Exception &)
        {
            OSL_ENSURE(false, "MakeURI: cannot create URI");
            return 0;
        }
    }
}

uno::Reference< rdf::XResource>
RDFaInserter::MakeResource( ::rtl::OUString const & i_rResource)
{
    if (i_rResource.matchAsciiL("_:", 2)) // blank node
    {
        // we cannot use the blank node label as-is: it must be distinct
        // from labels in other graphs, so create fresh ones per XML stream
        // N.B.: content.xml and styles.xml are distinct graphs
        ::rtl::OUString name( i_rResource.copy(2) );
        const uno::Reference< rdf::XBlankNode > xBNode( LookupBlankNode(name) );
        OSL_ENSURE(xBNode.is(), "no blank node?");
        return uno::Reference< rdf::XResource >( xBNode, uno::UNO_QUERY);
    }
    else
    {
        return uno::Reference< rdf::XResource >( MakeURI( i_rResource ),
            uno::UNO_QUERY);
    }
}

/** i wrote this because c++ implementations cannot agree on which variant
    of boost::bind and std::mem_fun_ref applied to Reference::is compiles

    Predicate: true if the given reference is null.
 */
class ref_is_null :
    public ::std::unary_function< uno::Reference< rdf::XURI >, sal_Bool >
{
public:
    sal_Bool operator() (const uno::Reference< rdf::XURI > & i_rRef) const
    {
        return !i_rRef.is();
    }
};

void RDFaInserter::InsertRDFaEntry(
    struct RDFaEntry const & i_rEntry)
{
    OSL_ENSURE(i_rEntry.m_xObject.is(),
        "InsertRDFaEntry: invalid arg: null object");
    if (!i_rEntry.m_xObject.is()) return;

    const uno::Reference< rdf::XResource > xSubject(
        MakeResource( i_rEntry.m_pRDFaAttributes->m_About ) );
    if (!xSubject.is())
    {
        return; // invalid
    }

    // convert every property to a URI node, dropping those for which
    // MakeURI returns null
    ::comphelper::SequenceAsVector< uno::Reference< rdf::XURI > > predicates;

    predicates.reserve(i_rEntry.m_pRDFaAttributes->m_Properties.size());

    ::std::remove_copy_if(
        ::boost::make_transform_iterator(
            i_rEntry.m_pRDFaAttributes->m_Properties.begin(),
            ::boost::bind(&RDFaInserter::MakeURI, this, _1)),
        // argh, this must be the same type :(
        ::boost::make_transform_iterator(
            i_rEntry.m_pRDFaAttributes->m_Properties.end(),
            ::boost::bind(&RDFaInserter::MakeURI, this, _1)),
        ::std::back_inserter(predicates),
        ref_is_null() );
    // Historical alternatives to ref_is_null, kept for reference.
    // NOTE(review): the template arguments of these variants are missing
    // in this copy of the file.
    // compiles only on wntmsci12
//        ::boost::bind( ::std::logical_not(), ::boost::bind(&uno::Reference::is, _1)));
    // compiles on unxsoli4, wntsci12, but not unxlngi6
//        ::boost::bind( ::std::logical_not(), ::boost::bind >(&uno::Reference::is, _1)));
    // compiles on unxsoli4, unxlngi6, but not wntsci12
//        ::std::not1( ::std::mem_fun_ref(&uno::Reference::is)) );

    if (!predicates.size())
    {
        return; // invalid
    }

    // the datatype is optional: xDatatype stays null if there is none
    uno::Reference< rdf::XURI > xDatatype;
    if (i_rEntry.m_pRDFaAttributes->m_Datatype.getLength())
    {
        xDatatype = MakeURI( i_rEntry.m_pRDFaAttributes->m_Datatype );
    }

    try
    {
        // N.B.: this will call xMeta->ensureMetadataReference, which is why
        // this must be done _after_ importing the whole XML file,
        // to prevent collision between generated ids and ids in the file
        m_xRepository->setStatementRDFa(xSubject, predicates.getAsConstList(),
            i_rEntry.m_xObject,
            i_rEntry.m_pRDFaAttributes->m_Content, xDatatype);
    }
    catch (const uno::Exception &)
    {
        OSL_ENSURE(false, "InsertRDFaEntry: setStatementRDFa failed?");
    }
}

////////////////////////////////////////////////////////////////////////////

RDFaImportHelper::RDFaImportHelper(const SvXMLImport & i_rImport)
    : m_rImport(i_rImport)
{
}

RDFaImportHelper::~RDFaImportHelper()
{
}

/** parse the RDFa attributes of one element.

    @returns the parsed attributes, or a null pointer if the property
             attribute is empty, or if "about" or all properties are invalid
 */
::boost::shared_ptr< ParsedRDFaAttributes >
RDFaImportHelper::ParseRDFa(
    ::rtl::OUString const & i_rAbout,
    ::rtl::OUString const & i_rProperty,
    ::rtl::OUString const & i_rContent,
    ::rtl::OUString const & i_rDatatype)
{
    if (!i_rProperty.getLength())
    {
        OSL_TRACE("AddRDFa: invalid input: xhtml:property empty");
        return ::boost::shared_ptr< ParsedRDFaAttributes >();
    }
    // must parse CURIEs here: need namespace declaration context
    RDFaReader reader(GetImport());
    const ::rtl::OUString about( reader.ReadURIOrSafeCURIE(i_rAbout) );
    if (!about.getLength()) {
        return ::boost::shared_ptr< ParsedRDFaAttributes >();
    }
    const ::std::vector< ::rtl::OUString > properties(
        reader.ReadCURIEs(i_rProperty) );
    if (properties.empty()) {
        return ::boost::shared_ptr< ParsedRDFaAttributes >();
    }
    // the datatype is optional
    const ::rtl::OUString datatype( i_rDatatype.getLength()
        ?   reader.ReadCURIE(i_rDatatype)
        :   ::rtl::OUString() );
    return ::boost::shared_ptr< ParsedRDFaAttributes >(
            new ParsedRDFaAttributes(about, properties, i_rContent, datatype));
}

/** remember an object and its parsed attributes for InsertRDFa.

    Null arguments are rejected with an assertion and otherwise ignored.
 */
void
RDFaImportHelper::AddRDFa(
    uno::Reference< rdf::XMetadatable > const & i_xObject,
    ::boost::shared_ptr< ParsedRDFaAttributes > & i_pRDFaAttributes)
{
    if (!i_xObject.is())
    {
        OSL_ENSURE(false, "AddRDFa: invalid arg: null textcontent");
        return;
    }
    if (!i_pRDFaAttributes.get())
    {
        OSL_ENSURE(false, "AddRDFa: invalid arg: null RDFa attributes");
        return;
    }
    m_RDFaEntries.push_back(RDFaEntry(i_xObject, i_pRDFaAttributes));
}

/** convenience: ParseRDFa followed by AddRDFa if parsing succeeded */
void
RDFaImportHelper::ParseAndAddRDFa(
    uno::Reference< rdf::XMetadatable > const & i_xObject,
    ::rtl::OUString const & i_rAbout,
    ::rtl::OUString const & i_rProperty,
    ::rtl::OUString const & i_rContent,
    ::rtl::OUString const & i_rDatatype)
{
    ::boost::shared_ptr< ParsedRDFaAttributes > pAttributes(
        ParseRDFa(i_rAbout, i_rProperty, i_rContent, i_rDatatype) );
    if (pAttributes.get())
    {
        AddRDFa(i_xObject, pAttributes);
    }
}

/** insert all remembered entries into the RDF repository of the model */
void RDFaImportHelper::InsertRDFa(
    uno::Reference< rdf::XRepositorySupplier> const & i_xModel)
{
    OSL_ENSURE(i_xModel.is(), "InsertRDFa: invalid arg: model null");
    if (!i_xModel.is()) return;
    const uno::Reference< rdf::XDocumentRepository > xRepository(
        i_xModel->getRDFRepository(), uno::UNO_QUERY);
    OSL_ENSURE(xRepository.is(),
        "InsertRDFa: no DocumentRepository?");
    if (!xRepository.is()) return;
    // one inserter for all entries, so that equal blank node labels
    // map to the same blank node
    RDFaInserter inserter(GetImport().GetComponentContext(), xRepository);
    ::std::for_each(m_RDFaEntries.begin(), m_RDFaEntries.end(),
        ::boost::bind(&RDFaInserter::InsertRDFaEntry, &inserter, _1));
}

} // namespace xmloff