1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 #include <documentbuilder.hxx>
29 
30 #include <string.h>
31 #include <stdio.h>
32 #include <stdarg.h>
33 
34 #include <libxml/xmlerror.h>
35 #include <libxml/tree.h>
36 
37 #include <boost/shared_ptr.hpp>
38 
39 #include <rtl/alloc.h>
40 #include <rtl/memory.h>
41 #include <rtl/ustrbuf.hxx>
42 
43 #include <cppuhelper/implbase1.hxx>
44 
45 #include <com/sun/star/xml/sax/SAXParseException.hpp>
46 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
47 #include <com/sun/star/task/XInteractionHandler.hpp>
48 
49 #include <ucbhelper/content.hxx>
50 #include <ucbhelper/commandenvironment.hxx>
51 
52 #include <node.hxx>
53 #include <document.hxx>
54 
55 
56 using ::rtl::OUStringBuffer;
57 using ::rtl::OString;
58 using ::com::sun::star::xml::sax::InputSource;
59 using namespace ucbhelper;
60 using namespace ::com::sun::star::ucb;
61 using ::com::sun::star::task::XInteractionHandler;
62 
63 
64 namespace DOM
65 {
66 
67 	class CDefaultEntityResolver : public cppu::WeakImplHelper1< XEntityResolver >
68 	{
69 	public:
70 	    virtual InputSource SAL_CALL resolveEntity( const OUString& sPublicId, const OUString& sSystemId )
71 			throw (::com::sun::star::uno::RuntimeException)
72 		{
73 			InputSource is;
74 			is.sPublicId = sPublicId;
75 			is.sSystemId = sSystemId;
76 			is.sEncoding = OUString();
77 
78 			try {
79 				Reference< XCommandEnvironment > aEnvironment(
80 					new CommandEnvironment(Reference< XInteractionHandler >(),
81 										   Reference< XProgressHandler >() ));
82 				Content aContent(sSystemId, aEnvironment);
83 
84 				is.aInputStream = aContent.openStream();
85 			} catch (com::sun::star::uno::Exception) {
86 				OSL_ENSURE(sal_False, "exception in default entity resolver");
87 				is.aInputStream = Reference< XInputStream >();
88 			}
89 			return is;
90 		}
91 
92 	};
93 
94     CDocumentBuilder::CDocumentBuilder(
95             Reference< XMultiServiceFactory > const& xFactory)
96         : m_xFactory(xFactory)
97         , m_xEntityResolver(new CDefaultEntityResolver())
98     {
99         // init libxml. libxml will protect itself against multiple
100         // initializations so there is no problem here if this gets
101         // called multiple times.
102         xmlInitParser();
103     }
104 
105     Reference< XInterface > CDocumentBuilder::_getInstance(const Reference< XMultiServiceFactory >& rSMgr)
106     {
107         return static_cast< XDocumentBuilder* >(new CDocumentBuilder(rSMgr));
108     }
109 
110     const char* CDocumentBuilder::aImplementationName = "com.sun.star.comp.xml.dom.DocumentBuilder";
111     const char* CDocumentBuilder::aSupportedServiceNames[] = {
112         "com.sun.star.xml.dom.DocumentBuilder",
113         NULL
114     };
115 
116     OUString CDocumentBuilder::_getImplementationName()
117     {
118 	    return OUString::createFromAscii(aImplementationName);
119     }
120     Sequence<OUString> CDocumentBuilder::_getSupportedServiceNames()
121     {
122 	    Sequence<OUString> aSequence;
123 	    for (int i=0; aSupportedServiceNames[i]!=NULL; i++) {
124 		    aSequence.realloc(i+1);
125 		    aSequence[i]=(OUString::createFromAscii(aSupportedServiceNames[i]));
126 	    }
127 	    return aSequence;
128     }
129 
130     Sequence< OUString > SAL_CALL CDocumentBuilder::getSupportedServiceNames()
131         throw (RuntimeException)
132     {
133         return CDocumentBuilder::_getSupportedServiceNames();
134     }
135 
136     OUString SAL_CALL CDocumentBuilder::getImplementationName()
137         throw (RuntimeException)
138     {
139         return CDocumentBuilder::_getImplementationName();
140     }
141 
142     sal_Bool SAL_CALL CDocumentBuilder::supportsService(const OUString& aServiceName)
143         throw (RuntimeException)
144     {
145         Sequence< OUString > supported = CDocumentBuilder::_getSupportedServiceNames();
146         for (sal_Int32 i=0; i<supported.getLength(); i++)
147         {
148             if (supported[i] == aServiceName) return sal_True;
149         }
150         return sal_False;
151     }
152 
153     Reference< XDOMImplementation > SAL_CALL CDocumentBuilder::getDOMImplementation()
154         throw (RuntimeException)
155     {
156 
157         return Reference< XDOMImplementation >();
158     }
159 
160     sal_Bool SAL_CALL CDocumentBuilder::isNamespaceAware()
161         throw (RuntimeException)
162     {
163         return sal_True;
164     }
165 
166     sal_Bool SAL_CALL CDocumentBuilder::isValidating()
167         throw (RuntimeException)
168     {
169         return sal_False;
170     }
171 
172     Reference< XDocument > SAL_CALL CDocumentBuilder::newDocument()
173         throw (RuntimeException)
174     {
175         ::osl::MutexGuard const g(m_Mutex);
176 
177         // create a new document
178         xmlDocPtr pDocument = xmlNewDoc((const xmlChar*)"1.0");
179         Reference< XDocument > const xRet(
180                 CDocument::CreateCDocument(pDocument).get());
181         return xRet;
182     }
183 
184 	static OUString make_error_message(xmlParserCtxtPtr ctxt)
185 	{
186 		OUStringBuffer buf;
187 		buf.appendAscii(ctxt->lastError.message);
188 		buf.appendAscii("Line: ");
189 		buf.append(static_cast<sal_Int32>(ctxt->lastError.line));
190 		buf.appendAscii("\nColumn: ");
191 		buf.append(static_cast<sal_Int32>(ctxt->lastError.int2));
192 		OUString msg = buf.makeStringAndClear();
193 		return msg;
194 	}
195 
196 	// -- callbacks and context struct for parsing from stream
197 	// -- c-linkage, so the callbacks can be used by libxml
198 	extern "C" {
199 
200 	// context struct passed to IO functions
201 	typedef struct context {
202 		CDocumentBuilder *pBuilder;
203 		Reference< XInputStream > rInputStream;
204 		bool close;
205 		bool freeOnClose;
206 	} context_t;
207 
208     static int xmlIO_read_func( void *context, char *buffer, int len)
209     {
210 		// get the context...
211         context_t *pctx = static_cast<context_t*>(context);
212 		if (!pctx->rInputStream.is())
213 			return -1;
214 		try {
215 			// try to read the requested number of bytes
216             Sequence< sal_Int8 > chunk(len);
217             int nread = pctx->rInputStream->readBytes(chunk, len);
218 
219             // copy bytes to the provided buffer
220             rtl_copyMemory(buffer, chunk.getConstArray(), nread);
221             return nread;
222 		} catch (com::sun::star::uno::Exception& ex) {
223             (void) ex;
224             OSL_ENSURE(sal_False, OUStringToOString(ex.Message, RTL_TEXTENCODING_UTF8).getStr());
225             return -1;
226         }
227     }
228 
229     static int xmlIO_close_func(void* context)
230 	{
231 		// get the context...
232 		context_t *pctx = static_cast<context_t*>(context);
233 		if (!pctx->rInputStream.is())
234 			return 0;
235         try
236         {
237 			if (pctx->close)
238 				pctx->rInputStream->closeInput();
239 			if (pctx->freeOnClose)
240 				delete pctx;
241             return 0;
242 		} catch (com::sun::star::uno::Exception& ex) {
243             (void) ex;
244             OSL_ENSURE(sal_False, OUStringToOString(ex.Message, RTL_TEXTENCODING_UTF8).getStr());
245             return -1;
246         }
247     }
248 
249 	static xmlParserInputPtr resolve_func(void *ctx,
250                                 const xmlChar *publicId,
251                                 const xmlChar *systemId)
252 	{
253 		// get the CDocumentBuilder object
254 		xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
255 		CDocumentBuilder *builder = static_cast< CDocumentBuilder* >(ctxt->_private);
256 		Reference< XEntityResolver > resolver = builder->getEntityResolver();
257 		OUString sysid;
258 		if (systemId != 0)
259 			sysid = OUString((sal_Char*)systemId, strlen((char*)systemId), RTL_TEXTENCODING_UTF8);
260 		OUString pubid;
261 		if (publicId != 0)
262 			pubid = OUString((sal_Char*)publicId, strlen((char*)publicId), RTL_TEXTENCODING_UTF8);
263 
264 		// resolve the entity
265 		InputSource src = resolver->resolveEntity(pubid, sysid);
266 
267 		// create IO context on heap because this call will no longer be on the stack
268 		// when IO is actually performed through the callbacks. The close function must
269 		// free the memory which is indicated by the freeOnClose field in the context struct
270 		context_t *c = new context_t;
271 		c->pBuilder = builder;
272 		c->rInputStream = src.aInputStream;
273 		c->close = true;
274 		c->freeOnClose = true;
275 
276 		// set up the inputBuffer and inputPtr for libxml
277 		xmlParserInputBufferPtr pBuffer =
278 			xmlParserInputBufferCreateIO(xmlIO_read_func, xmlIO_close_func, c, XML_CHAR_ENCODING_NONE);
279 		xmlParserInputPtr pInput =
280 					xmlNewIOInputStream(ctxt, pBuffer, XML_CHAR_ENCODING_NONE);
281 		return pInput;
282 	}
283 
#if 0
	// Disabled code: an external entity loader that forwards to resolve_func,
	// passing the URL as the system id and no public id.
	static xmlParserInputPtr external_entity_loader(const char *URL, const char * /*ID*/, xmlParserCtxtPtr ctxt)
	{
		const xmlChar * const pSystemId = (const xmlChar*)URL;
		return resolve_func(ctxt, 0, pSystemId);
	}
#endif
291 
292 	// default warning handler triggers assertion
293 	static void warning_func(void * ctx, const char * /*msg*/, ...)
294 	{
295 		OUStringBuffer buf(OUString::createFromAscii("libxml2 warning\n"));
296 		buf.append(make_error_message(static_cast< xmlParserCtxtPtr >(ctx)));
297 		OString msg = OUStringToOString(buf.makeStringAndClear(), RTL_TEXTENCODING_ASCII_US);
298 		OSL_ENSURE(sal_False, msg.getStr());
299 	}
300 
301 	// default error handler triggers assertion
302 	static void error_func(void * ctx, const char * /*msg*/, ...)
303 	{
304 		OUStringBuffer buf(OUString::createFromAscii("libxml2 error\n"));
305 		buf.append(make_error_message(static_cast< xmlParserCtxtPtr >(ctx)));
306 		OString msg = OUStringToOString(buf.makeStringAndClear(), RTL_TEXTENCODING_ASCII_US);
307 		OSL_ENSURE(sal_False, msg.getStr());
308 	}
309 
310 	} // extern "C"
311 
312     void throwEx(xmlParserCtxtPtr ctxt) {
313         OUString msg = make_error_message(ctxt);
314         com::sun::star::xml::sax::SAXParseException saxex;
315         saxex.Message = msg;
316         saxex.LineNumber = static_cast<sal_Int32>(ctxt->lastError.line);
317         saxex.ColumnNumber = static_cast<sal_Int32>(ctxt->lastError.int2);
318         throw saxex;
319     }
320 
321     Reference< XDocument > SAL_CALL CDocumentBuilder::parse(const Reference< XInputStream >& is)
322         throw (RuntimeException, SAXParseException, IOException)
323     {
324         if (!is.is()) {
325             throw RuntimeException();
326         }
327 
328         ::osl::MutexGuard const g(m_Mutex);
329 
330 		// encoding...
331 		/*
332 		xmlChar *encstr = (xmlChar*) OUStringToOString(src.sEncoding, RTL_TEXTENCODING_UTF8).getStr();
333 		xmlCharEncoding enc = xmlParseCharEncoding(encstr);
334 		*/
335 
336         ::boost::shared_ptr<xmlParserCtxt> const pContext(
337                 xmlNewParserCtxt(), xmlFreeParserCtxt);
338 
339 		// register error functions to prevent errors being printed
340 		// on the console
341         pContext->_private = this;
342         pContext->sax->error = error_func;
343         pContext->sax->warning = warning_func;
344         pContext->sax->resolveEntity = resolve_func;
345 
346 		// IO context struct
347 		context_t c;
348 		c.pBuilder = this;
349 		c.rInputStream = is;
350 		// we did not open the stream, thus we do not close it.
351 		c.close = false;
352 		c.freeOnClose = false;
353         xmlDocPtr const pDoc = xmlCtxtReadIO(pContext.get(),
354                 xmlIO_read_func, xmlIO_close_func, &c, 0, 0, 0);
355 
356 		if (pDoc == 0) {
357             throwEx(pContext.get());
358         }
359         Reference< XDocument > const xRet(
360                 CDocument::CreateCDocument(pDoc).get());
361         return xRet;
362     }
363 
364 	Reference< XDocument > SAL_CALL CDocumentBuilder::parseURI(const OUString& sUri)
365 		throw (RuntimeException, SAXParseException, IOException)
366 	{
367         ::osl::MutexGuard const g(m_Mutex);
368 
369         ::boost::shared_ptr<xmlParserCtxt> const pContext(
370                 xmlNewParserCtxt(), xmlFreeParserCtxt);
371         pContext->_private = this;
372         pContext->sax->error = error_func;
373         pContext->sax->warning = warning_func;
374         pContext->sax->resolveEntity = resolve_func;
375 		// xmlSetExternalEntityLoader(external_entity_loader);
376 		OString oUri = OUStringToOString(sUri, RTL_TEXTENCODING_UTF8);
377 		char *uri = (char*) oUri.getStr();
378         xmlDocPtr pDoc = xmlCtxtReadFile(pContext.get(), uri, 0, 0);
379 		if (pDoc == 0) {
380             throwEx(pContext.get());
381         }
382         Reference< XDocument > const xRet(
383                 CDocument::CreateCDocument(pDoc).get());
384         return xRet;
385 	}
386 
387     void SAL_CALL
388     CDocumentBuilder::setEntityResolver(Reference< XEntityResolver > const& xER)
389 		throw (RuntimeException)
390 	{
391         ::osl::MutexGuard const g(m_Mutex);
392 
393         m_xEntityResolver = xER;
394     }
395 
396 	Reference< XEntityResolver > SAL_CALL CDocumentBuilder::getEntityResolver()
397 		throw (RuntimeException)
398     {
399         ::osl::MutexGuard const g(m_Mutex);
400 
401         return m_xEntityResolver;
402     }
403 
404     void SAL_CALL
405     CDocumentBuilder::setErrorHandler(Reference< XErrorHandler > const& xEH)
406 		throw (RuntimeException)
407     {
408         ::osl::MutexGuard const g(m_Mutex);
409 
410         m_xErrorHandler = xEH;
411     }
412 }
413