xref: /aoo42x/main/sfx2/source/bastyp/sfxhtml.cxx (revision cdf0e10c)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_sfx2.hxx"
30 
31 #include <tools/urlobj.hxx>
32 
33 #include <sfx2/objsh.hxx>
34 #include <sfx2/docfile.hxx>
35 #include "openflag.hxx"
36 
37 #include <svtools/htmlkywd.hxx>
38 #include <svtools/htmltokn.h>
39 #include <svtools/imap.hxx>
40 #include <svtools/imapcirc.hxx>
41 #include <svtools/imapobj.hxx>
42 #include <svtools/imappoly.hxx>
43 #include <svtools/imaprect.hxx>
44 #ifndef _SVSTDARR_ULONGS_DECL
45 #define _SVSTDARR_ULONGS
46 #include <svl/svstdarr.hxx>
47 #endif
48 #include <svl/zforlist.hxx>
49 #include <rtl/tencinfo.h>
50 #include <tools/tenccvt.hxx>
51 
52 #include <sfx2/sfxhtml.hxx>
53 
54 #include <com/sun/star/beans/XPropertyContainer.hpp>
55 
56 
57 using namespace ::com::sun::star;
58 
59 
60 sal_Char __FAR_DATA sHTML_MIME_text[] = "text/";
61 sal_Char __FAR_DATA sHTML_MIME_application[] = "application/";
62 sal_Char __FAR_DATA sHTML_MIME_experimental[] = "x-";
63 
64 // <INPUT TYPE=xxx>
65 static HTMLOptionEnum __READONLY_DATA aAreaShapeOptEnums[] =
66 {
67 	{ OOO_STRING_SVTOOLS_HTML_SH_rect,		IMAP_OBJ_RECTANGLE	},
68 	{ OOO_STRING_SVTOOLS_HTML_SH_rectangle,	IMAP_OBJ_RECTANGLE	},
69 	{ OOO_STRING_SVTOOLS_HTML_SH_circ,		IMAP_OBJ_CIRCLE		},
70 	{ OOO_STRING_SVTOOLS_HTML_SH_circle,   	IMAP_OBJ_CIRCLE		},
71 	{ OOO_STRING_SVTOOLS_HTML_SH_poly,   		IMAP_OBJ_POLYGON	},
72 	{ OOO_STRING_SVTOOLS_HTML_SH_polygon,		IMAP_OBJ_POLYGON	},
73 	{ 0,					0					}
74 };
75 
76 SfxHTMLParser::SfxHTMLParser( SvStream& rStream, sal_Bool bIsNewDoc,
77 							  SfxMedium *pMed ) :
78 	HTMLParser( rStream, bIsNewDoc ),
79 	pMedium( pMed ), pDLMedium( 0 ),
80 	nMetaTags( 0 )
81 {
82 	DBG_ASSERT( RTL_TEXTENCODING_DONTKNOW == GetSrcEncoding( ),
83 				"SfxHTMLParser::SfxHTMLParser: Wo kommt der ZS her?" );
84 	DBG_ASSERT( !IsSwitchToUCS2(),
85 				"SfxHTMLParser::SfxHTMLParser: Switch to UCS2?" );
86 
87 	// Altough the real default encoding is ISO8859-1, we use MS-1252
88 	// als default encoding.
89 	SetSrcEncoding( GetExtendedCompatibilityTextEncoding(  RTL_TEXTENCODING_ISO_8859_1 ) );
90 
91 	// If the file starts with a BOM, switch to UCS2.
92 	SetSwitchToUCS2( sal_True );
93 }
94 
95 __EXPORT SfxHTMLParser::~SfxHTMLParser()
96 {
97 	DBG_ASSERT( !pDLMedium, "Da ist ein File-Download stehengeblieben" );
98 	delete pDLMedium;
99 }
100 
101 sal_Bool SfxHTMLParser::ParseMapOptions(ImageMap * pImageMap,
102 									const HTMLOptions * pOptions)
103 {
104 	DBG_ASSERT( pImageMap, "ParseMapOptions: keine Image-Map" );
105 	DBG_ASSERT( pOptions, "ParseMapOptions: keine Optionen" );
106 
107 	String aName;
108 
109 	for( sal_uInt16 i=pOptions->Count(); i; )
110 	{
111 		const HTMLOption *pOption = (*pOptions)[--i];
112 		switch( pOption->GetToken() )
113 		{
114 		case HTML_O_NAME:
115 			aName = pOption->GetString();
116 			break;
117 		}
118 	}
119 
120 	if( aName.Len() )
121 		pImageMap->SetName( aName );
122 
123 	return aName.Len() > 0;
124 }
125 
126 sal_Bool SfxHTMLParser::ParseAreaOptions(ImageMap * pImageMap, const String& rBaseURL,
127 									 const HTMLOptions * pOptions,
128 									 sal_uInt16 nEventMouseOver,
129 									 sal_uInt16 nEventMouseOut )
130 {
131 	DBG_ASSERT( pImageMap, "ParseAreaOptions: keine Image-Map" );
132 	DBG_ASSERT( pOptions, "ParseAreaOptions: keine Optionen" );
133 
134 	sal_uInt16 nShape = IMAP_OBJ_RECTANGLE;
135 	SvULongs aCoords;
136 	String aName, aHRef, aAlt, aTarget, sEmpty;
137 	sal_Bool bNoHRef = sal_False;
138 	SvxMacroTableDtor aMacroTbl;
139 
140 	for( sal_uInt16 i=pOptions->Count(); i; )
141 	{
142 		sal_uInt16 nEvent = 0;
143 		ScriptType eScrpType = STARBASIC;
144 		const HTMLOption *pOption = (*pOptions)[--i];
145 		switch( pOption->GetToken() )
146 		{
147 		case HTML_O_NAME:
148 			aName = pOption->GetString();
149 			break;
150 		case HTML_O_SHAPE:
151 			pOption->GetEnum( nShape, aAreaShapeOptEnums );
152 			break;
153 		case HTML_O_COORDS:
154 			pOption->GetNumbers( aCoords, sal_True );
155 			break;
156 		case HTML_O_HREF:
157             aHRef = INetURLObject::GetAbsURL( rBaseURL, pOption->GetString() );
158 			break;
159 		case HTML_O_NOHREF:
160 			bNoHRef = sal_True;
161 			break;
162 		case HTML_O_ALT:
163 			aAlt = pOption->GetString();
164 			break;
165 		case HTML_O_TARGET:
166 			aTarget = pOption->GetString();
167 			break;
168 
169 		case HTML_O_ONMOUSEOVER:
170 			eScrpType = JAVASCRIPT;
171 		case HTML_O_SDONMOUSEOVER:
172 			nEvent = nEventMouseOver;
173 			goto IMAPOBJ_SETEVENT;
174 
175 		case HTML_O_ONMOUSEOUT:
176 			eScrpType = JAVASCRIPT;
177 		case HTML_O_SDONMOUSEOUT:
178 			nEvent = nEventMouseOut;
179 			goto IMAPOBJ_SETEVENT;
180 IMAPOBJ_SETEVENT:
181 			if( nEvent )
182 			{
183 				String sTmp( pOption->GetString() );
184 				if( sTmp.Len() )
185 				{
186 					sTmp.ConvertLineEnd();
187 					aMacroTbl.Insert( nEvent,
188 						new SvxMacro( sTmp, sEmpty, eScrpType ));
189 				}
190 			}
191 			break;
192 		}
193 	}
194 
195 	if( bNoHRef )
196 		aHRef.Erase();
197 
198 	sal_Bool bNewArea = sal_True;
199 	switch( nShape )
200 	{
201 	case IMAP_OBJ_RECTANGLE:
202 		if( aCoords.Count() >=4 )
203 		{
204 			Rectangle aRec( aCoords[0], aCoords[1],
205 							aCoords[2], aCoords[3] );
206 			IMapRectangleObject aMapRObj( aRec, aHRef, aAlt, String(), aTarget, aName,
207 										  !bNoHRef );
208 			if( aMacroTbl.Count() )
209 				aMapRObj.SetMacroTable( aMacroTbl );
210 			pImageMap->InsertIMapObject( aMapRObj );
211 		}
212 		break;
213 	case IMAP_OBJ_CIRCLE:
214 		if( aCoords.Count() >=3 )
215 		{
216 			Point aPoint( aCoords[0], aCoords[1] );
217 			IMapCircleObject aMapCObj( aPoint, aCoords[2],aHRef, aAlt, String(),
218 									   aTarget, aName, !bNoHRef );
219 			if( aMacroTbl.Count() )
220 				aMapCObj.SetMacroTable( aMacroTbl );
221 			pImageMap->InsertIMapObject( aMapCObj );
222 		}
223 		break;
224 	case IMAP_OBJ_POLYGON:
225 		if( aCoords.Count() >=6 )
226 		{
227 			sal_uInt16 nCount = aCoords.Count() / 2;
228 			Polygon aPoly( nCount );
229 			for( sal_uInt16 i=0; i<nCount; i++ )
230 				aPoly[i] = Point( aCoords[2*i], aCoords[2*i+1] );
231 			IMapPolygonObject aMapPObj( aPoly, aHRef, aAlt, String(), aTarget, aName,
232 										!bNoHRef );
233 			if( aMacroTbl.Count() )
234 				aMapPObj.SetMacroTable( aMacroTbl );
235 			pImageMap->InsertIMapObject( aMapPObj );
236 		}
237 		break;
238 	default:
239 		bNewArea = sal_False;
240 	}
241 
242 	return bNewArea;
243 }
244 
245 
246 void SfxHTMLParser::StartFileDownload( const String& rURL, int nToken,
247 									   SfxObjectShell *pSh )
248 {
249 	DBG_ASSERT( !pDLMedium, "StartFileDwonload bei aktivem Download" );
250 	if( pDLMedium )
251 		return;
252 
253 	pDLMedium = new SfxMedium( rURL, SFX_STREAM_READONLY, sal_False );
254 	if( pSh )
255 	{
256 		// Medium registrieren, damit abgebrochen werden kann
257 		pSh->RegisterTransfer( *pDLMedium );
258 
259 		// Target-Frame uebertragen, damit auch javascript:-URLs
260 		// "geladen" werden koennen.
261 		//const SfxMedium *pShMedium = pSh->GetMedium();
262 		//if( pShMedium )
263 		//	pDLMedium->SetLoadTargetFrame( pShMedium->GetLoadTargetFrame() );
264 	}
265 
266 	// Download anstossen (Achtung: Kann auch synchron sein).
267     if ( sal_True /*pMedium->GetDoneLink() == Link()*/ )
268         pDLMedium->DownLoad();
269     else
270     {
271         // Downloading-Flag auf sal_True setzen. Es werden dann auch
272         // Data-Available-Links, wenn wir in den Pending-Staus gelangen.
273         SetDownloadingFile( sal_True );
274         pDLMedium->DownLoad( STATIC_LINK( this, SfxHTMLParser, FileDownloadDone ) );
275 
276         // Wenn das Dowsnloading-Flag noch gesetzt ist erfolgt der Download
277         // asynchron. Wir gehen dann in den Pedning-Staus und warten dort.
278         // Solange sind alle Aufrufe des Data-Avaialble-Link gesperrt.
279         if( IsDownloadingFile() )
280         {
281             // Den aktuellen Zustand einfrieren und in den Pending-Status gehen.
282             // Wenn der Download beendet oder abgebrochen wurde, wird ueber
283             // NewDataRead ein Continue mit dem uebergeben Token angesteossen.
284             SaveState( nToken );
285             eState = SVPAR_PENDING;
286         }
287     }
288 }
289 
290 sal_Bool SfxHTMLParser::GetFileDownloadMIME( String& rMIME )
291 {
292 	return pDLMedium && pDLMedium->GetErrorCode()==0 &&
293 		   pDLMedium->GetMIMEAndRedirect(rMIME)==0;
294 }
295 
296 sal_Bool SfxHTMLParser::FinishFileDownload( String& rStr )
297 {
298 	String aStr;
299 
300 	sal_Bool bOK = pDLMedium && pDLMedium->GetErrorCode()==0;
301 	if( bOK )
302 	{
303 		SvStream* pStream = pDLMedium->GetInStream();
304 		DBG_ASSERT( pStream, "Kein In-Stream vom Medium erhalten" );
305 
306 		SvMemoryStream aStream;
307 		if( pStream )	// HACK wegen #65563#
308 			aStream << *pStream;
309 
310 		aStream.Seek( STREAM_SEEK_TO_END );
311 		DBG_ASSERT( aStream.Tell() < STRING_MAXLEN,
312 					"File zu lang fuer einen String, Ende abgeschnitten" );
313 		xub_StrLen nLen = aStream.Tell() < STRING_MAXLEN
314 						? (xub_StrLen)aStream.Tell()
315 						: STRING_MAXLEN;
316 
317         // TODO: untested!!!
318 		rtl_TextEncoding eEnc =
319 			GetExtendedCompatibilityTextEncoding( RTL_TEXTENCODING_ISO_8859_1 );
320 		String sMime;
321 		if( pDLMedium->GetMIMEAndRedirect( sMime ) == 0 )
322 		{
323 			rtl_TextEncoding eMimeEnc = GetEncodingByMIME( sMime );
324 			if( RTL_TEXTENCODING_DONTKNOW != eMimeEnc )
325 				eEnc = eMimeEnc;
326 		}
327 
328 		ByteString sBuffer;
329 		sal_Char* pBuffer = sBuffer.AllocBuffer(nLen);
330 		aStream.Seek( 0 );
331 		aStream.Read((void*)pBuffer, nLen);
332 		rStr = String( S2U(pBuffer) );
333 	}
334 
335 	delete pDLMedium;
336 	pDLMedium = 0;
337 
338 	return bOK;
339 }
340 
341 IMPL_STATIC_LINK( SfxHTMLParser, FileDownloadDone, void*, EMPTYARG )
342 {
343 	// Der Download ist jetzt abgeschlossen. Ausserdem muss/darf der
344 	// Data-Available-Link wieder durchgelassen werden.
345 	pThis->SetDownloadingFile( sal_False );
346 
347 	// ... und einmal aufrufen, damit weitergelesen wird.
348 	pThis->CallAsyncCallLink();
349 
350 	return 0;
351 }
352 
353 void SfxHTMLParser::GetScriptType_Impl( SvKeyValueIterator *pHTTPHeader )
354 {
355 	aScriptType = DEFINE_CONST_UNICODE(SVX_MACRO_LANGUAGE_JAVASCRIPT);
356 	eScriptType = JAVASCRIPT;
357 	if( pHTTPHeader )
358 	{
359 		SvKeyValue aKV;
360 		for( sal_Bool bCont = pHTTPHeader->GetFirst( aKV ); bCont;
361 			 bCont = pHTTPHeader->GetNext( aKV ) )
362 		{
363 			if( aKV.GetKey().EqualsIgnoreCaseAscii(
364 									OOO_STRING_SVTOOLS_HTML_META_content_script_type ) )
365 			{
366 				if( aKV.GetValue().Len() )
367 				{
368 					String aTmp( aKV.GetValue() );
369 					if( aTmp.EqualsIgnoreCaseAscii( sHTML_MIME_text, 0, 5 ) )
370 						aTmp.Erase( 0, 5 );
371 					else if( aTmp.EqualsIgnoreCaseAscii( sHTML_MIME_application,
372 														 0, 12 ) )
373 						aTmp.Erase( 0, 12 );
374 					else
375 						break;
376 
377 					if( aTmp.EqualsIgnoreCaseAscii( sHTML_MIME_experimental, 0,
378 													2 ) )
379 					{
380 						aTmp.Erase( 0, 2 );
381 					}
382 
383 					if( aTmp.EqualsIgnoreCaseAscii( OOO_STRING_SVTOOLS_HTML_LG_starbasic ) )
384 					{
385 						eScriptType = STARBASIC;
386 						aScriptType = DEFINE_CONST_UNICODE(SVX_MACRO_LANGUAGE_STARBASIC);
387 					}
388 					if( !aTmp.EqualsIgnoreCaseAscii( OOO_STRING_SVTOOLS_HTML_LG_javascript ) )
389 					{
390 						eScriptType = EXTENDED_STYPE;
391 						aScriptType = aTmp;
392 					}
393 				}
394 				break;
395 			}
396 		}
397 	}
398 }
399 
400 ScriptType SfxHTMLParser::GetScriptType( SvKeyValueIterator *pHTTPHeader ) const
401 {
402 	if( !aScriptType.Len() )
403 		((SfxHTMLParser *)this)->GetScriptType_Impl( pHTTPHeader );
404 
405 	return eScriptType;
406 }
407 
408 const String& SfxHTMLParser::GetScriptTypeString(
409 									SvKeyValueIterator *pHTTPHeader ) const
410 {
411 	if( !aScriptType.Len() )
412 		((SfxHTMLParser *)this)->GetScriptType_Impl( pHTTPHeader );
413 
414 	return aScriptType;
415 }
416 
417 double SfxHTMLParser::GetTableDataOptionsValNum( sal_uInt32& nNumForm,
418 		LanguageType& eNumLang, const String& aValStr, const String& aNumStr,
419 		SvNumberFormatter& rFormatter )
420 {
421 	LanguageType eParseLang = (LanguageType )aNumStr.ToInt32();
422 	sal_uInt32 nParseForm =
423 		rFormatter.GetFormatForLanguageIfBuiltIn( 0, eParseLang );
424 	double fVal;
425 	rFormatter.IsNumberFormat( aValStr, nParseForm, fVal );
426 	if ( aNumStr.GetTokenCount( ';' ) > 2 )
427 	{
428 		eNumLang = (LanguageType)aNumStr.GetToken( 1, ';' ).ToInt32();
429 		xub_StrLen nPos = aNumStr.Search( ';' );
430 		nPos = aNumStr.Search( ';', nPos + 1 );
431 		String aFormat( aNumStr.Copy( nPos + 1 ) );
432 		xub_StrLen nCheckPos;
433 		short nType;
434 		if ( eNumLang != LANGUAGE_SYSTEM )
435 			rFormatter.PutEntry( aFormat, nCheckPos, nType, nNumForm, eNumLang );
436 		else
437 			rFormatter.PutandConvertEntry( aFormat, nCheckPos, nType, nNumForm,
438 				eParseLang, eNumLang );
439 	}
440 	else
441 	{
442 		eNumLang = LANGUAGE_SYSTEM;
443 		nNumForm = rFormatter.GetFormatForLanguageIfBuiltIn( 0, eNumLang );
444 	}
445 	return fVal;
446 }
447 
448