1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_editeng.hxx"
26
27 #include <vcl/wrkwin.hxx>
28 #include <vcl/dialog.hxx>
29 #include <vcl/msgbox.hxx>
30 #include <vcl/svapp.hxx>
31 #include <eehtml.hxx>
32 #include <impedit.hxx>
33 #include <editeng/adjitem.hxx>
34 #include <editeng/flditem.hxx>
35 #include <tools/urlobj.hxx>
36 #include <editeng/fhgtitem.hxx>
37 #include <editeng/fontitem.hxx>
38 #include <editeng/ulspitem.hxx>
39 #include <editeng/wghtitem.hxx>
40 #include <svtools/htmltokn.h>
41 #include <svtools/htmlkywd.hxx>
42 #include <tools/tenccvt.hxx>
43
// Values stored in nLastAction by ImpInsertText() and ImpInsertParaBreak()
// to record which kind of insertion happened last.
#define ACTION_INSERTTEXT 1
#define ACTION_INSERTPARABRK 2

// Pseudo "heading level" passed to ImpSetStyleSheet() to request the
// formatting used for preformatted text (PRE / XMP / LISTING).
#define STYLE_PRE 101
48
EditHTMLParser(SvStream & rIn,const String & rBaseURL,SvKeyValueIterator * pHTTPHeaderAttrs)49 EditHTMLParser::EditHTMLParser( SvStream& rIn, const String& rBaseURL, SvKeyValueIterator* pHTTPHeaderAttrs )
50 : HTMLParser( rIn, true )
51 , aBaseURL( rBaseURL )
52 {
53 pImpEditEngine = 0;
54 pCurAnchor = 0;
55 bInPara = sal_False;
56 bWasInPara = sal_False;
57 nInTable = 0;
58 nInCell = 0;
59 bInTitle = sal_False;
60 nDefListLevel = 0;
61 nBulletLevel = 0;
62 nNumberingLevel = 0;
63 bFieldsInserted = sal_False;
64
65 DBG_ASSERT( RTL_TEXTENCODING_DONTKNOW == GetSrcEncoding( ), "EditHTMLParser::EditHTMLParser: Where does the encoding come from?" );
66 DBG_ASSERT( !IsSwitchToUCS2(), "EditHTMLParser::::EditHTMLParser: Switch to UCS2?" );
67
68 // Altough the real default encoding is ISO8859-1, we use MS-1252
69 // als default encoding.
70 SetSrcEncoding( GetExtendedCompatibilityTextEncoding( RTL_TEXTENCODING_ISO_8859_1 ) );
71
72 // If the file starts with a BOM, switch to UCS2.
73 SetSwitchToUCS2( sal_True );
74
75 if ( pHTTPHeaderAttrs )
76 SetEncodingByHTTPHeader( pHTTPHeaderAttrs );
77 }
78
~EditHTMLParser()79 EditHTMLParser::~EditHTMLParser()
80 {
81 delete pCurAnchor;
82 }
83
CallParser(ImpEditEngine * pImpEE,const EditPaM & rPaM)84 SvParserState EditHTMLParser::CallParser( ImpEditEngine* pImpEE, const EditPaM& rPaM )
85 {
86 DBG_ASSERT( pImpEE, "CallParser: ImpEditEngine ?!" );
87 pImpEditEngine = pImpEE;
88 SvParserState _eState = SVPAR_NOTSTARTED;
89 if ( pImpEditEngine )
90 {
91 // Umbrechmimik vom RTF-Import einbauen?
92 aCurSel = EditSelection( rPaM, rPaM );
93
94 if ( pImpEditEngine->aImportHdl.IsSet() )
95 {
96 ImportInfo aImportInfo( HTMLIMP_START, this, pImpEditEngine->CreateESel( aCurSel ) );
97 pImpEditEngine->aImportHdl.Call( &aImportInfo );
98 }
99
100 ImpSetStyleSheet( 0 );
101 _eState = HTMLParser::CallParser();
102
103 if ( pImpEditEngine->aImportHdl.IsSet() )
104 {
105 ImportInfo aImportInfo( HTMLIMP_END, this, pImpEditEngine->CreateESel( aCurSel ) );
106 pImpEditEngine->aImportHdl.Call( &aImportInfo );
107 }
108
109 if ( bFieldsInserted )
110 pImpEditEngine->UpdateFields();
111 }
112 return _eState;
113 }
114
NextToken(int nToken)115 void EditHTMLParser::NextToken( int nToken )
116 {
117 #ifdef DBG_UTIL
118 HTML_TOKEN_IDS xID = (HTML_TOKEN_IDS)nToken;
119 (void)xID;
120 #endif
121
122 switch( nToken )
123 {
124 case HTML_META:
125 {
126 const HTMLOptions *_pOptions = GetOptions();
127 sal_uInt16 nArrLen = _pOptions->Count();
128 sal_Bool bEquiv = sal_False;
129 for ( sal_uInt16 i = 0; i < nArrLen; i++ )
130 {
131 const HTMLOption *pOption = (*_pOptions)[i];
132 switch( pOption->GetToken() )
133 {
134 case HTML_O_HTTPEQUIV:
135 {
136 bEquiv = sal_True;
137 }
138 break;
139 case HTML_O_CONTENT:
140 {
141 if ( bEquiv )
142 {
143 rtl_TextEncoding eEnc = GetEncodingByMIME( pOption->GetString() );
144 if ( eEnc != RTL_TEXTENCODING_DONTKNOW )
145 SetSrcEncoding( eEnc );
146 }
147 }
148 break;
149 }
150 }
151
152 }
153 break;
154 case HTML_PLAINTEXT_ON:
155 case HTML_PLAINTEXT2_ON:
156 bInPara = sal_True;
157 break;
158 case HTML_PLAINTEXT_OFF:
159 case HTML_PLAINTEXT2_OFF:
160 bInPara = sal_False;
161 break;
162
163 case HTML_LINEBREAK:
164 case HTML_NEWPARA:
165 {
166 if ( ( bInPara || nInTable ) &&
167 ( ( nToken == HTML_LINEBREAK ) || HasTextInCurrentPara() ) )
168 {
169 ImpInsertParaBreak();
170 }
171 }
172 break;
173 case HTML_HORZRULE:
174 {
175 if ( HasTextInCurrentPara() )
176 ImpInsertParaBreak();
177 ImpInsertParaBreak();
178 }
179 case HTML_NONBREAKSPACE:
180 {
181 if ( bInPara )
182 {
183 ImpInsertText( String( RTL_CONSTASCII_USTRINGPARAM( " " ) ) );
184 }
185 }
186 break;
187 case HTML_TEXTTOKEN:
188 {
189 // #i110937# for <title> content, call aImportHdl (no SkipGroup), but don't insert the text into the EditEngine
190 if (!bInTitle)
191 {
192 if ( !bInPara )
193 StartPara( sal_False );
194
195 // if ( bInPara || pCurAnchor )
196
197 String aText = aToken;
198 if ( aText.Len() && ( aText.GetChar( 0 ) == ' ' )
199 && ThrowAwayBlank() && !IsReadPRE() )
200 aText.Erase( 0, 1 );
201
202 if ( pCurAnchor )
203 {
204 pCurAnchor->aText += aText;
205 }
206 else
207 {
208 // Nur bis HTML mit 319 geschrieben ?!
209 if ( IsReadPRE() )
210 {
211 sal_uInt16 nTabPos = aText.Search( '\t', 0 );
212 while ( nTabPos != STRING_NOTFOUND )
213 {
214 aText.Erase( nTabPos, 1 );
215 aText.Insert( String( RTL_CONSTASCII_USTRINGPARAM( " " ) ), nTabPos );
216 nTabPos = aText.Search( '\t', nTabPos+8 );
217 }
218 }
219 ImpInsertText( aText );
220 }
221 }
222 }
223 break;
224
225 case HTML_CENTER_ON:
226 case HTML_CENTER_OFF: // if ( bInPara )
227 {
228 sal_uInt16 nNode = pImpEditEngine->GetEditDoc().GetPos( aCurSel.Max().GetNode() );
229 SfxItemSet aItems( aCurSel.Max().GetNode()->GetContentAttribs().GetItems() );
230 aItems.ClearItem( EE_PARA_JUST );
231 if ( nToken == HTML_CENTER_ON )
232 aItems.Put( SvxAdjustItem( SVX_ADJUST_CENTER, EE_PARA_JUST ) );
233 pImpEditEngine->SetParaAttribs( nNode, aItems );
234 }
235 break;
236
237 case HTML_ANCHOR_ON: AnchorStart();
238 break;
239 case HTML_ANCHOR_OFF: AnchorEnd();
240 break;
241
242 case HTML_PARABREAK_ON:
243 if( bInPara && HasTextInCurrentPara() )
244 EndPara( sal_True );
245 StartPara( sal_True );
246 break;
247
248 case HTML_PARABREAK_OFF:
249 if( bInPara )
250 EndPara( sal_True );
251 break;
252
253 case HTML_HEAD1_ON:
254 case HTML_HEAD2_ON:
255 case HTML_HEAD3_ON:
256 case HTML_HEAD4_ON:
257 case HTML_HEAD5_ON:
258 case HTML_HEAD6_ON:
259 {
260 HeadingStart( nToken );
261 }
262 break;
263
264 case HTML_HEAD1_OFF:
265 case HTML_HEAD2_OFF:
266 case HTML_HEAD3_OFF:
267 case HTML_HEAD4_OFF:
268 case HTML_HEAD5_OFF:
269 case HTML_HEAD6_OFF:
270 {
271 HeadingEnd( nToken );
272 }
273 break;
274
275 case HTML_PREFORMTXT_ON:
276 case HTML_XMP_ON:
277 case HTML_LISTING_ON:
278 {
279 StartPara( sal_True );
280 ImpSetStyleSheet( STYLE_PRE );
281 }
282 break;
283
284 case HTML_DEFLIST_ON:
285 {
286 nDefListLevel++;
287 }
288 break;
289
290 case HTML_DEFLIST_OFF:
291 {
292 if( nDefListLevel )
293 nDefListLevel--;
294 }
295 break;
296
297 case HTML_TABLE_ON: nInTable++;
298 break;
299 case HTML_TABLE_OFF: DBG_ASSERT( nInTable, "Nicht in Table, aber TABLE_OFF?" );
300 nInTable--;
301 break;
302
303 case HTML_TABLEHEADER_ON:
304 case HTML_TABLEDATA_ON:
305 nInCell++;
306 // fallthru
307 case HTML_BLOCKQUOTE_ON:
308 case HTML_BLOCKQUOTE_OFF:
309 case HTML_BLOCKQUOTE30_ON:
310 case HTML_BLOCKQUOTE30_OFF:
311 case HTML_LISTHEADER_ON:
312 case HTML_LI_ON:
313 case HTML_DD_ON:
314 case HTML_DT_ON:
315 case HTML_ORDERLIST_ON:
316 case HTML_UNORDERLIST_ON:
317 {
318 sal_Bool bHasText = HasTextInCurrentPara();
319 if ( bHasText )
320 ImpInsertParaBreak();
321 StartPara( sal_False );
322 }
323 break;
324
325 case HTML_TABLEHEADER_OFF:
326 case HTML_TABLEDATA_OFF:
327 {
328 if ( nInCell )
329 nInCell--;
330 }
331 // fallthru
332 case HTML_LISTHEADER_OFF:
333 case HTML_LI_OFF:
334 case HTML_DD_OFF:
335 case HTML_DT_OFF:
336 case HTML_ORDERLIST_OFF:
337 case HTML_UNORDERLIST_OFF: EndPara( sal_False );
338 break;
339
340 case HTML_TABLEROW_ON:
341 case HTML_TABLEROW_OFF: // Nur nach einem CELL ein RETURN, fuer Calc
342
343 case HTML_COL_ON:
344 case HTML_COLGROUP_ON:
345 case HTML_COLGROUP_OFF: break;
346
347 case HTML_FONT_ON: // ...
348 break;
349 case HTML_FONT_OFF: // ...
350 break;
351
352
353 // #58335# kein SkipGroup on/off auf inline markup etc.
354
355 case HTML_TITLE_ON:
356 bInTitle = sal_True;
357 break;
358 case HTML_TITLE_OFF:
359 bInTitle = sal_False;
360 break;
361
362 // globals
363 case HTML_HTML_ON:
364 case HTML_HTML_OFF:
365 case HTML_BODY_ON:
366 case HTML_BODY_OFF:
367 case HTML_HEAD_ON:
368 case HTML_HEAD_OFF:
369 case HTML_FORM_ON:
370 case HTML_FORM_OFF:
371 case HTML_THEAD_ON:
372 case HTML_THEAD_OFF:
373 case HTML_TBODY_ON:
374 case HTML_TBODY_OFF:
375 // inline elements, structural markup
376 // HTML 3.0
377 case HTML_BANNER_ON:
378 case HTML_BANNER_OFF:
379 case HTML_DIVISION_ON:
380 case HTML_DIVISION_OFF:
381 // case HTML_LISTHEADER_ON: //! special handling
382 // case HTML_LISTHEADER_OFF:
383 case HTML_NOTE_ON:
384 case HTML_NOTE_OFF:
385 // inline elements, logical markup
386 // HTML 2.0
387 case HTML_ADDRESS_ON:
388 case HTML_ADDRESS_OFF:
389 // case HTML_BLOCKQUOTE_ON: //! extra Behandlung
390 // case HTML_BLOCKQUOTE_OFF:
391 case HTML_CITIATION_ON:
392 case HTML_CITIATION_OFF:
393 case HTML_CODE_ON:
394 case HTML_CODE_OFF:
395 case HTML_DEFINSTANCE_ON:
396 case HTML_DEFINSTANCE_OFF:
397 case HTML_EMPHASIS_ON:
398 case HTML_EMPHASIS_OFF:
399 case HTML_KEYBOARD_ON:
400 case HTML_KEYBOARD_OFF:
401 case HTML_SAMPLE_ON:
402 case HTML_SAMPLE_OFF:
403 case HTML_STRIKE_ON:
404 case HTML_STRIKE_OFF:
405 case HTML_STRONG_ON:
406 case HTML_STRONG_OFF:
407 case HTML_VARIABLE_ON:
408 case HTML_VARIABLE_OFF:
409 // HTML 3.0
410 case HTML_ABBREVIATION_ON:
411 case HTML_ABBREVIATION_OFF:
412 case HTML_ACRONYM_ON:
413 case HTML_ACRONYM_OFF:
414 case HTML_AUTHOR_ON:
415 case HTML_AUTHOR_OFF:
416 // case HTML_BLOCKQUOTE30_ON: //! extra Behandlung
417 // case HTML_BLOCKQUOTE30_OFF:
418 case HTML_DELETEDTEXT_ON:
419 case HTML_DELETEDTEXT_OFF:
420 case HTML_INSERTEDTEXT_ON:
421 case HTML_INSERTEDTEXT_OFF:
422 case HTML_LANGUAGE_ON:
423 case HTML_LANGUAGE_OFF:
424 case HTML_PERSON_ON:
425 case HTML_PERSON_OFF:
426 case HTML_SHORTQUOTE_ON:
427 case HTML_SHORTQUOTE_OFF:
428 case HTML_SUBSCRIPT_ON:
429 case HTML_SUBSCRIPT_OFF:
430 case HTML_SUPERSCRIPT_ON:
431 case HTML_SUPERSCRIPT_OFF:
432 // inline elements, visual markup
433 // HTML 2.0
434 case HTML_BOLD_ON:
435 case HTML_BOLD_OFF:
436 case HTML_ITALIC_ON:
437 case HTML_ITALIC_OFF:
438 case HTML_TELETYPE_ON:
439 case HTML_TELETYPE_OFF:
440 case HTML_UNDERLINE_ON:
441 case HTML_UNDERLINE_OFF:
442 // HTML 3.0
443 case HTML_BIGPRINT_ON:
444 case HTML_BIGPRINT_OFF:
445 case HTML_STRIKETHROUGH_ON:
446 case HTML_STRIKETHROUGH_OFF:
447 case HTML_SMALLPRINT_ON:
448 case HTML_SMALLPRINT_OFF:
449 // figures
450 case HTML_FIGURE_ON:
451 case HTML_FIGURE_OFF:
452 case HTML_CAPTION_ON:
453 case HTML_CAPTION_OFF:
454 case HTML_CREDIT_ON:
455 case HTML_CREDIT_OFF:
456 // misc
457 case HTML_DIRLIST_ON:
458 case HTML_DIRLIST_OFF:
459 case HTML_FOOTNOTE_ON: //! landen so im Text
460 case HTML_FOOTNOTE_OFF:
461 case HTML_MENULIST_ON:
462 case HTML_MENULIST_OFF:
463 // case HTML_PLAINTEXT_ON: //! extra Behandlung
464 // case HTML_PLAINTEXT_OFF:
465 // case HTML_PREFORMTXT_ON: //! extra Behandlung
466 // case HTML_PREFORMTXT_OFF:
467 case HTML_SPAN_ON:
468 case HTML_SPAN_OFF:
469 // obsolete
470 // case HTML_XMP_ON: //! extra Behandlung
471 // case HTML_XMP_OFF:
472 // case HTML_LISTING_ON: //! extra Behandlung
473 // case HTML_LISTING_OFF:
474 // Netscape
475 case HTML_BLINK_ON:
476 case HTML_BLINK_OFF:
477 case HTML_NOBR_ON:
478 case HTML_NOBR_OFF:
479 case HTML_NOEMBED_ON:
480 case HTML_NOEMBED_OFF:
481 case HTML_NOFRAMES_ON:
482 case HTML_NOFRAMES_OFF:
483 // Internet Explorer
484 case HTML_MARQUEE_ON:
485 case HTML_MARQUEE_OFF:
486 // case HTML_PLAINTEXT2_ON: //! extra Behandlung
487 // case HTML_PLAINTEXT2_OFF:
488 break;
489
490 default:
491 {
492 if ( nToken & HTML_TOKEN_ONOFF )
493 {
494 if ( ( nToken == HTML_UNKNOWNCONTROL_ON ) || ( nToken == HTML_UNKNOWNCONTROL_OFF ) )
495 {
496 ;
497 }
498 else if ( !(nToken & 1) )
499 {
500 DBG_ASSERT( !( nToken & 1 ), "Kein Start-Token ?!" );
501 SkipGroup( nToken + 1 );
502 }
503 }
504 }
505 } // SWITCH
506
507 if ( pImpEditEngine->aImportHdl.IsSet() )
508 {
509 ImportInfo aImportInfo( HTMLIMP_NEXTTOKEN, this, pImpEditEngine->CreateESel( aCurSel ) );
510 aImportInfo.nToken = nToken;
511 aImportInfo.nTokenValue = (short)nTokenValue;
512 if ( nToken == HTML_TEXTTOKEN )
513 aImportInfo.aText = aToken;
514 pImpEditEngine->aImportHdl.Call( &aImportInfo );
515 }
516
517 }
518
ImpInsertParaBreak()519 void EditHTMLParser::ImpInsertParaBreak()
520 {
521 if ( pImpEditEngine->aImportHdl.IsSet() )
522 {
523 ImportInfo aImportInfo( HTMLIMP_INSERTPARA, this, pImpEditEngine->CreateESel( aCurSel ) );
524 pImpEditEngine->aImportHdl.Call( &aImportInfo );
525 }
526 aCurSel = pImpEditEngine->ImpInsertParaBreak( aCurSel );
527 nLastAction = ACTION_INSERTPARABRK;
528 }
529
ImpSetAttribs(const SfxItemSet & rItems,EditSelection * pSel)530 void EditHTMLParser::ImpSetAttribs( const SfxItemSet& rItems, EditSelection* pSel )
531 {
532 // pSel, wenn Zeichenattribute, sonst Absatzattribute fuer den
533 // aktuellen Absatz.
534 DBG_ASSERT( pSel || ( aCurSel.Min().GetNode() == aCurSel.Max().GetNode() ), "ImpInsertAttribs: Selektion?" );
535
536 EditPaM aStartPaM( pSel ? pSel->Min() : aCurSel.Min() );
537 EditPaM aEndPaM( pSel ? pSel->Max() : aCurSel.Max() );
538
539 if ( !pSel )
540 {
541 aStartPaM.SetIndex( 0 );
542 aEndPaM.SetIndex( aEndPaM.GetNode()->Len() );
543 }
544
545 if ( pImpEditEngine->aImportHdl.IsSet() )
546 {
547 EditSelection aSel( aStartPaM, aEndPaM );
548 ImportInfo aImportInfo( HTMLIMP_SETATTR, this, pImpEditEngine->CreateESel( aSel ) );
549 aImportInfo.pAttrs = (void*)&rItems;
550 pImpEditEngine->aImportHdl.Call( &aImportInfo );
551 }
552
553 ContentNode* pSN = aStartPaM.GetNode();
554 sal_uInt16 nStartNode = pImpEditEngine->GetEditDoc().GetPos( pSN );
555
556 // Wenn ein Attribut von 0 bis aktuelle Absatzlaenge geht,
557 // soll es ein Absatz-Attribut sein!
558
559 // Achtung: Selektion kann ueber mehrere Absaetze gehen.
560 // Alle vollstaendigen Absaetze sind Absatzattribute...
561
562 // HTML eigentlich nicht:
563 #ifdef DBG_UTIL
564 ContentNode* pEN = aEndPaM.GetNode();
565 sal_uInt16 nEndNode = pImpEditEngine->GetEditDoc().GetPos( pEN );
566 DBG_ASSERT( nStartNode == nEndNode, "ImpSetAttribs: Mehrere Absaetze?" );
567 #endif
568
569 /*
570 for ( sal_uInt16 z = nStartNode+1; z < nEndNode; z++ )
571 {
572 DBG_ASSERT( pImpEditEngine->GetEditDoc().SaveGetObject( z ), "Node existiert noch nicht(RTF)" );
573 pImpEditEngine->SetParaAttribs( z, rSet.GetAttrSet() );
574 }
575
576 if ( aStartPaM.GetNode() != aEndPaM.GetNode() )
577 {
578 // Den Rest des StartNodes...
579 if ( aStartPaM.GetIndex() == 0 )
580 pImpEditEngine->SetParaAttribs( nStartNode, rSet.GetAttrSet() );
581 else
582 pImpEditEngine->SetAttribs( EditSelection( aStartPaM, EditPaM( aStartPaM.GetNode(), aStartPaM.GetNode()->Len() ) ), rSet.GetAttrSet() );
583
584 // Den Anfang des EndNodes....
585 if ( aEndPaM.GetIndex() == aEndPaM.GetNode()->Len() )
586 pImpEditEngine->SetParaAttribs( nEndNode, rSet.GetAttrSet() );
587 else
588 pImpEditEngine->SetAttribs( EditSelection( EditPaM( aEndPaM.GetNode(), 0 ), aEndPaM ), rSet.GetAttrSet() );
589 }
590 else
591 */
592 {
593 if ( ( aStartPaM.GetIndex() == 0 ) && ( aEndPaM.GetIndex() == aEndPaM.GetNode()->Len() ) )
594 {
595 // Muesse gemergt werden:
596 SfxItemSet aItems( pImpEditEngine->GetParaAttribs( nStartNode ) );
597 aItems.Put( rItems );
598 pImpEditEngine->SetParaAttribs( nStartNode, aItems );
599 }
600 else
601 pImpEditEngine->SetAttribs( EditSelection( aStartPaM, aEndPaM ), rItems );
602 }
603 }
604
ImpSetStyleSheet(sal_uInt16 nHLevel)605 void EditHTMLParser::ImpSetStyleSheet( sal_uInt16 nHLevel )
606 {
607 /*
608 nHLevel: 0: Ausschalten
609 1-6: Heading
610 STYLE_PRE: Preformatted
611 */
612
613 // if ( pImpEditEngine->GetStatus().DoImportRTFStyleSheets() )
614 // {
615 // SvxRTFStyleType* pS = GetStyleTbl().Get( rSet.StyleNo() );
616 // DBG_ASSERT( pS, "Vorlage in RTF nicht definiert!" );
617 // if ( pS )
618 // pImpEditEngine->SetStyleSheet( EditSelection( aStartPaM, aEndPaM ), pS->sName, SFX_STYLE_FAMILY_ALL );
619 // }
620 // else
621 {
622 // Harte Attribute erzeugen...
623 // Reicht fuer Calc, bei StyleSheets muesste noch geklaert werden,
624 // dass diese auch in der App liegen sollten, damit sie beim
625 // fuettern in eine andere Engine auch noch da sind...
626
627 sal_uInt16 nNode = pImpEditEngine->GetEditDoc().GetPos( aCurSel.Max().GetNode() );
628 // SfxItemSet aItems( pImpEditEngine->GetEmptyItemSet() );
629 SfxItemSet aItems( aCurSel.Max().GetNode()->GetContentAttribs().GetItems() );
630
631 aItems.ClearItem( EE_PARA_ULSPACE );
632 aItems.ClearItem( EE_CHAR_FONTHEIGHT );
633 aItems.ClearItem( EE_CHAR_FONTINFO );
634 aItems.ClearItem( EE_CHAR_WEIGHT );
635
636 // Fett in den ersten 3 Headings
637 if ( ( nHLevel >= 1 ) && ( nHLevel <= 3 ) )
638 {
639 SvxWeightItem aWeightItem( WEIGHT_BOLD, EE_CHAR_WEIGHT );
640 aItems.Put( aWeightItem );
641 }
642
643 // Fonthoehe und Abstaende, wenn LogicToLogic moeglich:
644 MapUnit eUnit = pImpEditEngine->GetRefMapMode().GetMapUnit();
645 if ( ( eUnit != MAP_PIXEL ) && ( eUnit != MAP_SYSFONT ) &&
646 ( eUnit != MAP_APPFONT ) && ( eUnit != MAP_RELATIVE ) )
647 {
648 long nPoints = 10;
649 if ( nHLevel == 1 )
650 nPoints = 22;
651 else if ( nHLevel == 2 )
652 nPoints = 16;
653 else if ( nHLevel == 3 )
654 nPoints = 12;
655 else if ( nHLevel == 4 )
656 nPoints = 11;
657
658 nPoints = OutputDevice::LogicToLogic( nPoints, MAP_POINT, eUnit );
659 SvxFontHeightItem aHeightItem( nPoints, 100, EE_CHAR_FONTHEIGHT );
660 aItems.Put( aHeightItem );
661
662 // Absatzabstaende, wenn Heading:
663 if ( !nHLevel || ((nHLevel >= 1) && (nHLevel <= 6)) )
664 {
665 SvxULSpaceItem aULSpaceItem( EE_PARA_ULSPACE );
666 aULSpaceItem.SetUpper( (sal_uInt16)OutputDevice::LogicToLogic( 42, MAP_10TH_MM, eUnit ) );
667 aULSpaceItem.SetLower( (sal_uInt16)OutputDevice::LogicToLogic( 35, MAP_10TH_MM, eUnit ) );
668 aItems.Put( aULSpaceItem );
669 }
670 }
671
672 // Bei Pre einen proportionalen Font waehlen
673 if ( nHLevel == STYLE_PRE )
674 {
675 Font aFont = OutputDevice::GetDefaultFont( DEFAULTFONT_FIXED, LANGUAGE_SYSTEM, 0 );
676 SvxFontItem aFontItem( aFont.GetFamily(), aFont.GetName(), XubString(), aFont.GetPitch(), aFont.GetCharSet(), EE_CHAR_FONTINFO );
677 aItems.Put( aFontItem );
678 }
679
680 pImpEditEngine->SetParaAttribs( nNode, aItems );
681 }
682 }
683
ImpInsertText(const String & rText)684 void EditHTMLParser::ImpInsertText( const String& rText )
685 {
686 String aText( rText );
687 if ( pImpEditEngine->aImportHdl.IsSet() )
688 {
689 ImportInfo aImportInfo( HTMLIMP_INSERTTEXT, this, pImpEditEngine->CreateESel( aCurSel ) );
690 aImportInfo.aText = aText;
691 pImpEditEngine->aImportHdl.Call( &aImportInfo );
692 }
693
694 aCurSel = pImpEditEngine->ImpInsertText( aCurSel, aText );
695 nLastAction = ACTION_INSERTTEXT;
696 }
697
SkipGroup(int nEndToken)698 void EditHTMLParser::SkipGroup( int nEndToken )
699 {
700 // #69109# groups in cells are closed upon leaving the cell, because those
701 // ******* web authors don't know their job
702 // for example: <td><form></td> lacks a closing </form>
703 sal_uInt8 nCellLevel = nInCell;
704 int nToken;
705 while( nCellLevel <= nInCell && ( (nToken = GetNextToken() ) != nEndToken ) && nToken )
706 {
707 switch ( nToken )
708 {
709 case HTML_TABLEHEADER_ON:
710 case HTML_TABLEDATA_ON:
711 nInCell++;
712 break;
713 case HTML_TABLEHEADER_OFF:
714 case HTML_TABLEDATA_OFF:
715 if ( nInCell )
716 nInCell--;
717 break;
718 }
719 }
720 }
721
StartPara(sal_Bool bReal)722 void EditHTMLParser::StartPara( sal_Bool bReal )
723 {
724 if ( bReal )
725 {
726 const HTMLOptions *_pOptions = GetOptions();
727 sal_uInt16 nArrLen = _pOptions->Count();
728 SvxAdjust eAdjust = SVX_ADJUST_LEFT;
729 for ( sal_uInt16 i = 0; i < nArrLen; i++ )
730 {
731 const HTMLOption *pOption = (*_pOptions)[i];
732 switch( pOption->GetToken() )
733 {
734 case HTML_O_ALIGN:
735 {
736 if ( pOption->GetString().CompareIgnoreCaseToAscii( OOO_STRING_SVTOOLS_HTML_AL_right ) == COMPARE_EQUAL )
737 eAdjust = SVX_ADJUST_RIGHT;
738 else if ( pOption->GetString().CompareIgnoreCaseToAscii( OOO_STRING_SVTOOLS_HTML_AL_middle ) == COMPARE_EQUAL )
739 eAdjust = SVX_ADJUST_CENTER;
740 else if ( pOption->GetString().CompareIgnoreCaseToAscii( OOO_STRING_SVTOOLS_HTML_AL_center ) == COMPARE_EQUAL )
741 eAdjust = SVX_ADJUST_CENTER;
742 else
743 eAdjust = SVX_ADJUST_LEFT;
744 }
745 break;
746 }
747 }
748 SfxItemSet aItemSet( pImpEditEngine->GetEmptyItemSet() );
749 aItemSet.Put( SvxAdjustItem( eAdjust, EE_PARA_JUST ) );
750 ImpSetAttribs( aItemSet );
751 }
752 bInPara = sal_True;
753 }
754
EndPara(sal_Bool)755 void EditHTMLParser::EndPara( sal_Bool )
756 {
757 if ( bInPara )
758 {
759 sal_Bool bHasText = HasTextInCurrentPara();
760 if ( bHasText )
761 ImpInsertParaBreak();
762 // Nur, wenn ohne Absatzabstaende gearbeitet wird...
763 // if ( !nInTable && bReal && (nNumberingLevel<=1) && (nBulletLevel<=1) )
764 // ImpInsertParaBreak();
765 }
766 bInPara = sal_False;
767 }
768
ThrowAwayBlank()769 sal_Bool EditHTMLParser::ThrowAwayBlank()
770 {
771 // Ein Blank muss weggeschmissen werden, wenn der neue Text mit einem
772 // Blank beginnt und der aktuelle Absatz leer ist oder mit einem
773 // Blank endet...
774 ContentNode* pNode = aCurSel.Max().GetNode();
775 if ( pNode->Len() && ( pNode->GetChar( pNode->Len()-1 ) != ' ' ) )
776 return sal_False;
777 return sal_True;
778 }
779
HasTextInCurrentPara()780 sal_Bool EditHTMLParser::HasTextInCurrentPara()
781 {
782 return aCurSel.Max().GetNode()->Len() ? sal_True : sal_False;
783 }
784
AnchorStart()785 void EditHTMLParser::AnchorStart()
786 {
787 // Anker im Anker ignoriern
788 if ( !pCurAnchor )
789 {
790 const HTMLOptions* _pOptions = GetOptions();
791 sal_uInt16 nArrLen = _pOptions->Count();
792
793 String aRef;
794
795 for ( sal_uInt16 i = 0; i < nArrLen; i++ )
796 {
797 const HTMLOption* pOption = (*_pOptions)[i];
798 switch( pOption->GetToken() )
799 {
800 case HTML_O_HREF:
801 aRef = pOption->GetString();
802 break;
803 }
804 }
805
806 if ( aRef.Len() )
807 {
808 String aURL = aRef;
809 if ( aURL.Len() && ( aURL.GetChar( 0 ) != '#' ) )
810 {
811 INetURLObject aTargetURL;
812 INetURLObject aRootURL( aBaseURL );
813 aRootURL.GetNewAbsURL( aRef, &aTargetURL );
814 aURL = aTargetURL.GetMainURL( INetURLObject::DECODE_TO_IURI );
815 }
816 pCurAnchor = new AnchorInfo;
817 pCurAnchor->aHRef = aURL;
818 }
819 }
820 }
821
AnchorEnd()822 void EditHTMLParser::AnchorEnd()
823 {
824 if ( pCurAnchor )
825 {
826 // Als URL-Feld einfuegen...
827 SvxFieldItem aFld( SvxURLField( pCurAnchor->aHRef, pCurAnchor->aText, SVXURLFORMAT_REPR ), EE_FEATURE_FIELD );
828 aCurSel = pImpEditEngine->InsertField( aCurSel, aFld );
829 bFieldsInserted = sal_True;
830 delete pCurAnchor;
831 pCurAnchor = 0;
832
833 if ( pImpEditEngine->aImportHdl.IsSet() )
834 {
835 ImportInfo aImportInfo( HTMLIMP_INSERTFIELD, this, pImpEditEngine->CreateESel( aCurSel ) );
836 pImpEditEngine->aImportHdl.Call( &aImportInfo );
837 }
838 }
839 }
840
HeadingStart(int nToken)841 void EditHTMLParser::HeadingStart( int nToken )
842 {
843 bWasInPara = bInPara;
844 StartPara( sal_False );
845
846 if ( bWasInPara && HasTextInCurrentPara() )
847 ImpInsertParaBreak();
848
849 sal_uInt16 nId = sal::static_int_cast< sal_uInt16 >(
850 1 + ( ( nToken - HTML_HEAD1_ON ) / 2 ) );
851 DBG_ASSERT( (nId >= 1) && (nId <= 9), "HeadingStart: ID kann nicht stimmen!" );
852 ImpSetStyleSheet( nId );
853 }
854
HeadingEnd(int)855 void EditHTMLParser::HeadingEnd( int )
856 {
857 EndPara( sal_False );
858 ImpSetStyleSheet( 0 );
859
860 if ( bWasInPara )
861 {
862 bInPara = sal_True;
863 bWasInPara = sal_False;
864 }
865 }
866