1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 #ifndef SC_HTMLPARS_HXX
25 #define SC_HTMLPARS_HXX
26
27 #include <tools/stack.hxx>
28
29 #include <memory>
30 #include <vector>
31 #include <list>
32 #include <map>
33
34 #include "rangelst.hxx"
35 #include "eeparser.hxx"
36
37 const sal_uInt32 SC_HTML_FONTSIZES = 7; // wie Export, HTML-Options
38
39 // Pixel tolerance for SeekOffset and related.
40 const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_SMALL = 1; // single table
41 const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_LARGE = 10; // nested
42
43 // ============================================================================
44 // BASE class for HTML parser classes
45 // ============================================================================
46
47 class ScHTMLTable;
48
49 /** Base class for HTML parser classes. */
50 class ScHTMLParser : public ScEEParser
51 {
52 protected:
53 sal_uInt32 maFontHeights[ SC_HTML_FONTSIZES ];
54 ScDocument* mpDoc; /// The destination document.
55
56 public:
57 explicit ScHTMLParser( EditEngine* pEditEngine, ScDocument* pDoc );
58 virtual ~ScHTMLParser();
59
60 virtual sal_uLong Read( SvStream& rStrm, const String& rBaseURL ) = 0;
61
62 /** Returns the "global table" which contains the entire HTML document. */
63 virtual const ScHTMLTable* GetGlobalTable() const = 0;
64 };
65
66
67 // ============================================================================
68
69 SV_DECL_VARARR_SORT( ScHTMLColOffset, sal_uLong, 16, 4)
70
71 struct ScHTMLTableStackEntry
72 {
73 ScRangeListRef xLockedList;
74 ScEEParseEntry* pCellEntry;
75 ScHTMLColOffset* pLocalColOffset;
76 sal_uLong nFirstTableCell;
77 SCCOL nColCnt;
78 SCROW nRowCnt;
79 SCCOL nColCntStart;
80 SCCOL nMaxCol;
81 sal_uInt16 nTable;
82 sal_uInt16 nTableWidth;
83 sal_uInt16 nColOffset;
84 sal_uInt16 nColOffsetStart;
85 sal_Bool bFirstRow;
ScHTMLTableStackEntryScHTMLTableStackEntry86 ScHTMLTableStackEntry( ScEEParseEntry* pE,
87 const ScRangeListRef& rL, ScHTMLColOffset* pTO,
88 sal_uLong nFTC,
89 SCCOL nCol, SCROW nRow,
90 SCCOL nStart, SCCOL nMax, sal_uInt16 nTab,
91 sal_uInt16 nTW, sal_uInt16 nCO, sal_uInt16 nCOS,
92 sal_Bool bFR )
93 : xLockedList( rL ), pCellEntry( pE ),
94 pLocalColOffset( pTO ),
95 nFirstTableCell( nFTC ),
96 nColCnt( nCol ), nRowCnt( nRow ),
97 nColCntStart( nStart ), nMaxCol( nMax ),
98 nTable( nTab ), nTableWidth( nTW ),
99 nColOffset( nCO ), nColOffsetStart( nCOS ),
100 bFirstRow( bFR )
101 {}
~ScHTMLTableStackEntryScHTMLTableStackEntry102 ~ScHTMLTableStackEntry() {}
103 };
104 DECLARE_STACK( ScHTMLTableStack, ScHTMLTableStackEntry* )
105
106 struct ScHTMLAdjustStackEntry
107 {
108 SCCOL nLastCol;
109 SCROW nNextRow;
110 SCROW nCurRow;
ScHTMLAdjustStackEntryScHTMLAdjustStackEntry111 ScHTMLAdjustStackEntry( SCCOL nLCol, SCROW nNRow,
112 SCROW nCRow )
113 : nLastCol( nLCol ), nNextRow( nNRow ),
114 nCurRow( nCRow )
115 {}
116 };
117 DECLARE_STACK( ScHTMLAdjustStack, ScHTMLAdjustStackEntry* )
118
119
120 // ============================================================================
121
122 class EditEngine;
123 class ScDocument;
124 class HTMLOption;
125
126 class ScHTMLLayoutParser : public ScHTMLParser
127 {
128 private:
129 Size aPageSize;
130 String aBaseURL;
131 ScHTMLTableStack aTableStack;
132 String aString;
133 ScRangeListRef xLockedList; // je Table
134 Table* pTables;
135 ScHTMLColOffset* pColOffset;
136 ScHTMLColOffset* pLocalColOffset; // je Table
137 sal_uLong nFirstTableCell; // je Table
138 short nTableLevel;
139 sal_uInt16 nTable;
140 sal_uInt16 nMaxTable;
141 SCCOL nColCntStart; // erste Col je Table
142 SCCOL nMaxCol; // je Table
143 sal_uInt16 nTableWidth; // je Table
144 sal_uInt16 nColOffset; // aktuell, Pixel
145 sal_uInt16 nColOffsetStart; // Startwert je Table, in Pixel
146 sal_uInt16 nMetaCnt; // fuer ParseMetaOptions
147 sal_uInt16 nOffsetTolerance; // for use with SeekOffset and related
148 sal_Bool bCalcWidthHeight; // TRUE: calculate real column width
149 // FALSE: 1 html-col = 1 sc-col
150 sal_Bool bTabInTabCell;
151 sal_Bool bFirstRow; // je Table, ob in erster Zeile
152 sal_Bool bInCell;
153 sal_Bool bInTitle;
154
155 DECL_LINK( HTMLImportHdl, ImportInfo* );
156 void NewActEntry( ScEEParseEntry* );
157 void EntryEnd( ScEEParseEntry*, const ESelection& );
158 void ProcToken( ImportInfo* );
159 void CloseEntry( ImportInfo* );
160 void NextRow( ImportInfo* );
161 void SkipLocked( ScEEParseEntry*, sal_Bool bJoin = sal_True );
162 static sal_Bool SeekOffset( ScHTMLColOffset*, sal_uInt16 nOffset,
163 SCCOL* pCol, sal_uInt16 nOffsetTol );
164 static void MakeCol( ScHTMLColOffset*, sal_uInt16& nOffset,
165 sal_uInt16& nWidth, sal_uInt16 nOffsetTol,
166 sal_uInt16 nWidthTol );
167 static void MakeColNoRef( ScHTMLColOffset*, sal_uInt16 nOffset,
168 sal_uInt16 nWidth, sal_uInt16 nOffsetTol,
169 sal_uInt16 nWidthTol );
170 static void ModifyOffset( ScHTMLColOffset*, sal_uInt16& nOldOffset,
171 sal_uInt16& nNewOffset, sal_uInt16 nOffsetTol );
172 void Colonize( ScEEParseEntry* );
173 sal_uInt16 GetWidth( ScEEParseEntry* );
174 void SetWidths();
175 void Adjust();
176
177 sal_uInt16 GetWidthPixel( const HTMLOption* );
178 sal_Bool IsAtBeginningOfText( ImportInfo* );
179
180 void TableOn( ImportInfo* );
181 void ColOn( ImportInfo* );
182 void TableRowOn( ImportInfo* );
183 void TableRowOff( ImportInfo* );
184 void TableDataOn( ImportInfo* );
185 void TableDataOff( ImportInfo* );
186 void TableOff( ImportInfo* );
187 void Image( ImportInfo* );
188 void AnchorOn( ImportInfo* );
189 void FontOn( ImportInfo* );
190
191 public:
192 ScHTMLLayoutParser( EditEngine*, const String& rBaseURL, const Size& aPageSize, ScDocument* );
193 virtual ~ScHTMLLayoutParser();
194 virtual sal_uLong Read( SvStream&, const String& rBaseURL );
195 virtual const ScHTMLTable* GetGlobalTable() const;
196 };
197
198
199
200 // ============================================================================
201 // HTML DATA QUERY PARSER
202 // ============================================================================
203
204 /** Declares the orientation in or for a table: column or row. */
205 enum ScHTMLOrient { tdCol = 0 , tdRow = 1 };
206
207 /** Type for a unique identifier for each table. */
208 typedef sal_uInt16 ScHTMLTableId;
209 /** Identifier of the "global table" (the entire HTML document). */
210 const ScHTMLTableId SC_HTML_GLOBAL_TABLE = 0;
211 /** Used as table index for normal (non-table) entries in ScHTMLEntry structs. */
212 const ScHTMLTableId SC_HTML_NO_TABLE = 0;
213
214 // ============================================================================
215
216 /** A 2D cell position in an HTML table. */
217 struct ScHTMLPos
218 {
219 SCCOL mnCol;
220 SCROW mnRow;
221
ScHTMLPosScHTMLPos222 inline explicit ScHTMLPos() : mnCol( 0 ), mnRow( 0 ) {}
ScHTMLPosScHTMLPos223 inline explicit ScHTMLPos( SCCOL nCol, SCROW nRow ) :
224 mnCol( nCol ), mnRow( nRow ) {}
ScHTMLPosScHTMLPos225 inline explicit ScHTMLPos( const ScAddress& rAddr ) { Set( rAddr ); }
226
GetScHTMLPos227 inline SCCOLROW Get( ScHTMLOrient eOrient ) const
228 { return (eOrient == tdCol) ? mnCol : mnRow; }
SetScHTMLPos229 inline void Set( SCCOL nCol, SCROW nRow )
230 { mnCol = nCol; mnRow = nRow; }
SetScHTMLPos231 inline void Set( const ScAddress& rAddr )
232 { Set( rAddr.Col(), rAddr.Row() ); }
MoveScHTMLPos233 inline void Move( SCsCOL nColDiff, SCsROW nRowDiff )
234 { mnCol = mnCol + nColDiff; mnRow = mnRow + nRowDiff; }
MakeAddrScHTMLPos235 inline ScAddress MakeAddr() const
236 { return ScAddress( mnCol, mnRow, 0 ); }
237 };
238
operator ==(const ScHTMLPos & rPos1,const ScHTMLPos & rPos2)239 inline bool operator==( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 )
240 {
241 return (rPos1.mnRow == rPos2.mnRow) && (rPos1.mnCol == rPos2.mnCol);
242 }
243
operator <(const ScHTMLPos & rPos1,const ScHTMLPos & rPos2)244 inline bool operator<( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 )
245 {
246 return (rPos1.mnRow < rPos2.mnRow) || ((rPos1.mnRow == rPos2.mnRow) && (rPos1.mnCol < rPos2.mnCol));
247 }
248
249 // ----------------------------------------------------------------------------
250
251 /** A 2D cell size in an HTML table. */
252 struct ScHTMLSize
253 {
254 SCCOL mnCols;
255 SCROW mnRows;
256
ScHTMLSizeScHTMLSize257 inline explicit ScHTMLSize() : mnCols( 0 ), mnRows( 0 ) {}
ScHTMLSizeScHTMLSize258 inline explicit ScHTMLSize( SCCOL nCols, SCROW nRows ) :
259 mnCols( nCols ), mnRows( nRows ) {}
260
GetScHTMLSize261 inline SCCOLROW Get( ScHTMLOrient eOrient ) const
262 { return (eOrient == tdCol) ? mnCols : mnRows; }
SetScHTMLSize263 inline void Set( SCCOL nCols, SCROW nRows )
264 { mnCols = nCols; mnRows = nRows; }
ExpandScHTMLSize265 inline void Expand( SCsCOL nColDiff, SCsROW nRowDiff )
266 { mnCols = mnCols + nColDiff; mnRows = mnRows + nRowDiff; }
267 };
268
operator ==(const ScHTMLSize & rSize1,const ScHTMLSize & rSize2)269 inline bool operator==( const ScHTMLSize& rSize1, const ScHTMLSize& rSize2 )
270 {
271 return (rSize1.mnRows == rSize2.mnRows) && (rSize1.mnCols == rSize2.mnCols);
272 }
273
274 // ============================================================================
275
276 /** A single entry containing a line of text or representing a table. */
277 struct ScHTMLEntry : public ScEEParseEntry
278 {
279 public:
280 explicit ScHTMLEntry(
281 const SfxItemSet& rItemSet,
282 ScHTMLTableId nTableId = SC_HTML_NO_TABLE );
283
284 /** Returns true, if the selection of the entry is empty. */
IsEmptyScHTMLEntry285 inline bool IsEmpty() const { return !aSel.HasRange(); }
286 /** Returns true, if the entry has any content to be imported. */
287 bool HasContents() const;
288 /** Returns true, if the entry represents a table. */
IsTableScHTMLEntry289 inline bool IsTable() const { return nTab != SC_HTML_NO_TABLE; }
290 /** Returns true, if the entry represents a table. */
GetTableIdScHTMLEntry291 inline ScHTMLTableId GetTableId() const { return nTab; }
292
293 /** Sets or cleares the import always state. */
SetImportAlwaysScHTMLEntry294 inline void SetImportAlways( bool bSet = true ) { mbImportAlways = bSet; }
295 /** Sets start point of the entry selection to the start of the import info object. */
296 void AdjustStart( const ImportInfo& rInfo );
297 /** Sets end point of the entry selection to the end of the import info object. */
298 void AdjustEnd( const ImportInfo& rInfo );
299 /** Deletes leading and trailing empty paragraphs from the entry. */
300 void Strip( const EditEngine& rEditEngine );
301
302 /** Returns read/write access to the item set of this entry. */
GetItemSetScHTMLEntry303 inline SfxItemSet& GetItemSet() { return aItemSet; }
304 /** Returns read-only access to the item set of this entry. */
GetItemSetScHTMLEntry305 inline const SfxItemSet& GetItemSet() const { return aItemSet; }
306
307 private:
308 bool mbImportAlways; /// true = Always import this entry.
309 };
310
311 // ============================================================================
312
313 /** This struct handles creation of unique table identifiers. */
314 struct ScHTMLTableAutoId
315 {
316 const ScHTMLTableId mnTableId; /// The created unique table identifier.
317 ScHTMLTableId& mrnUnusedId; /// Reference to global unused identifier variable.
318
319 /** The constructor assigns an unused identifier to member mnTableId. */
320 explicit ScHTMLTableAutoId( ScHTMLTableId& rnUnusedId );
321 };
322
323 // ----------------------------------------------------------------------------
324
325 class ScHTMLTableMap;
326
327 /** Stores data for one table in an HTML document.
328
329 This class does the main work for importing an HTML document. It manages
330 the correct insertion of parse entries into the correct cells and the
331 creation of nested tables. Recalculation of resulting document size and
332 position is done recursively in all nested tables.
333 */
334 class ScHTMLTable
335 {
336 public:
337 /** Creates a new HTML table without content.
338 @descr Internally handles a current cell position. This position is
339 invalid until first calls of RowOn() and DataOn().
340 @param rParentTable Reference to the parent table that owns this table.
341 @param bPreFormText true = Table is based on preformatted text (<pre> tag). */
342 explicit ScHTMLTable(
343 ScHTMLTable& rParentTable,
344 const ImportInfo& rInfo,
345 bool bPreFormText );
346
347 virtual ~ScHTMLTable();
348
349 /** Returns the name of the table, specified in the TABLE tag. */
GetTableName() const350 inline const String& GetTableName() const { return maTableName; }
351 /** Returns the unique identifier of the table. */
GetTableId() const352 inline ScHTMLTableId GetTableId() const { return maTableId.mnTableId; }
353 /** Returns the table size. */
GetSize() const354 inline const ScHTMLSize& GetSize() const { return maSize; }
355 /** Returns the cell spanning of the specified cell. */
356 ScHTMLSize GetSpan( const ScHTMLPos& rCellPos ) const;
357
358 /** Searches in all nested tables for the specified table.
359 @param nTableId Unique identifier of the table. */
360 ScHTMLTable* FindNestedTable( ScHTMLTableId nTableId ) const;
361
362 /** Puts the item into the item set of the current entry. */
363 void PutItem( const SfxPoolItem& rItem );
364 /** Inserts a text portion into current entry. */
365 void PutText( const ImportInfo& rInfo );
366 /** Inserts a new line, if in preformatted text, else does nothing. */
367 void InsertPara( const ImportInfo& rInfo );
368
369 /** Inserts a line break (<br> tag).
370 @descr Inserts the current entry regardless if it is empty. */
371 void BreakOn();
372 /** Inserts a heading line (<p> and <h*> tags). */
373 void HeadingOn();
374 /** Processes a hyperlink (<a> tag). */
375 void AnchorOn();
376
377 /** Starts a *new* table nested in this table (<table> tag).
378 @return Pointer to the new table. */
379 ScHTMLTable* TableOn( const ImportInfo& rInfo );
380 /** Closes *this* table (</table> tag).
381 @return Pointer to the parent table. */
382 ScHTMLTable* TableOff( const ImportInfo& rInfo );
383 /** Starts a *new* table based on preformatted text (<pre> tag).
384 @return Pointer to the new table. */
385 ScHTMLTable* PreOn( const ImportInfo& rInfo );
386 /** Closes *this* table based on preformatted text (</pre> tag).
387 @return Pointer to the parent table. */
388 ScHTMLTable* PreOff( const ImportInfo& rInfo );
389
390 /** Starts next row (<tr> tag).
391 @descr Cell address is invalid until first call of DataOn(). */
392 void RowOn( const ImportInfo& rInfo );
393 /** Closes the current row (<tr> tag).
394 @descr Cell address is invalid until call of RowOn() and DataOn(). */
395 void RowOff( const ImportInfo& rInfo );
396 /** Starts the next cell (<td> or <th> tag). */
397 void DataOn( const ImportInfo& rInfo );
398 /** Closes the current cell (</td> or </th> tag).
399 @descr Cell address is invalid until next call of DataOn(). */
400 void DataOff( const ImportInfo& rInfo );
401
402 /** Starts the body of the HTML document (<body> tag). */
403 void BodyOn( const ImportInfo& rInfo );
404 /** Closes the body of the HTML document (</body> tag). */
405 void BodyOff( const ImportInfo& rInfo );
406
407 /** Closes *this* table (</table> tag) or preformatted text (</pre> tag).
408 @descr Used to close this table object regardless on opening tag type.
409 @return Pointer to the parent table, or this, if no parent found. */
410 ScHTMLTable* CloseTable( const ImportInfo& rInfo );
411
412 /** Returns the resulting document row/column count of the specified HTML row/column. */
413 SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const;
414 /** Returns the resulting document row/column count in the half-open range [nCellBegin, nCellEnd). */
415 SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellBegin, SCCOLROW nCellEnd ) const;
416 /** Returns the total document row/column count in the specified direction. */
417 SCCOLROW GetDocSize( ScHTMLOrient eOrient ) const;
418 /** Returns the total document row/column count of the specified HTML cell. */
419 ScHTMLSize GetDocSize( const ScHTMLPos& rCellPos ) const;
420
421 /** Returns the resulting Calc position of the top left edge of the table. */
GetDocPos() const422 inline const ScHTMLPos& GetDocPos() const { return maDocBasePos; }
423 /** Calculates the resulting Calc position of the specified HTML column/row. */
424 SCCOLROW GetDocPos( ScHTMLOrient eOrient, SCCOLROW nCellPos = 0 ) const;
425 /** Calculates the resulting Calc position of the specified HTML cell. */
426 ScHTMLPos GetDocPos( const ScHTMLPos& rCellPos ) const;
427
428 /** Calculates the current Calc document area of this table. */
429 void GetDocRange( ScRange& rRange ) const;
430
431 /** Applies border formatting to the passed document. */
432 void ApplyCellBorders( ScDocument* pDoc, const ScAddress& rFirstPos ) const;
433
434 protected:
435 /** Creates a new HTML table without parent.
436 @descr This constructor is used to create the "global table". */
437 explicit ScHTMLTable(
438 SfxItemPool& rPool,
439 EditEngine& rEditEngine,
440 ScEEParseList& rEEParseList,
441 ScHTMLTableId& rnUnusedId );
442
443 /** Fills all empty cells in this and nested tables with dummy parse entries. */
444 void FillEmptyCells();
445 /** Recalculates the size of all columns/rows in the table, regarding nested tables. */
446 void RecalcDocSize();
447 /** Recalculates the position of all cell entries and nested tables.
448 @param rBasePos The origin of the table in the Calc document. */
449 void RecalcDocPos( const ScHTMLPos& rBasePos );
450
451 private:
452 typedef ::std::auto_ptr< ScHTMLTableMap > ScHTMLTableMapPtr;
453 typedef ::std::auto_ptr< SfxItemSet > SfxItemSetPtr;
454 typedef ::std::vector< SCCOLROW > ScSizeVec;
455 typedef ::std::list< ScHTMLEntry* > ScHTMLEntryList;
456 typedef ::std::map< ScHTMLPos, ScHTMLEntryList > ScHTMLEntryMap;
457 typedef ::std::auto_ptr< ScHTMLEntry > ScHTMLEntryPtr;
458
459 /** Returns true, if the current cell does not contain an entry yet. */
460 bool IsEmptyCell() const;
461 /** Returns the item set from cell, row, or table, depending on current state. */
462 const SfxItemSet& GetCurrItemSet() const;
463
464 /** Returns true, if import info represents a space character. */
465 static bool IsSpaceCharInfo( const ImportInfo& rInfo );
466
467 /** Creates and returns a new empty flying entry at position (0,0). */
468 ScHTMLEntryPtr CreateEntry() const;
469 /** Creates a new flying entry.
470 @param rInfo Contains the initial edit engine selection for the entry. */
471 void CreateNewEntry( const ImportInfo& rInfo );
472
473 /** Inserts an empty line in front of the next entry. */
474 void InsertLeadingEmptyLine();
475
476 /** Pushes the passed entry into the list of the current cell. */
477 void ImplPushEntryToList( ScHTMLEntryList& rEntryList, ScHTMLEntryPtr& rxEntry );
478 /** Tries to insert the entry into the current cell.
479 @descr If insertion is not possible (i.e., currently no cell open), the
480 entry will be inserted into the parent table.
481 @return true = Entry as been pushed into the current cell; false = Entry dropped. */
482 bool PushEntry( ScHTMLEntryPtr& rxEntry );
483 /** Puts the current entry into the entry list, if it is not empty.
484 @param rInfo The import info struct containing the end position of the current entry.
485 @param bLastInCell true = If cell is still empty, put this entry always.
486 @return true = Entry as been pushed into the current cell; false = Entry dropped. */
487 bool PushEntry( const ImportInfo& rInfo, bool bLastInCell = false );
488 /** Pushes a new entry into current cell which references a nested table.
489 @return true = Entry as been pushed into the current cell; false = Entry dropped. */
490 bool PushTableEntry( ScHTMLTableId nTableId );
491
492 /** Tries to find a table from the table container.
493 @descr Assumes that the table is located in the current container or
494 that the passed table identifier is 0.
495 @param nTableId Unique identifier of the table or 0. */
496 ScHTMLTable* GetExistingTable( ScHTMLTableId nTableId ) const;
497 /** Inserts a nested table in the current cell at the specified position.
498 @param bPreFormText true = New table is based on preformatted text (<pre> tag). */
499 ScHTMLTable* InsertNestedTable( const ImportInfo& rInfo, bool bPreFormText );
500
501 /** Inserts a new cell in an unused position, starting from current cell position. */
502 void InsertNewCell( const ScHTMLSize& rSpanSize );
503
504 /** Set internal states for a new table row. */
505 void ImplRowOn();
506 /** Set internal states for leaving a table row. */
507 void ImplRowOff();
508 /** Set internal states for entering a new table cell. */
509 void ImplDataOn( const ScHTMLSize& rSpanSize );
510 /** Set internal states for leaving a table cell. */
511 void ImplDataOff();
512
513 /** Inserts additional formatting options from import info into the item set. */
514 void ProcessFormatOptions( SfxItemSet& rItemSet, const ImportInfo& rInfo );
515
516 /** Updates the document column/row size of the specified column or row.
517 @descr Only increases the present count, never decreases. */
518 void SetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nSize );
519 /** Calculates and sets the resulting size the cell needs in the document.
520 @descr Reduces the needed size in merged cells.
521 @param nCellPos The first column/row position of the (merged) cell.
522 @param nCellSpan The cell spanning in the specified orientation.
523 @param nRealDocSize The raw document size of all entries of the cell. */
524 void CalcNeededDocSize(
525 ScHTMLOrient eOrient, SCCOLROW nCellPos,
526 SCCOLROW nCellSpan, SCCOLROW nRealDocSize );
527
528 private:
529 ScHTMLTable* mpParentTable; /// Pointer to parent table.
530 ScHTMLTableMapPtr mxNestedTables; /// Table of nested HTML tables.
531 String maTableName; /// Table name from <table id> option.
532 ScHTMLTableAutoId maTableId; /// Unique identifier of this table.
533 SfxItemSet maTableItemSet; /// Items for the entire table.
534 SfxItemSetPtr mxRowItemSet; /// Items for the current table row.
535 SfxItemSetPtr mxDataItemSet; /// Items for the current cell.
536 ScRangeList maHMergedCells; /// List of all horizontally merged cells.
537 ScRangeList maVMergedCells; /// List of all vertically merged cells.
538 ScRangeList maUsedCells; /// List of all used cells.
539 EditEngine& mrEditEngine; /// Edit engine (from ScEEParser).
540 ScEEParseList& mrEEParseList; /// List that owns the parse entries (from ScEEParser).
541 ScHTMLEntryMap maEntryMap; /// List of entries for each cell.
542 ScHTMLEntryList* mpCurrEntryList; /// Current entry list from map for faster access.
543 ScHTMLEntryPtr mxCurrEntry; /// Working entry, not yet inserted in a list.
544 ScSizeVec maCumSizes[ 2 ]; /// Cumulated cell counts for each HTML table column/row.
545 ScHTMLSize maSize; /// Size of the table.
546 ScHTMLPos maCurrCell; /// Address of current cell to fill.
547 ScHTMLPos maDocBasePos; /// Resulting base address in a Calc document.
548 bool mbBorderOn; /// true = Table borders on.
549 bool mbPreFormText; /// true = Table from preformatted text (<pre> tag).
550 bool mbRowOn; /// true = Inside of <tr> </tr>.
551 bool mbDataOn; /// true = Inside of <td> </td> or <th> </th>.
552 bool mbPushEmptyLine; /// true = Insert empty line before current entry.
553 };
554
555 // ----------------------------------------------------------------------------
556
557 /** The "global table" representing the entire HTML document. */
558 class ScHTMLGlobalTable : public ScHTMLTable
559 {
560 public:
561 explicit ScHTMLGlobalTable(
562 SfxItemPool& rPool,
563 EditEngine& rEditEngine,
564 ScEEParseList& rEEParseList,
565 ScHTMLTableId& rnUnusedId );
566
567 virtual ~ScHTMLGlobalTable();
568
569 /** Recalculates sizes and resulting positions of all document entries. */
570 void Recalc();
571 };
572
573 // ============================================================================
574
575 /** The HTML parser for data queries. Focuses on data import, not on layout.
576
577 Builds the table structure correctly, ignores extended formatting like
578 pictures or column widths.
579 */
580 class ScHTMLQueryParser : public ScHTMLParser
581 {
582 public:
583 explicit ScHTMLQueryParser( EditEngine* pEditEngine, ScDocument* pDoc );
584 virtual ~ScHTMLQueryParser();
585
586 virtual sal_uLong Read( SvStream& rStrm, const String& rBaseURL );
587
588 /** Returns the "global table" which contains the entire HTML document. */
589 virtual const ScHTMLTable* GetGlobalTable() const;
590
591 private:
592 /** Handles all possible tags in the HTML document. */
593 void ProcessToken( const ImportInfo& rInfo );
594 /** Inserts a text portion into current entry. */
595 void InsertText( const ImportInfo& rInfo );
596 /** Processes the <font> tag. */
597 void FontOn( const ImportInfo& rInfo );
598
599 /** Processes the <meta> tag. */
600 void MetaOn( const ImportInfo& rInfo );
601 /** Opens the title of the HTML document (<title> tag). */
602 void TitleOn( const ImportInfo& rInfo );
603 /** Closes the title of the HTML document (</title> tag). */
604 void TitleOff( const ImportInfo& rInfo );
605
606 /** Opens a new table at the current position. */
607 void TableOn( const ImportInfo& rInfo );
608 /** Closes the current table. */
609 void TableOff( const ImportInfo& rInfo );
610 /** Opens a new table based on preformatted text. */
611 void PreOn( const ImportInfo& rInfo );
612 /** Closes the current preformatted text table. */
613 void PreOff( const ImportInfo& rInfo );
614
615 /** Closes the current table, regardless on opening tag. */
616 void CloseTable( const ImportInfo& rInfo );
617
618 DECL_LINK( HTMLImportHdl, const ImportInfo* );
619
620 private:
621 typedef ::std::auto_ptr< ScHTMLGlobalTable > ScHTMLGlobalTablePtr;
622
623 String maTitle; /// The title of the document.
624 ScHTMLGlobalTablePtr mxGlobTable; /// Contains the entire imported document.
625 ScHTMLTable* mpCurrTable; /// Pointer to current table (performance).
626 ScHTMLTableId mnUnusedId; /// First unused table identifier.
627 bool mbTitleOn; /// true = Inside of <title> </title>.
628 };
629
630
631 // ============================================================================
632
633 #endif
634
635