1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_sw.hxx"
26
27 #include <tools/stream.hxx>
28 #include <hintids.hxx>
29 #include <rtl/tencinfo.h>
30 #include <sfx2/printer.hxx>
31 #include <editeng/fontitem.hxx>
32 #include <editeng/langitem.hxx>
33 #include <editeng/brkitem.hxx>
34 #include <editeng/scripttypeitem.hxx>
35 #include <shellio.hxx>
36 #include <doc.hxx>
37 #include <swtypes.hxx>
38 #include <ndtxt.hxx>
39 #include <pam.hxx>
40 #include <frmatr.hxx>
41 #include <fltini.hxx>
42 #include <pagedesc.hxx>
43 #include <breakit.hxx>
44 #include <swerror.h>
45 #ifndef _STATSTR_HRC
46 #include <statstr.hrc> // ResId fuer Statusleiste
47 #endif
48 #include <mdiexp.hxx> // ...Percent()
49 #include <poolfmt.hxx>
50
51 #include "vcl/metric.hxx"
52
53 #define ASC_BUFFLEN 4096
54
55 class SwASCIIParser
56 {
57 SwDoc* pDoc;
58 SwPaM* pPam;
59 SvStream& rInput;
60 sal_Char* pArr;
61 const SwAsciiOptions& rOpt;
62 SfxItemSet* pItemSet;
63 long nFileSize;
64 sal_uInt16 nScript;
65 bool bNewDoc;
66
67 sal_uLong ReadChars();
68 void InsertText( const String& rStr );
69
70 public:
71 SwASCIIParser( SwDoc* pD, const SwPaM& rCrsr, SvStream& rIn,
72 int bReadNewDoc, const SwAsciiOptions& rOpts );
73 ~SwASCIIParser();
74
75 sal_uLong CallParser();
76 };
77
78
// Entry point called through the general Reader interface
Read(SwDoc & rDoc,const String &,SwPaM & rPam,const String &)80 sal_uLong AsciiReader::Read( SwDoc &rDoc, const String&, SwPaM &rPam, const String & )
81 {
82 if( !pStrm )
83 {
84 ASSERT( sal_False, "ASCII-Read without stream" );
85 return ERR_SWG_READ_ERROR;
86 }
87
88 //JP 18.01.96: Alle Ueberschriften sind normalerweise ohne
89 // Kapitelnummer. Darum hier explizit abschalten
90 // weil das Default jetzt wieder auf AN ist.
91 if( !bInsertMode )
92 Reader::SetNoOutlineNum( rDoc );
93
94 SwASCIIParser* pParser = new SwASCIIParser( &rDoc, rPam, *pStrm,
95 !bInsertMode, aOpt.GetASCIIOpts() );
96 sal_uLong nRet = pParser->CallParser();
97
98 delete pParser;
99 // after Read reset the options
100 aOpt.ResetASCIIOpts();
101 return nRet;
102 }
103
SwASCIIParser(SwDoc * pD,const SwPaM & rCrsr,SvStream & rIn,int bReadNewDoc,const SwAsciiOptions & rOpts)104 SwASCIIParser::SwASCIIParser(SwDoc* pD, const SwPaM& rCrsr, SvStream& rIn,
105 int bReadNewDoc, const SwAsciiOptions& rOpts)
106 : pDoc(pD), rInput(rIn), rOpt(rOpts), nScript(0), bNewDoc(bReadNewDoc)
107 {
108 pPam = new SwPaM( *rCrsr.GetPoint() );
109 pArr = new sal_Char [ ASC_BUFFLEN + 2 ];
110
111 pItemSet = new SfxItemSet( pDoc->GetAttrPool(),
112 RES_CHRATR_FONT, RES_CHRATR_LANGUAGE,
113 RES_CHRATR_CJK_FONT, RES_CHRATR_CJK_LANGUAGE,
114 RES_CHRATR_CTL_FONT, RES_CHRATR_CTL_LANGUAGE,
115 0 );
116
117 // set defaults from the options
118 if( rOpt.GetLanguage() )
119 {
120 SvxLanguageItem aLang( (LanguageType)rOpt.GetLanguage(),
121 RES_CHRATR_LANGUAGE );
122 pItemSet->Put( aLang );
123 pItemSet->Put( aLang, RES_CHRATR_CJK_LANGUAGE );
124 pItemSet->Put( aLang, RES_CHRATR_CTL_LANGUAGE );
125 }
126 if( rOpt.GetFontName().Len() )
127 {
128 Font aTextFont( rOpt.GetFontName(), Size( 0, 10 ) );
129 if( pDoc->getPrinter( false ) )
130 aTextFont = pDoc->getPrinter( false )->GetFontMetric( aTextFont );
131 SvxFontItem aFont( aTextFont.GetFamily(), aTextFont.GetName(),
132 aEmptyStr, aTextFont.GetPitch(), aTextFont.GetCharSet(), RES_CHRATR_FONT );
133 pItemSet->Put( aFont );
134 pItemSet->Put( aFont, RES_CHRATR_CJK_FONT );
135 pItemSet->Put( aFont, RES_CHRATR_CTL_FONT );
136 }
137 }
138
~SwASCIIParser()139 SwASCIIParser::~SwASCIIParser()
140 {
141 delete pPam;
142 delete [] pArr;
143 delete pItemSet;
144 }
145
146
// Runs the parser
CallParser()148 sal_uLong SwASCIIParser::CallParser()
149 {
150 rInput.Seek(STREAM_SEEK_TO_END);
151 rInput.ResetError();
152
153 nFileSize = rInput.Tell();
154 rInput.Seek(STREAM_SEEK_TO_BEGIN);
155 rInput.ResetError();
156
157 ::StartProgress( STR_STATSTR_W4WREAD, 0, nFileSize, pDoc->GetDocShell() );
158
159 SwPaM* pInsPam = 0;
160 xub_StrLen nSttCntnt = 0;
161 if (!bNewDoc)
162 {
163 const SwNodeIndex& rTmp = pPam->GetPoint()->nNode;
164 pInsPam = new SwPaM( rTmp, rTmp, 0, -1 );
165 nSttCntnt = pPam->GetPoint()->nContent.GetIndex();
166 }
167
168 SwTxtFmtColl *pColl = 0;
169
170 if (bNewDoc)
171 {
172 pColl = pDoc->GetTxtCollFromPool(RES_POOLCOLL_HTML_PRE, false);
173 if (!pColl)
174 pColl = pDoc->GetTxtCollFromPool(RES_POOLCOLL_STANDARD,false);
175 if (pColl)
176 pDoc->SetTxtFmtColl(*pPam, pColl);
177 }
178
179 sal_uLong nError = ReadChars();
180
181 if( pItemSet )
182 {
183 // set only the attribute, for scanned scripts.
184 if( !( SCRIPTTYPE_LATIN & nScript ))
185 {
186 pItemSet->ClearItem( RES_CHRATR_FONT );
187 pItemSet->ClearItem( RES_CHRATR_LANGUAGE );
188 }
189 if( !( SCRIPTTYPE_ASIAN & nScript ))
190 {
191 pItemSet->ClearItem( RES_CHRATR_CJK_FONT );
192 pItemSet->ClearItem( RES_CHRATR_CJK_LANGUAGE );
193 }
194 if( !( SCRIPTTYPE_COMPLEX & nScript ))
195 {
196 pItemSet->ClearItem( RES_CHRATR_CTL_FONT );
197 pItemSet->ClearItem( RES_CHRATR_CTL_LANGUAGE );
198 }
199 if( pItemSet->Count() )
200 {
201 if( bNewDoc )
202 {
203 if (pColl)
204 {
205 // Using the pool defaults for the font causes significant
206 // trouble for the HTML filter, because it is not able
207 // to export the pool defaults (or to be more precise:
208 // the HTML filter is not able to detect whether a pool
209 // default has changed or not. Even a comparison with the
210 // HTMLi template does not work, because the defaults are
211 // not copied when a new doc is created. The result of
212 // comparing pool defaults therefor would be that the
213 // defaults are exported always if the have changed for
214 // text documents in general. That's not sensible, as well
215 // as it is not sensible to export them always.
216 sal_uInt16 aWhichIds[4] =
217 {
218 RES_CHRATR_FONT, RES_CHRATR_CJK_FONT,
219 RES_CHRATR_CTL_FONT, 0
220 };
221 sal_uInt16 *pWhichIds = aWhichIds;
222 while (*pWhichIds)
223 {
224 const SfxPoolItem *pItem;
225 if (SFX_ITEM_SET == pItemSet->GetItemState(*pWhichIds,
226 false, &pItem))
227 {
228 pColl->SetFmtAttr( *pItem );
229 pItemSet->ClearItem( *pWhichIds );
230 }
231 ++pWhichIds;
232 }
233 }
234 if (pItemSet->Count())
235 pDoc->SetDefault(*pItemSet);
236 }
237 else if( pInsPam )
238 {
239 // then set over the insert range the defined attributes
240 *pInsPam->GetMark() = *pPam->GetPoint();
241 pInsPam->GetPoint()->nNode++;
242 pInsPam->GetPoint()->nContent.Assign(
243 pInsPam->GetCntntNode(), nSttCntnt );
244
245 // !!!!!
246 ASSERT( sal_False, "Have to change - hard attr. to para. style" );
247 pDoc->InsertItemSet( *pInsPam, *pItemSet, 0 );
248 }
249 }
250 delete pItemSet, pItemSet = 0;
251 }
252
253 if( pInsPam )
254 delete pInsPam;
255
256 ::EndProgress( pDoc->GetDocShell() );
257 return nError;
258 }
259
ReadChars()260 sal_uLong SwASCIIParser::ReadChars()
261 {
262 sal_Unicode *pStt = 0, *pEnd = 0, *pLastStt = 0;
263 long nReadCnt = 0, nLineLen = 0;
264 sal_Unicode cLastCR = 0;
265 bool bSwapUnicode = false;
266
267 const SwAsciiOptions *pUseMe=&rOpt;
268 SwAsciiOptions aEmpty;
269 if (nFileSize >= 2 &&
270 aEmpty.GetFontName() == rOpt.GetFontName() &&
271 aEmpty.GetCharSet() == rOpt.GetCharSet() &&
272 aEmpty.GetLanguage() == rOpt.GetLanguage() &&
273 aEmpty.GetParaFlags() == rOpt.GetParaFlags())
274 {
275 sal_uLong nLen, nOrig;
276 nOrig = nLen = rInput.Read(pArr, ASC_BUFFLEN);
277 CharSet eCharSet;
278 bool bRet = SwIoSystem::IsDetectableText(pArr, nLen, &eCharSet, &bSwapUnicode);
279 ASSERT(bRet, "Autodetect of text import without nag dialog must "
280 "have failed");
281 if (bRet && eCharSet != RTL_TEXTENCODING_DONTKNOW)
282 {
283 aEmpty.SetCharSet(eCharSet);
284 rInput.SeekRel(-(long(nLen)));
285 }
286 else
287 rInput.SeekRel(-(long(nOrig)));
288 pUseMe=&aEmpty;
289 }
290
291 rtl_TextToUnicodeConverter hConverter=0;
292 rtl_TextToUnicodeContext hContext=0;
293 CharSet currentCharSet = pUseMe->GetCharSet();
294 if (RTL_TEXTENCODING_UCS2 != currentCharSet)
295 {
296 if( currentCharSet == RTL_TEXTENCODING_DONTKNOW )
297 currentCharSet = RTL_TEXTENCODING_ASCII_US;
298 hConverter = rtl_createTextToUnicodeConverter( currentCharSet );
299 ASSERT( hConverter, "no string convert available" );
300 if (!hConverter)
301 return ERROR_SW_READ_BASE;
302 bSwapUnicode = false;
303 hContext = rtl_createTextToUnicodeContext( hConverter );
304 }
305 else if (pUseMe != &aEmpty) // Already successfully figured out type
306 {
307 rInput.StartReadingUnicodeText( currentCharSet );
308 bSwapUnicode = rInput.IsEndianSwap();
309 }
310
311 String sWork;
312 sal_uLong nArrOffset = 0;
313
314 do {
315 if( pStt >= pEnd )
316 {
317 if( pLastStt != pStt )
318 InsertText( String( pLastStt ));
319
320 // lese einen neuen Block ein
321 sal_uLong lGCount;
322 if( SVSTREAM_OK != rInput.GetError() || 0 == (lGCount =
323 rInput.Read( pArr + nArrOffset,
324 ASC_BUFFLEN - nArrOffset )))
325 break; // aus der WHILE-Schleife heraus
326
327 /*
328 #98380#
329 If there was some unconverted bytes on the last cycle then they
330 were put at the beginning of the array, so total bytes available
331 to convert this cycle includes them. If we found 0 following bytes
332 then we ignore the previous partial character.
333 */
334 lGCount+=nArrOffset;
335
336 if( hConverter )
337 {
338 sal_uInt32 nInfo;
339 sal_Size nNewLen = lGCount, nCntBytes;
340 sal_Unicode* pBuf = sWork.AllocBuffer( static_cast< xub_StrLen >(nNewLen) );
341
342 nNewLen = rtl_convertTextToUnicode( hConverter, hContext,
343 pArr, lGCount, pBuf, nNewLen,
344 (
345 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
346 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
347 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT |
348 RTL_TEXTTOUNICODE_FLAGS_GLOBAL_SIGNATURE
349 ),
350 &nInfo,
351 &nCntBytes );
352 if( 0 != ( nArrOffset = lGCount - nCntBytes ) )
353 memmove( pArr, pArr + nCntBytes, nArrOffset );
354 sWork.ReleaseBufferAccess( static_cast< xub_StrLen >(nNewLen) );
355
356 pStt = pLastStt = sWork.GetBufferAccess();
357 pEnd = pStt + nNewLen;
358 }
359 else
360 {
361 pStt = pLastStt = (sal_Unicode*)pArr;
362 pEnd = (sal_Unicode*)(pArr + lGCount);
363
364 if( bSwapUnicode )
365 {
366 sal_Char* pF = pArr, *pN = pArr + 1;
367 for( sal_uLong n = 0; n < lGCount; n += 2, pF += 2, pN += 2 )
368 {
369 sal_Char c = *pF;
370 *pF = *pN;
371 *pN = c;
372 }
373 }
374 }
375
376 *pEnd = 0;
377 nReadCnt += lGCount;
378
379 ::SetProgressState( nReadCnt, pDoc->GetDocShell() );
380
381 if( cLastCR )
382 {
383 if( 0x0a == *pStt && 0x0d == cLastCR )
384 pLastStt = ++pStt;
385 cLastCR = 0;
386 nLineLen = 0;
387 // JP 03.04.96: das letzte am Ende nehmen wir nicht
388 if( !rInput.IsEof() || !(pEnd == pStt ||
389 ( !*pEnd && pEnd == pStt+1 ) ) )
390 pDoc->SplitNode( *pPam->GetPoint(), false );
391 }
392 }
393
394 bool bIns = true, bSplitNode = false;
395 switch( *pStt )
396 {
397 //JP 12.11.2001: task 94636 - don't ignore all behind the zero character,
398 // change it to the default "control character"
399 // case 0:
400 // pEnd = pStt;
401 // bIns = false ;
402 // break;
403
404 case 0x0a: if( LINEEND_LF == pUseMe->GetParaFlags() )
405 {
406 bIns = false;
407 *pStt = 0;
408 ++pStt;
409
410 // JP 03.04.96: das letzte am Ende nehmen wir nicht
411 if( !rInput.IsEof() || pEnd != pStt )
412 bSplitNode = true;
413 }
414 break;
415
416 case 0x0d: if( LINEEND_LF != pUseMe->GetParaFlags() )
417 {
418 bIns = false;
419 *pStt = 0;
420 ++pStt;
421
422 bool bChkSplit = false;
423 if( LINEEND_CRLF == pUseMe->GetParaFlags() )
424 {
425 if( pStt == pEnd )
426 cLastCR = 0x0d;
427 else if( 0x0a == *pStt )
428 {
429 ++pStt;
430 bChkSplit = true;
431 }
432 }
433 else
434 bChkSplit = true;
435
436 // JP 03.04.96: das letzte am Ende nehmen wir nicht
437 if( bChkSplit && ( !rInput.IsEof() || pEnd != pStt ))
438 bSplitNode = true;
439 }
440 break;
441
442 case 0x0c:
443 {
444 // dann mal einen harten Seitenumbruch einfuegen
445 *pStt++ = 0;
446 if( nLineLen )
447 {
448 // Change to charset system!!!!
449 //rOpt.GetCharSet();
450 InsertText( String( pLastStt ));
451 }
452 pDoc->SplitNode( *pPam->GetPoint(), false );
453 pDoc->InsertPoolItem(
454 *pPam, SvxFmtBreakItem( SVX_BREAK_PAGE_BEFORE, RES_BREAK ), 0);
455 pLastStt = pStt;
456 nLineLen = 0;
457 bIns = false;
458 }
459 break;
460
461 case 0x1a:
462 if( nReadCnt == nFileSize && pStt+1 == pEnd )
463 *pStt = 0;
464 else
465 *pStt = '#'; // Ersatzdarstellung
466 break;
467
468 case '\t': break;
469
470 default:
471 if( ' ' > *pStt )
472 // Ctrl-Zchn gefunden ersetze durch '#'
473 *pStt = '#';
474 break;
475 }
476
477 if( bIns )
478 {
479 if( ( nLineLen >= MAX_ASCII_PARA - 100 ) &&
480 ( ( *pStt == ' ' ) || ( nLineLen >= MAX_ASCII_PARA - 1 ) ) )
481 {
482 sal_Unicode c = *pStt;
483 *pStt = 0;
484 InsertText( String( pLastStt ));
485 pDoc->SplitNode( *pPam->GetPoint(), false );
486 pLastStt = pStt;
487 nLineLen = 0;
488 *pStt = c;
489 }
490 ++pStt;
491 ++nLineLen;
492 }
493 else if( bSplitNode )
494 {
495 // es wurde ein CR/LF erkannt, also speichere den Text
496
497 InsertText( String( pLastStt ));
498 pDoc->SplitNode( *pPam->GetPoint(), false );
499 pLastStt = pStt;
500 nLineLen = 0;
501 }
502 } while(true);
503
504 if( hConverter )
505 {
506 rtl_destroyTextToUnicodeContext( hConverter, hContext );
507 rtl_destroyTextToUnicodeConverter( hConverter );
508 }
509 return 0;
510 }
511
InsertText(const String & rStr)512 void SwASCIIParser::InsertText( const String& rStr )
513 {
514 pDoc->InsertString( *pPam, rStr );
515 if( pItemSet && pBreakIt && nScript != ( SCRIPTTYPE_LATIN |
516 SCRIPTTYPE_ASIAN |
517 SCRIPTTYPE_COMPLEX ) )
518 nScript |= pBreakIt->GetAllScriptsOfText( rStr );
519 }
520
521 /* vim: set noet sw=4 ts=4: */
522