xref: /aoo41x/main/sw/source/core/text/guess.cxx (revision efeef26f)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_sw.hxx"
26 
27 
28 #include <ctype.h>
29 #include <editeng/unolingu.hxx>
30 #include <tools/shl.hxx>    // needed for SW_MOD() macro
31 #include <errhdl.hxx>   // ASSERTs
32 #include <dlelstnr.hxx>
33 #include <swmodule.hxx>
34 #include <IDocumentSettingAccess.hxx>
35 #include <txtcfg.hxx>
36 #include <guess.hxx>
37 #include <inftxt.hxx>
38 #include <pagefrm.hxx>
39 #include <pagedesc.hxx> // SwPageDesc
40 #include <tgrditem.hxx>
41 #include <com/sun/star/i18n/BreakType.hpp>
42 #include <com/sun/star/i18n/WordType.hpp>
43 #include <unotools/charclass.hxx>
44 #include <porfld.hxx>
45 
46 using ::rtl::OUString;
47 using namespace ::com::sun::star;
48 using namespace ::com::sun::star::uno;
49 using namespace ::com::sun::star::i18n;
50 using namespace ::com::sun::star::beans;
51 using namespace ::com::sun::star::linguistic2;
52 
53 #define CH_FULL_BLANK 0x3000
54 
55 /*************************************************************************
56  *						SwTxtGuess::Guess
57  *
58  * provides information for line break calculation
59  * returns true if no line break has to be performed
60  * otherwise possible break or hyphenation position is determined
61  *************************************************************************/
62 
// Decides whether the text of rPor, starting at rInf.GetIdx(), fits into the
// width that is left on the current line (rInf.Width() - rInf.X()) and, if it
// does not, where the line can be broken.
//
// Parameters:
//   rPor       - the text portion being formatted. It is only queried here
//                (InFldGrp(), IsDropPortion(), IsFtnPortion()); its address
//                is used as the key for rInf.SetMaxWidthDiff().
//   rInf       - formatting state (text, current index, length, line start,
//                font, widths). If the previous portion is an expanded field,
//                the text and the index inside rInf are modified temporarily
//                and restored again before this function returns.
//   nPorHeight - height of the portion; asserted to be non-zero. Used for the
//                italic allowance (nPorHeight / 12) and for the cheap
//                pre-check that decides whether measuring is worthwhile.
//
// Returns sal_True if the text fits, i.e. no break is needed; nCutPos and
// nBreakWidth have been set in that case. Returns sal_False otherwise; the
// break description is left in the members nCutPos, nBreakPos, nBreakStart,
// nBreakWidth, xHyphWord and pHanging (not every path sets every member, see
// the comments in the body). For empty input sal_False is returned with only
// nCutPos set.
63 sal_Bool SwTxtGuess::Guess( const SwTxtPortion& rPor, SwTxtFormatInfo &rInf,
64                             const KSHORT nPorHeight )
65 {
	// Default: the cut position is the start of the text to be formatted.
66 	nCutPos = rInf.GetIdx();
67 
68 	// Empty strings are always 0: nothing to measure, so sal_False is returned
69 	if( !rInf.GetLen() || !rInf.GetTxt().Len() )
70 		return sal_False;
71 
72     ASSERT( rInf.GetIdx() < rInf.GetTxt().Len(),
73 			"+SwTxtGuess::Guess: invalid SwTxtFormatInfo" );
74 
75     ASSERT( nPorHeight, "+SwTxtGuess::Guess: no height" );
76 
    // Both values are filled in by the rInf.GetTxtSize() calls below.
77     sal_uInt16 nMinSize;
78     sal_uInt16 nMaxSizeDiff;
79 
80     const SwScriptInfo& rSI =
81             ((SwParaPortion*)rInf.GetParaPortion())->GetScriptInfo();
82 
    // Compression value handed to GetTxtSize()/GetTxtBreak(): 10000 only if
    // the active font is SW_CJK, rSI.CountCompChg() is non-zero,
    // rInf.IsMulti() is false and rPor is neither in a field group nor a drop
    // portion; otherwise 0.
    // NOTE(review): the unit of the value 10000 is not visible in this file -
    // confirm against GetTxtSize().
83     sal_uInt16 nMaxComp = ( SW_CJK == rInf.GetFont()->GetActual() ) &&
84                         rSI.CountCompChg() &&
85                         ! rInf.IsMulti() &&
86                         ! rPor.InFldGrp() &&
87                         ! rPor.IsDropPortion() ?
88                         10000 :
89                             0 ;
90 
    // Width still available on the line and number of characters to look at:
    // the rest of the string, limited to rInf.GetLen().
91     SwTwips nLineWidth = rInf.Width() - rInf.X();
92 	xub_StrLen nMaxLen = rInf.GetTxt().Len() - rInf.GetIdx();
93 
94     if ( rInf.GetLen() < nMaxLen )
95         nMaxLen = rInf.GetLen();
96 
97     if( !nMaxLen )
98 		return sal_False;
99 
	// Extra width reserved for an italic font; stays 0 unless the font is
	// italic and rInf.NotEOL() is false.
100 	KSHORT nItalic = 0;
101     if( ITALIC_NONE != rInf.GetFont()->GetItalic() && !rInf.NotEOL() )
102     {
103         sal_Bool bAddItalic = sal_True;
104 
105         // do not add extra italic value if we have an active character grid
106         if ( rInf.SnapToGrid() )
107         {
            // GETGRID is a macro defined elsewhere; it evidently provides the
            // pGrid pointer that is used on the next line.
108             GETGRID( rInf.GetTxtFrm()->FindPageFrm() )
109             bAddItalic = !pGrid || GRID_LINES_CHARS != pGrid->GetGridType();
110         }
111 
112         // do not add extra italic value for an isolated blank:
113         if ( 1 == rInf.GetLen() &&
114              CH_BLANK == rInf.GetTxt().GetChar( rInf.GetIdx() ) )
115             bAddItalic = sal_False;
116 
        // The allowance is one twelfth of the portion height.
117         nItalic = bAddItalic ? nPorHeight / 12 : 0;
118 
119         nLineWidth -= nItalic;
120 
121         // --> FME 2005-05-13 #i46524# LineBreak bug with italics
122         if ( nLineWidth < 0 ) nLineWidth = 0;
123         // <--
124 	}
125 
126 	// first check if everything fits to line
	// The text is only measured if twice the available width exceeds
	// nMaxLen * nPorHeight.
	// NOTE(review): presumably a cheap filter assuming that an average
	// character is about half as wide as the portion is high - confirm.
127     if ( long ( nLineWidth ) * 2 > long ( nMaxLen ) * nPorHeight )
128 	{
129         // call GetTxtSize with maximum compression (for kanas)
130         rInf.GetTxtSize( &rSI, rInf.GetIdx(), nMaxLen,
131                          nMaxComp, nMinSize, nMaxSizeDiff );
132 
133         nBreakWidth = nMinSize;
134 
135 		if ( nBreakWidth <= nLineWidth )
136 		{
137 			// portion fits to line
138 			nCutPos = rInf.GetIdx() + nMaxLen;
			// The italic allowance is added to the reported width only if
			// the portion ends at the end of the string or directly in
			// front of a CH_BREAK character.
139 			if( nItalic &&
140                 ( nCutPos >= rInf.GetTxt().Len() ||
141                   // --> FME 2005-05-13 #i48035# Needed for CalcFitToContent
142                   // if first line ends with a manual line break
143                   rInf.GetTxt().GetChar( nCutPos ) == CH_BREAK ) )
144                   // <--
145 				nBreakWidth = nBreakWidth + nItalic;
146 
147             // save maximum width for later use
148             if ( nMaxSizeDiff )
149                 rInf.SetMaxWidthDiff( (sal_uLong)&rPor, nMaxSizeDiff );
150 
151             return sal_True;
152 		}
153 	}
154 
	// Hyphenation is attempted only if it is switched on and not forbidden.
155 	sal_Bool bHyph = rInf.IsHyphenate() && !rInf.IsHyphForbud();
156 	xub_StrLen nHyphPos = 0;
157 
158 	// nCutPos is the first character not fitting to the current line
159 	// nHyphPos is the first character not fitting to the current line,
160 	// considering an additional "-" for hyphenation
161 	if( bHyph )
162 	{
163         nCutPos = rInf.GetTxtBreak( nLineWidth, nMaxLen, nMaxComp, nHyphPos );
164 
        // If GetTxtBreak() left nHyphPos at 0, fall back to the character in
        // front of the current index (if there is one).
165         if ( !nHyphPos && rInf.GetIdx() )
166 			nHyphPos = rInf.GetIdx() - 1;
167 	}
168 	else
169     {
170         nCutPos = rInf.GetTxtBreak( nLineWidth, nMaxLen, nMaxComp );
171 
172 #ifdef DBG_UTIL
        // Debug builds only: verify that the text in front of the guessed cut
        // position really fits. Note that nMinSize/nMaxSizeDiff are
        // overwritten by this check.
173         if ( STRING_LEN != nCutPos )
174         {
175             rInf.GetTxtSize( &rSI, rInf.GetIdx(), nCutPos - rInf.GetIdx(),
176                              nMaxComp, nMinSize, nMaxSizeDiff );
177             ASSERT( nMinSize <= nLineWidth, "What a Guess!!!" );
178         }
179 #endif
180     }
181 
	// The cut position lies behind the text under consideration: the whole
	// text should fit after all.
182 	if( nCutPos > rInf.GetIdx() + nMaxLen )
183 	{
184 		// second check if everything fits to line
		// Cut and break position both become the index of the last
		// character under consideration.
185 		nCutPos = nBreakPos = rInf.GetIdx() + nMaxLen - 1;
186         rInf.GetTxtSize( &rSI, rInf.GetIdx(), nMaxLen, nMaxComp,
187                          nMinSize, nMaxSizeDiff );
188 
189         nBreakWidth = nMinSize;
190 
191 		// The following comparison should actually always yield sal_True;
192 		// otherwise GetTxtBreak probably suffered a pixel rounding error...
193 		if ( nBreakWidth <= nLineWidth )
194 		{
195 			if( nItalic && ( nBreakPos + 1 ) >= rInf.GetTxt().Len() )
196 				nBreakWidth = nBreakWidth + nItalic;
197 
198             // save maximum width for later use
199             if ( nMaxSizeDiff )
200                 rInf.SetMaxWidthDiff( (sal_uLong)&rPor, nMaxSizeDiff );
201 
202             return sal_True;
203 		}
204 	}
205 
206     // we have to trigger an underflow for a footnote portion
207     // which does not fit to the current line
208     if ( rPor.IsFtnPortion() )
209     {
210         nBreakPos = rInf.GetIdx();
        // NOTE(review): nCutPos receives a length here (rInf.GetLen()), not a
        // string index as everywhere else - confirm that this is intended.
211         nCutPos = rInf.GetLen();
212         return sal_False;
213     }
214 
    // Number of characters of this portion that stay on the current line;
    // determines nBreakWidth at the end of the function.
215     xub_StrLen nPorLen = 0;
216     // do not call the break iterator if the character at nCutPos is a blank
217     xub_Unicode cCutChar = rInf.GetTxt().GetChar( nCutPos );
218     if( CH_BLANK == cCutChar || CH_FULL_BLANK == cCutChar )
219     {
220 		nBreakPos = nCutPos;
221 		xub_StrLen nX = nBreakPos;
222 
223         // we step back until a non blank character has been found
224         // or there is only one more character left
        // (nBreakPos ends up on the first blank of the run of blanks in front
        // of the cut position, but never below rInf.GetLineStart() + 1)
225         while( nX && nBreakPos > rInf.GetLineStart() + 1 &&
226                ( CH_BLANK == ( cCutChar = rInf.GetChar( --nX ) ) ||
227                  CH_FULL_BLANK == cCutChar ) )
228             --nBreakPos;
229 
230 		if( nBreakPos > rInf.GetIdx() )
231 			nPorLen = nBreakPos - rInf.GetIdx();
		// Skip forward over all blanks following the cut position; the
		// position reached this way is stored in nBreakStart.
232         while( ++nCutPos < rInf.GetTxt().Len() &&
233                ( CH_BLANK == ( cCutChar = rInf.GetChar( nCutPos ) ) ||
234                  CH_FULL_BLANK == cCutChar ) )
235 			; // nothing
236 
237 		nBreakStart = nCutPos;
238 	}
	// Otherwise the break iterator decides - provided one is available. If it
	// is not, none of the members below is changed and nPorLen stays 0.
239 	else if( pBreakIt->GetBreakIter().is() )
240 	{
241         // New: We should have a look into the last portion, if it was a
242         // field portion. For this, we expand the text of the field portion
243         // into our string. If the line break position is inside or before
244         // the field portion, we trigger an underflow.
245 
        // nOldIdx remembers the index for the restoration at the end of this
        // branch; cFldChr is non-zero only while a field is expanded.
246         xub_StrLen nOldIdx = rInf.GetIdx();
247         xub_Unicode cFldChr = 0;
248 
249 #if OSL_DEBUG_LEVEL > 1
250         XubString aDebugString;
251 #endif
252 
253         // be careful: a field portion can be both: 0x01 (common field)
254         // or 0x02 (the follow of a footnote)
        // The expansion only happens if the last portion is in a field group
        // but no footnote portion, the current index lies behind the line
        // start and the character in front of it is CH_TXTATR_BREAKWORD.
255         if ( rInf.GetLast() && rInf.GetLast()->InFldGrp() &&
256              ! rInf.GetLast()->IsFtnPortion() &&
257              rInf.GetIdx() > rInf.GetLineStart() &&
258              CH_TXTATR_BREAKWORD ==
259                 ( cFldChr = rInf.GetTxt().GetChar( rInf.GetIdx() - 1 ) ) )
260         {
261             SwFldPortion* pFld = (SwFldPortion*)rInf.GetLast();
262             XubString aTxt;
263             pFld->GetExpTxt( rInf, aTxt );
264 
265             if ( aTxt.Len() )
266             {
                // The single placeholder character is replaced by the
                // expanded text, so the string grows by aTxt.Len() - 1.
                // All positions behind the placeholder are shifted by that
                // amount.
267                 nFieldDiff = aTxt.Len() - 1;
268                 nCutPos = nCutPos + nFieldDiff;
269                 nHyphPos = nHyphPos + nFieldDiff;
270 
271 #if OSL_DEBUG_LEVEL > 1
272                 aDebugString = rInf.GetTxt();
273 #endif
274 
                // The string owned by rInf is modified in place (the cast
                // presumably removes its constness); it is repaired at the
                // end of this branch.
275                 XubString& rOldTxt = (XubString&)rInf.GetTxt();
276                 rOldTxt.Erase( rInf.GetIdx() - 1, 1 );
277                 rOldTxt.Insert( aTxt, rInf.GetIdx() - 1 );
278                 rInf.SetIdx( rInf.GetIdx() + nFieldDiff );
279             }
280             else
                // empty expansion: behave as if there were no field
281                 cFldChr = 0;
282         }
283 
        // Hyphenation options stay default-constructed unless hyphenation is
        // allowed.
284         LineBreakHyphenationOptions aHyphOpt;
285 		Reference< XHyphenator >  xHyph;
286 		if( bHyph )
287 		{
288 			xHyph = ::GetHyphenator();
289 			aHyphOpt = LineBreakHyphenationOptions( xHyph,
290 								rInf.GetHyphValues(), nHyphPos );
291 		}
292 
293         // Get Language for break iterator.
294         // We have to switch the current language if we have a script
295         // change at nCutPos. Otherwise LATIN punctuation would never
296         // be allowed to be hanging punctuation.
297         // NEVER call GetLang if the string has been modified!!!
298         LanguageType aLang = rInf.GetFont()->GetLanguage();
299 
300         // If we are inside a field portion, we use a temporary string which
301         // differs from the string at the textnode. Therefore we are not allowed
302         // to call the GetLang function.
303         if ( nCutPos && ! rPor.InFldGrp() )
304         {
305             const CharClass& rCC = GetAppCharClass();
306 
307             // step back until a non-punctuation character is reached
308             xub_StrLen nLangIndex = nCutPos;
309 
310             // If a field has been expanded right in front of us we do not
311             // step further than the beginning of the expanded field
312             // (which is the position of the field placeholder in our
313             // original string).
            // rInf.GetIdx() has already been advanced by nFieldDiff above, so
            // the expression below equals nOldIdx - 1.
314             const xub_StrLen nDoNotStepOver = CH_TXTATR_BREAKWORD == cFldChr ?
315                                               rInf.GetIdx() - nFieldDiff - 1:
316                                               0;
317 
318             while ( nLangIndex > nDoNotStepOver &&
319                     ! rCC.isLetterNumeric( rInf.GetTxt(), nLangIndex ) )
320                 --nLangIndex;
321 
322             // last "real" character is not inside our current portion
323             // we have to check the script type of the last "real" character
324             if ( nLangIndex < rInf.GetIdx() )
325             {
326                 sal_uInt16 nScript = pBreakIt->GetRealScriptOfText( rInf.GetTxt(),
327                                                                 nLangIndex );
328                 ASSERT( nScript, "Script is not between 1 and 4" );
329 
330                 // compare current script with script from last "real" character
                // (nScript is 1-based according to the assertion above, hence
                // the "- 1"). With an expanded field the language is looked
                // up at the placeholder position, which is the same in the
                // unmodified string of the text node.
331                 if ( nScript - 1 != rInf.GetFont()->GetActual() )
332                     aLang = rInf.GetTxtFrm()->GetTxtNode()->GetLang(
333                         CH_TXTATR_BREAKWORD == cFldChr ?
334                         nDoNotStepOver :
335                         nLangIndex, 0, nScript );
336             }
337         }
338 
		// Forbidden begin/end-of-line characters for aLang, taken from the
		// document settings.
		// NOTE(review): the returned pointer is dereferenced without a check;
		// presumably the "true" argument guarantees a non-null result -
		// confirm.
339 		const ForbiddenCharacters aForbidden(
340                 *rInf.GetTxtFrm()->GetNode()->getIDocumentSettingAccess()->getForbiddenCharacters( aLang, true ) );
341 
        // Hanging punctuation is only allowed outside of multi portions and
        // field groups.
342         const sal_Bool bAllowHanging = rInf.IsHanging() && ! rInf.IsMulti() &&
343                                       ! rPor.InFldGrp();
344 
345 		LineBreakUserOptions aUserOpt(
346 				aForbidden.beginLine, aForbidden.endLine,
347                 rInf.HasForbiddenChars(), bAllowHanging, sal_False );
348 
349         //! register listener to LinguServiceEvents now in order to get
350         //! notified about relevant changes in the future
351         SwModule *pModule = SW_MOD();
352         if (!pModule->GetLngSvcEvtListener().is())
353             pModule->CreateLngSvcEvtListener();
354 
355         // !!! We must have a local copy of the locale, because inside
356         // getLineBreak the LinguEventListener can trigger a new formatting,
357         // which can corrupt the locale pointer inside pBreakIt.
358         const lang::Locale aLocale = pBreakIt->GetLocale( aLang );
359 
360         // determines first possible line break from nRightPos to
361         // start index of current line
        // (the position handed over here is nCutPos, the lower limit is
        // rInf.GetLineStart())
362         LineBreakResults aResult = pBreakIt->GetBreakIter()->getLineBreak(
363             rInf.GetTxt(), nCutPos, aLocale,
364             rInf.GetLineStart(), aHyphOpt, aUserOpt );
365 
366         nBreakPos = (xub_StrLen)aResult.breakIndex;
367 
368         // if we are formatting multi portions we want to allow line breaks
369         // at the border between single line and multi line portion
370         // we have to be careful with footnote portions, they always come in
371         // with an index 0
372         if ( nBreakPos < rInf.GetLineStart() && rInf.IsFirstMulti() &&
373              ! rInf.IsFtnInside() )
374             nBreakPos = rInf.GetLineStart();
375 
376         nBreakStart = nBreakPos;
377 
        // From here on bHyph tells whether the iterator actually chose a
        // hyphenation, no longer whether hyphenation is allowed.
378         bHyph = BreakType::HYPHENATION == aResult.breakType;
379 
380 		if ( bHyph && nBreakPos != STRING_LEN)
381 		{
382 			// found hyphenation position within line
383 			// nBreakPos is set to the hyphenation position
			// (nBreakStart keeps the index returned by the iterator; the
			// hyphenation position is an offset relative to it)
384 			xHyphWord = aResult.rHyphenatedWord;
385             nBreakPos += xHyphWord->getHyphenationPos() + 1;
386 
387 #if OSL_DEBUG_LEVEL > 1
            // The following values are only fetched to be inspected in a
            // debugger.
388             // e.g., Schif-fahrt, refers to our string
389             const String aWord = xHyphWord->getWord();
390             // e.g., Schiff-fahrt, refers to the word after hyphenation
391             const String aHyphenatedWord = xHyphWord->getHyphenatedWord();
392             // e.g., Schif-fahrt: 5, refers to our string
393             const sal_uInt16 nHyphenationPos = xHyphWord->getHyphenationPos();
394             (void)nHyphenationPos;
395             // e.g., Schiff-fahrt: 6, refers to the word after hyphenation
396             const sal_uInt16 nHyphenPos = xHyphWord->getHyphenPos();
397             (void)nHyphenPos;
398 #endif
399 
400             // if not in interactive mode, we have to break behind a soft hyphen
401             if ( ! rInf.IsInterHyph() && rInf.GetIdx() )
402             {
403                 const long nSoftHyphPos =
404                         xHyphWord->getWord().indexOf( CHAR_SOFTHYPHEN );
405 
                // A soft hyphen at or in front of the hyphenation position
                // moves the break position in front of the current portion.
                // NOTE(review): presumably this is what triggers the
                // underflow in the caller - confirm.
406                 if ( nSoftHyphPos >= 0 &&
407                      nBreakStart + nSoftHyphPos <= nBreakPos &&
408                      nBreakPos > rInf.GetLineStart() )
409                     nBreakPos = rInf.GetIdx() - 1;
410             }
411 
412             if( nBreakPos >= rInf.GetIdx() )
413 			{
414 				nPorLen = nBreakPos - rInf.GetIdx();
				// The text in front of the break already ends with '-':
				// the hyphenated word is dropped (presumably so that no
				// second hyphen is produced).
415 				if( '-' == rInf.GetTxt().GetChar( nBreakPos - 1 ) )
416 					xHyphWord = NULL;
417 			}
418 		}
419         else if ( !bHyph && nBreakPos >= rInf.GetLineStart() )
420 		{
421             ASSERT( nBreakPos != STRING_LEN, "we should have found a break pos" );
422 
423 			// found break position within line
424 			xHyphWord = NULL;
425 
426             // check, if break position is soft hyphen and an underflow
427             // has to be triggered
428             if( nBreakPos > rInf.GetLineStart() && rInf.GetIdx() &&
429                 CHAR_SOFTHYPHEN == rInf.GetTxt().GetChar( nBreakPos - 1 ) )
430 				nBreakPos = rInf.GetIdx() - 1;
431 
432             // Delete any blanks at the end of a line, but be careful:
433             // If a field has been expanded, we do not want to delete any
434             // blanks inside the field portion. This would cause an unwanted
435             // underflow
            // (with an expanded field the loop stops at rInf.GetIdx(), which
            // is the first position behind the expanded text)
436             xub_StrLen nX = nBreakPos;
437             while( nX > rInf.GetLineStart() &&
438                    ( CH_TXTATR_BREAKWORD != cFldChr || nX > rInf.GetIdx() ) &&
439                    ( CH_BLANK == rInf.GetChar( --nX ) ||
440                      CH_FULL_BLANK == rInf.GetChar( nX ) ) )
441                 nBreakPos = nX;
442             if( nBreakPos > rInf.GetIdx() )
443 				nPorLen = nBreakPos - rInf.GetIdx();
444 		}
445         else
446         {
447 			// no line break found, setting nBreakPos to STRING_LEN
448 			// causes a break cut
449 			nBreakPos = STRING_LEN;
450 			ASSERT( nCutPos >= rInf.GetIdx(), "Deep cut" );
451 			nPorLen = nCutPos - rInf.GetIdx();
452 		}
453 
		// The break position lies behind the cut position: the characters in
		// between do not fit and are put into a hanging portion. The portion
		// itself then ends at the cut position.
454 		if( nBreakPos > nCutPos && nBreakPos != STRING_LEN )
455 		{
456             const xub_StrLen nHangingLen = nBreakPos - nCutPos;
457             SwPosSize aTmpSize = rInf.GetTxtSize( &rSI, nCutPos,
458                                                   nHangingLen, 0 );
459 			ASSERT( !pHanging, "A hanging portion is hanging around" );
			// Allocated with new and stored in the member pHanging; it is
			// not released in this function.
460 			pHanging = new SwHangingPortion( aTmpSize );
461             pHanging->SetLen( nHangingLen );
462             nPorLen = nCutPos - rInf.GetIdx();
463         }
464 
465         // If we expanded a field, we must repair the original string.
466         // In case we do not trigger an underflow, we correct the nBreakPos
467         // value, but we cannot correct the nBreakStart value:
468         // If we have found a hyphenation position, nBreakStart can lie before
469         // the field.
470         if ( CH_TXTATR_BREAKWORD == cFldChr )
471         {
            // A break inside or in front of the expanded field is reported
            // as a break at the field placeholder; any other real break
            // position is shifted back by the length difference.
472             if ( nBreakPos < rInf.GetIdx() )
473                 nBreakPos = nOldIdx - 1;
474             else if ( STRING_LEN != nBreakPos )
475             {
476                 ASSERT( nBreakPos >= nFieldDiff, "I've got field trouble!" );
477                 nBreakPos = nBreakPos - nFieldDiff;
478             }
479 
480             ASSERT( nCutPos >= rInf.GetIdx() && nCutPos >= nFieldDiff,
481                     "I've got field trouble, part2!" );
482             nCutPos = nCutPos - nFieldDiff;
483 
            // Put the placeholder character back in place of the expanded
            // text and restore the original index. nPorLen needs no
            // correction because it is a length relative to the index.
484             XubString& rOldTxt = (XubString&)rInf.GetTxt();
485             rOldTxt.Erase( nOldIdx - 1, nFieldDiff + 1 );
486             rOldTxt.Insert( cFldChr, nOldIdx - 1 );
487             rInf.SetIdx( nOldIdx );
488 
489 #if OSL_DEBUG_LEVEL > 1
490             ASSERT( aDebugString == rInf.GetTxt(),
491                     "Somebody, somebody, somebody put something in my string" );
492 #endif
493         }
494     }
495 
	// Width of the part that stays on this line: the measured minimum size of
	// nPorLen characters plus the italic allowance, or 0 if nothing stays.
496 	if( nPorLen )
497     {
498         rInf.GetTxtSize( &rSI, rInf.GetIdx(), nPorLen,
499                          nMaxComp, nMinSize, nMaxSizeDiff );
500 
501         // save maximum width for later use
502         if ( nMaxSizeDiff )
503             rInf.SetMaxWidthDiff( (sal_uLong)&rPor, nMaxSizeDiff );
504 
505         nBreakWidth = nItalic + nMinSize;
506     }
507 	else
508 		nBreakWidth = 0;
509 
    // With a hanging portion the reported break position is the cut position;
    // the characters between the two are covered by pHanging.
510     if( pHanging )
511         nBreakPos = nCutPos;
512 
513     return sal_False;
514 }
515 
516 /*************************************************************************
517  *						SwTxtGuess::AlternativeSpelling
518  *************************************************************************/
519 
520 // Returns true if the word at position nPos has a different spelling
521 // when hyphenated at this position (old German spelling).
//
// Parameters:
//   rInf - formatting state; supplies the text, the font language and the
//          hyphenation values. It is not modified.
//   nPos - index into rInf.GetTxt() at which the hyphenation is examined.
//
// Side effects on this object: nCutPos is set to nPos, nBreakStart to the
// start of the dictionary word around nPos, and xHyphWord to the result of
// the hyphenator query.
//
// If the break iterator or the hyphenator is not available, sal_False is
// returned and xHyphWord is cleared, so that no result of an earlier call is
// mistaken for the answer to this one. Without a break iterator no word
// boundary can be determined, therefore nBreakStart keeps its old value in
// that case.
522 
523 sal_Bool SwTxtGuess::AlternativeSpelling( const SwTxtFormatInfo &rInf,
524 	const xub_StrLen nPos )
525 {
	// Guess() uses the break iterator only after checking
	// GetBreakIter().is(); do the same here instead of dereferencing a
	// possibly empty reference.
	if( !pBreakIt->GetBreakIter().is() )
	{
		// if everything else fails, we want to cut at nPos
		nCutPos = nPos;
		xHyphWord = NULL;
		return sal_False;
	}

526 	// get word boundaries
527 	xub_StrLen nWordLen;
528 
529 	Boundary aBound =
530 		pBreakIt->GetBreakIter()->getWordBoundary( rInf.GetTxt(), nPos,
531 		pBreakIt->GetLocale( rInf.GetFont()->GetLanguage() ),
532 		WordType::DICTIONARY_WORD, sal_True );
533 	nBreakStart = (xub_StrLen)aBound.startPos;
534     nWordLen = static_cast<xub_StrLen>(aBound.endPos - nBreakStart);
535 
536     // if everything else fails, we want to cut at nPos
537     nCutPos = nPos;
538 
	// the word around nPos, as a separate string for the hyphenator
539 	XubString aTxt( rInf.GetTxt().Copy( nBreakStart, nWordLen ) );
540 
541 	// check, if word has alternative spelling
542 	Reference< XHyphenator >  xHyph( ::GetHyphenator() );
543 	ASSERT( xHyph.is(), "Hyphenator is missing");
	// The assertion only reports the problem; execution continues behind it.
	// Without a hyphenator there is no alternative spelling to be found, so
	// answer "no" instead of calling through an empty reference.
	if( !xHyph.is() )
	{
		xHyphWord = NULL;
		return sal_False;
	}

544 	// The index handed to the hyphenator is relative to the start of the word.
	// NOTE(review): the previous remark here said "subtract 1 since the
	// UNO-interface is 0 based", but nothing is subtracted in this function -
	// presumably the caller already passes an adjusted nPos; confirm.
545 	xHyphWord =	xHyph->queryAlternativeSpelling( OUString(aTxt),
546 						pBreakIt->GetLocale( rInf.GetFont()->GetLanguage() ),
547 						nPos - nBreakStart, rInf.GetHyphValues() );
	// An empty result means that the hyphenator knows no alternative.
548 	return xHyphWord.is() && xHyphWord->isAlternativeSpelling();
549 }
550 
551