1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 // MARKER(update_precomp.py): autogen include statement, do not remove 29 #include "precompiled_lingucomponent.hxx" 30 31 32 #include <com/sun/star/uno/Reference.h> 33 #include <com/sun/star/linguistic2/XSearchableDictionaryList.hpp> 34 35 #include <cppuhelper/factory.hxx> // helper for factories 36 #include <com/sun/star/registry/XRegistryKey.hpp> 37 #include <i18npool/mslangid.hxx> 38 #include <unotools/pathoptions.hxx> 39 #include <unotools/useroptions.hxx> 40 #include <tools/debug.hxx> 41 #include <unotools/processfactory.hxx> 42 #include <osl/mutex.hxx> 43 44 #include <hyphen.h> 45 #include <hyphenimp.hxx> 46 47 #include <linguistic/hyphdta.hxx> 48 #include <rtl/ustring.hxx> 49 #include <rtl/ustrbuf.hxx> 50 #include <rtl/textenc.h> 51 52 #include <linguistic/lngprops.hxx> 53 #include <unotools/pathoptions.hxx> 54 #include <unotools/useroptions.hxx> 55 #include <unotools/lingucfg.hxx> 56 #include <osl/file.hxx> 57 58 #include "dictmgr.hxx" 59 60 #include <stdio.h> 61 #include <string.h> 62 63 #include <list> 64 #include <set> 65 66 using namespace utl; 67 using namespace osl; 68 using namespace rtl; 69 using namespace com::sun::star; 70 using namespace com::sun::star::beans; 71 using namespace com::sun::star::lang; 72 using namespace com::sun::star::uno; 73 using namespace com::sun::star::linguistic2; 74 using namespace linguistic; 75 76 // values asigned to capitalization types 77 #define CAPTYPE_UNKNOWN 0 78 #define CAPTYPE_NOCAP 1 79 #define CAPTYPE_INITCAP 2 80 #define CAPTYPE_ALLCAP 3 81 #define CAPTYPE_MIXED 4 82 83 // min, max 84 85 //#define Min(a,b) (a < b ? a : b) 86 #define Max(a,b) (a > b ? a : b) 87 88 /////////////////////////////////////////////////////////////////////////// 89 90 91 Hyphenator::Hyphenator() : 92 aEvtListeners ( GetLinguMutex() ) 93 { 94 bDisposing = sal_False; 95 pPropHelper = NULL; 96 aDicts = NULL; 97 numdict = 0; 98 } 99 100 101 Hyphenator::~Hyphenator() 102 { 103 if (pPropHelper) 104 pPropHelper->RemoveAsPropListener(); 105 106 if ((numdict) && (aDicts)) 107 { 108 for (int i=0; i < numdict; i++) 109 { 110 if (aDicts[i].apCC) delete aDicts[i].apCC; 111 aDicts[i].apCC = NULL; 112 } 113 } 114 if (aDicts) delete[] aDicts; 115 aDicts = NULL; 116 numdict = 0; 117 } 118 119 120 PropertyHelper_Hyphen & Hyphenator::GetPropHelper_Impl() 121 { 122 if (!pPropHelper) 123 { 124 Reference< XPropertySet > xPropSet( GetLinguProperties(), UNO_QUERY ); 125 126 pPropHelper = new PropertyHelper_Hyphen ((XHyphenator *) this, xPropSet ); 127 xPropHelper = pPropHelper; 128 pPropHelper->AddAsPropListener(); //! after a reference is established 129 } 130 return *pPropHelper; 131 132 } 133 134 135 Sequence< Locale > SAL_CALL Hyphenator::getLocales() 136 throw(RuntimeException) 137 { 138 MutexGuard aGuard( GetLinguMutex() ); 139 140 // this routine should return the locales supported by the installed 141 // dictionaries. 142 143 if (!numdict) 144 { 145 SvtLinguConfig aLinguCfg; 146 147 // get list of dictionaries-to-use 148 // (or better speaking: the list of dictionaries using the 149 // new configuration entries). 150 std::list< SvtLinguConfigDictionaryEntry > aDics; 151 uno::Sequence< rtl::OUString > aFormatList; 152 aLinguCfg.GetSupportedDictionaryFormatsFor( A2OU("Hyphenators"), 153 A2OU("org.openoffice.lingu.LibHnjHyphenator"), aFormatList ); 154 sal_Int32 nLen = aFormatList.getLength(); 155 for (sal_Int32 i = 0; i < nLen; ++i) 156 { 157 std::vector< SvtLinguConfigDictionaryEntry > aTmpDic( 158 aLinguCfg.GetActiveDictionariesByFormat( aFormatList[i] ) ); 159 aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() ); 160 } 161 162 //!! for compatibility with old dictionaries (the ones not using extensions 163 //!! or new configuration entries, but still using the dictionary.lst file) 164 //!! Get the list of old style spell checking dictionaries to use... 165 std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics( 166 GetOldStyleDics( "HYPH" ) ); 167 168 // to prefer dictionaries with configuration entries we will only 169 // use those old style dictionaries that add a language that 170 // is not yet supported by the list od new style dictionaries 171 MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics ); 172 173 numdict = aDics.size(); 174 if (numdict) 175 { 176 // get supported locales from the dictionaries-to-use... 177 sal_Int32 k = 0; 178 std::set< rtl::OUString, lt_rtl_OUString > aLocaleNamesSet; 179 std::list< SvtLinguConfigDictionaryEntry >::const_iterator aDictIt; 180 for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt) 181 { 182 uno::Sequence< rtl::OUString > aLocaleNames( aDictIt->aLocaleNames ); 183 sal_Int32 nLen2 = aLocaleNames.getLength(); 184 for (k = 0; k < nLen2; ++k) 185 { 186 aLocaleNamesSet.insert( aLocaleNames[k] ); 187 } 188 } 189 // ... and add them to the resulting sequence 190 aSuppLocales.realloc( aLocaleNamesSet.size() ); 191 std::set< rtl::OUString, lt_rtl_OUString >::const_iterator aItB; 192 k = 0; 193 for (aItB = aLocaleNamesSet.begin(); aItB != aLocaleNamesSet.end(); ++aItB) 194 { 195 Locale aTmp( MsLangId::convertLanguageToLocale( 196 MsLangId::convertIsoStringToLanguage( *aItB ))); 197 aSuppLocales[k++] = aTmp; 198 } 199 200 //! For each dictionary and each locale we need a seperate entry. 201 //! If this results in more than one dictionary per locale than (for now) 202 //! it is undefined which dictionary gets used. 203 //! In the future the implementation should support using several dictionaries 204 //! for one locale. 205 numdict = 0; 206 for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt) 207 numdict = numdict + aDictIt->aLocaleNames.getLength(); 208 209 // add dictionary information 210 aDicts = new HDInfo[numdict]; 211 212 k = 0; 213 for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt) 214 { 215 if (aDictIt->aLocaleNames.getLength() > 0 && 216 aDictIt->aLocations.getLength() > 0) 217 { 218 uno::Sequence< rtl::OUString > aLocaleNames( aDictIt->aLocaleNames ); 219 sal_Int32 nLocales = aLocaleNames.getLength(); 220 221 // currently only one language per dictionary is supported in the actual implementation... 222 // Thus here we work-around this by adding the same dictionary several times. 223 // Once for each of it's supported locales. 224 for (sal_Int32 i = 0; i < nLocales; ++i) 225 { 226 aDicts[k].aPtr = NULL; 227 aDicts[k].eEnc = RTL_TEXTENCODING_DONTKNOW; 228 aDicts[k].aLoc = MsLangId::convertLanguageToLocale( 229 MsLangId::convertIsoStringToLanguage( aDictIt->aLocaleNames[i] )); 230 aDicts[k].apCC = new CharClass( aDicts[k].aLoc ); 231 // also both files have to be in the same directory and the 232 // file names must only differ in the extension (.aff/.dic). 233 // Thus we use the first location only and strip the extension part. 234 rtl::OUString aLocation = aDictIt->aLocations[0]; 235 sal_Int32 nPos = aLocation.lastIndexOf( '.' ); 236 aLocation = aLocation.copy( 0, nPos ); 237 aDicts[k].aName = aLocation; 238 239 ++k; 240 } 241 } 242 } 243 DBG_ASSERT( k == numdict, "index mismatch?" ); 244 } 245 else 246 { 247 /* no dictionary found so register no dictionaries */ 248 numdict = 0; 249 aDicts = NULL; 250 aSuppLocales.realloc(0); 251 } 252 } 253 254 return aSuppLocales; 255 } 256 257 258 259 sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale) 260 throw(RuntimeException) 261 { 262 MutexGuard aGuard( GetLinguMutex() ); 263 264 sal_Bool bRes = sal_False; 265 if (!aSuppLocales.getLength()) 266 getLocales(); 267 268 const Locale *pLocale = aSuppLocales.getConstArray(); 269 sal_Int32 nLen = aSuppLocales.getLength(); 270 for (sal_Int32 i = 0; i < nLen; ++i) 271 { 272 if (rLocale == pLocale[i]) 273 { 274 bRes = sal_True; 275 break; 276 } 277 } 278 return bRes; 279 } 280 281 282 Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const ::rtl::OUString& aWord, 283 const ::com::sun::star::lang::Locale& aLocale, 284 sal_Int16 nMaxLeading, 285 const ::com::sun::star::beans::PropertyValues& aProperties ) 286 throw (com::sun::star::uno::RuntimeException, com::sun::star::lang::IllegalArgumentException) 287 { 288 int nHyphenationPos = -1; 289 int nHyphenationPosAlt = -1; 290 int nHyphenationPosAltHyph = -1; 291 int wordlen; 292 char *hyphens; 293 char *lcword; 294 int k = 0; 295 296 PropertyHelper_Hyphen & rHelper = GetPropHelper(); 297 rHelper.SetTmpPropVals(aProperties); 298 sal_Int16 minTrail = rHelper.GetMinTrailing(); 299 sal_Int16 minLead = rHelper.GetMinLeading(); 300 sal_Int16 minLen = rHelper.GetMinWordLength(); 301 302 HyphenDict *dict = NULL; 303 rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW; 304 CharClass * pCC = NULL; 305 306 Reference< XHyphenatedWord > xRes; 307 308 k = -1; 309 for (int j = 0; j < numdict; j++) 310 { 311 if (aLocale == aDicts[j].aLoc) 312 k = j; 313 } 314 315 // if we have a hyphenation dictionary matching this locale 316 if (k != -1) 317 { 318 // if this dictinary has not been loaded yet do that 319 if (!aDicts[k].aPtr) 320 { 321 OUString DictFN = aDicts[k].aName + A2OU(".dic"); 322 OUString dictpath; 323 324 osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath ); 325 OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) ); 326 327 #if defined(WNT) 328 // workaround for Windows specifc problem that the 329 // path length in calls to 'fopen' is limted to somewhat 330 // about 120+ characters which will usually be exceed when 331 // using dictionaries as extensions. 332 sTmp = Win_GetShortPathName( dictpath ); 333 #endif 334 335 if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL ) 336 { 337 fprintf(stderr, "Couldn't find file %s\n", OU2ENC(dictpath, osl_getThreadTextEncoding()) ); 338 return NULL; 339 } 340 aDicts[k].aPtr = dict; 341 aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset); 342 } 343 344 // other wise hyphenate the word with that dictionary 345 dict = aDicts[k].aPtr; 346 eEnc = aDicts[k].eEnc; 347 pCC = aDicts[k].apCC; 348 349 // we don't want to work with a default text encoding since following incorrect 350 // results may occur only for specific text and thus may be hard to notice. 351 // Thus better always make a clean exit here if the text encoding is in question. 352 // Hopefully something not working at all will raise proper attention quickly. ;-) 353 DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" ); 354 if (eEnc == RTL_TEXTENCODING_DONTKNOW) 355 return NULL; 356 357 sal_uInt16 ct = CAPTYPE_UNKNOWN; 358 ct = capitalType(aWord, pCC); 359 360 // first convert any smart quotes or apostrophes to normal ones 361 OUStringBuffer rBuf(aWord); 362 sal_Int32 nc = rBuf.getLength(); 363 sal_Unicode ch; 364 for (sal_Int32 ix=0; ix < nc; ix++) 365 { 366 ch = rBuf.charAt(ix); 367 if ((ch == 0x201C) || (ch == 0x201D)) 368 rBuf.setCharAt(ix,(sal_Unicode)0x0022); 369 if ((ch == 0x2018) || (ch == 0x2019)) 370 rBuf.setCharAt(ix,(sal_Unicode)0x0027); 371 } 372 OUString nWord(rBuf.makeStringAndClear()); 373 374 // now convert word to all lowercase for pattern recognition 375 OUString nTerm(makeLowerCase(nWord, pCC)); 376 377 // now convert word to needed encoding 378 OString encWord(OU2ENC(nTerm,eEnc)); 379 380 wordlen = encWord.getLength(); 381 lcword = new char[wordlen + 1]; 382 hyphens = new char[wordlen + 5]; 383 384 char ** rep = NULL; // replacements of discretionary hyphenation 385 int * pos = NULL; // array of [hyphenation point] minus [deletion position] 386 int * cut = NULL; // length of deletions in original word 387 388 // copy converted word into simple char buffer 389 strcpy(lcword,encWord.getStr()); 390 391 // now strip off any ending periods 392 int n = wordlen-1; 393 while((n >=0) && (lcword[n] == '.')) 394 n--; 395 n++; 396 if (n > 0) 397 { 398 const bool bFailed = 0 != hnj_hyphen_hyphenate3( dict, lcword, n, hyphens, NULL, 399 &rep, &pos, &cut, minLead, minTrail, 400 Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))), 401 Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) ); 402 if (bFailed) 403 { 404 //whoops something did not work 405 delete[] hyphens; 406 delete[] lcword; 407 if (rep) 408 { 409 for(int j = 0; j < n; j++) 410 { 411 if (rep[j]) free(rep[j]); 412 } 413 free(rep); 414 } 415 if (pos) free(pos); 416 if (cut) free(cut); 417 return NULL; 418 } 419 } 420 421 // now backfill hyphens[] for any removed trailing periods 422 for (int c = n; c < wordlen; c++) hyphens[c] = '0'; 423 hyphens[wordlen] = '\0'; 424 425 sal_Int32 Leading = GetPosInWordToCheck( aWord, nMaxLeading ); 426 427 for (sal_Int32 i = 0; i < n; i++) 428 { 429 int leftrep = 0; 430 sal_Bool hit = (n >= minLen); 431 if (!rep || !rep[i] || (i >= n)) 432 { 433 hit = hit && (hyphens[i]&1) && (i < Leading); 434 hit = hit && (i >= (minLead-1) ); 435 hit = hit && ((n - i - 1) >= minTrail); 436 } 437 else 438 { 439 // calculate change character length before hyphenation point signed with '=' 440 for (char * c = rep[i]; *c && (*c != '='); c++) 441 { 442 if (eEnc == RTL_TEXTENCODING_UTF8) 443 { 444 if (((unsigned char) *c) >> 6 != 2) 445 leftrep++; 446 } 447 else 448 leftrep++; 449 } 450 hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading); 451 hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) ); 452 hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail); 453 } 454 if (hit) 455 { 456 nHyphenationPos = i; 457 if (rep && (i < n) && rep[i]) 458 { 459 nHyphenationPosAlt = i - pos[i]; 460 nHyphenationPosAltHyph = i + leftrep - pos[i]; 461 } 462 } 463 } 464 465 if (nHyphenationPos == -1) 466 { 467 xRes = NULL; 468 } 469 else 470 { 471 if (rep && rep[nHyphenationPos]) 472 { 473 // remove equal sign 474 char * s = rep[nHyphenationPos]; 475 int eq = 0; 476 for (; *s; s++) 477 { 478 if (*s == '=') eq = 1; 479 if (eq) *s = *(s + 1); 480 } 481 OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), eEnc); 482 OUString repHyph; 483 switch (ct) 484 { 485 case CAPTYPE_ALLCAP: 486 { 487 repHyph = makeUpperCase(repHyphlow, pCC); 488 break; 489 } 490 case CAPTYPE_INITCAP: 491 { 492 if (nHyphenationPosAlt == 0) 493 repHyph = makeInitCap(repHyphlow, pCC); 494 else 495 repHyph = repHyphlow; 496 break; 497 } 498 default: 499 { 500 repHyph = repHyphlow; 501 break; 502 } 503 } 504 505 // handle shortening 506 sal_Int16 nPos = (sal_Int16) ((nHyphenationPosAltHyph < nHyphenationPos) ? 507 nHyphenationPosAltHyph : nHyphenationPos); 508 // dicretionary hyphenation 509 xRes = new HyphenatedWord( aWord, LocaleToLanguage( aLocale ), nPos, 510 aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph), 511 (sal_Int16) nHyphenationPosAltHyph); 512 } 513 else 514 { 515 xRes = new HyphenatedWord( aWord, LocaleToLanguage( aLocale ), 516 (sal_Int16)nHyphenationPos, aWord, (sal_Int16) nHyphenationPos); 517 } 518 } 519 520 delete[] lcword; 521 delete[] hyphens; 522 if (rep) 523 { 524 for(int j = 0; j < n; j++) 525 { 526 if (rep[j]) free(rep[j]); 527 } 528 free(rep); 529 } 530 if (pos) free(pos); 531 if (cut) free(cut); 532 return xRes; 533 } 534 return NULL; 535 } 536 537 538 Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling( 539 const ::rtl::OUString& /*aWord*/, 540 const ::com::sun::star::lang::Locale& /*aLocale*/, 541 sal_Int16 /*nIndex*/, 542 const ::com::sun::star::beans::PropertyValues& /*aProperties*/ ) 543 throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException) 544 { 545 /* alternative spelling isn't supported by tex dictionaries */ 546 /* XXX: OOo's extended libhjn algorithm can support alternative spellings with extended TeX dic. */ 547 /* TASK: implement queryAlternativeSpelling() */ 548 return NULL; 549 } 550 551 Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const ::rtl::OUString& aWord, 552 const ::com::sun::star::lang::Locale& aLocale, 553 const ::com::sun::star::beans::PropertyValues& aProperties ) 554 throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException) 555 { 556 int wordlen; 557 char *hyphens; 558 char *lcword; 559 int k; 560 561 PropertyHelper_Hyphen & rHelper = GetPropHelper(); 562 rHelper.SetTmpPropVals(aProperties); 563 sal_Int16 minTrail = rHelper.GetMinTrailing(); 564 sal_Int16 minLead = rHelper.GetMinLeading(); 565 566 HyphenDict *dict = NULL; 567 rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW; 568 CharClass* pCC = NULL; 569 570 Reference< XPossibleHyphens > xRes; 571 572 k = -1; 573 for (int j = 0; j < numdict; j++) 574 { 575 if (aLocale == aDicts[j].aLoc) k = j; 576 } 577 578 // if we have a hyphenation dictionary matching this locale 579 if (k != -1) 580 { 581 // if this dictioanry has not been loaded yet do that 582 if (!aDicts[k].aPtr) 583 { 584 OUString DictFN = aDicts[k].aName + A2OU(".dic"); 585 OUString dictpath; 586 587 osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath ); 588 OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) ); 589 590 #if defined(WNT) 591 // workaround for Windows specifc problem that the 592 // path length in calls to 'fopen' is limted to somewhat 593 // about 120+ characters which will usually be exceed when 594 // using dictionaries as extensions. 595 sTmp = Win_GetShortPathName( dictpath ); 596 #endif 597 598 if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL ) 599 { 600 fprintf(stderr, "Couldn't find file %s and %s\n", sTmp.getStr(), OU2ENC(dictpath, osl_getThreadTextEncoding()) ); 601 return NULL; 602 } 603 aDicts[k].aPtr = dict; 604 aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset); 605 } 606 607 // other wise hyphenate the word with that dictionary 608 dict = aDicts[k].aPtr; 609 eEnc = aDicts[k].eEnc; 610 pCC = aDicts[k].apCC; 611 612 // we don't want to work with a default text encoding since following incorrect 613 // results may occur only for specific text and thus may be hard to notice. 614 // Thus better always make a clean exit here if the text encoding is in question. 615 // Hopefully something not working at all will raise proper attention quickly. ;-) 616 DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" ); 617 if (eEnc == RTL_TEXTENCODING_DONTKNOW) 618 return NULL; 619 620 // first handle smart quotes both single and double 621 OUStringBuffer rBuf(aWord); 622 sal_Int32 nc = rBuf.getLength(); 623 sal_Unicode ch; 624 for (sal_Int32 ix=0; ix < nc; ix++) 625 { 626 ch = rBuf.charAt(ix); 627 if ((ch == 0x201C) || (ch == 0x201D)) 628 rBuf.setCharAt(ix,(sal_Unicode)0x0022); 629 if ((ch == 0x2018) || (ch == 0x2019)) 630 rBuf.setCharAt(ix,(sal_Unicode)0x0027); 631 } 632 OUString nWord(rBuf.makeStringAndClear()); 633 634 // now convert word to all lowercase for pattern recognition 635 OUString nTerm(makeLowerCase(nWord, pCC)); 636 637 // now convert word to needed encoding 638 OString encWord(OU2ENC(nTerm,eEnc)); 639 640 wordlen = encWord.getLength(); 641 lcword = new char[wordlen+1]; 642 hyphens = new char[wordlen+5]; 643 char ** rep = NULL; // replacements of discretionary hyphenation 644 int * pos = NULL; // array of [hyphenation point] minus [deletion position] 645 int * cut = NULL; // length of deletions in original word 646 647 // copy converted word into simple char buffer 648 strcpy(lcword,encWord.getStr()); 649 650 // first remove any trailing periods 651 int n = wordlen-1; 652 while((n >=0) && (lcword[n] == '.')) 653 n--; 654 n++; 655 // fprintf(stderr,"hyphenate... %s\n",lcword); fflush(stderr); 656 if (n > 0) 657 { 658 const bool bFailed = 0 != hnj_hyphen_hyphenate3(dict, lcword, n, hyphens, NULL, 659 &rep, &pos, &cut, minLead, minTrail, 660 Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))), 661 Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) ); 662 if (bFailed) 663 { 664 delete[] hyphens; 665 delete[] lcword; 666 667 if (rep) 668 { 669 for(int j = 0; j < n; j++) 670 { 671 if (rep[j]) free(rep[j]); 672 } 673 free(rep); 674 } 675 if (pos) free(pos); 676 if (cut) free(cut); 677 678 return NULL; 679 } 680 } 681 // now backfill hyphens[] for any removed periods 682 for (int c = n; c < wordlen; c++) 683 hyphens[c] = '0'; 684 hyphens[wordlen] = '\0'; 685 // fprintf(stderr,"... %s\n",hyphens); fflush(stderr); 686 687 sal_Int16 nHyphCount = 0; 688 sal_Int16 i; 689 690 for ( i = 0; i < encWord.getLength(); i++) 691 { 692 if (hyphens[i]&1 && (!rep || !rep[i])) 693 nHyphCount++; 694 } 695 696 Sequence< sal_Int16 > aHyphPos(nHyphCount); 697 sal_Int16 *pPos = aHyphPos.getArray(); 698 OUStringBuffer hyphenatedWordBuffer; 699 OUString hyphenatedWord; 700 nHyphCount = 0; 701 702 for (i = 0; i < nWord.getLength(); i++) 703 { 704 hyphenatedWordBuffer.append(aWord[i]); 705 // hyphenation position (not alternative) 706 if (hyphens[i]&1 && (!rep || !rep[i])) 707 { 708 pPos[nHyphCount] = i; 709 hyphenatedWordBuffer.append(sal_Unicode('=')); 710 nHyphCount++; 711 } 712 } 713 714 hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear(); 715 //fprintf(stderr,"result is %s\n",OU2A(hyphenatedWord)); 716 //fflush(stderr); 717 718 xRes = new PossibleHyphens( aWord, LocaleToLanguage( aLocale ), 719 hyphenatedWord, aHyphPos ); 720 721 delete[] hyphens; 722 delete[] lcword; 723 724 if (rep) 725 { 726 for(int j = 0; j < n; j++) 727 { 728 if (rep[j]) free(rep[j]); 729 } 730 free(rep); 731 } 732 if (pos) free(pos); 733 if (cut) free(cut); 734 735 return xRes; 736 } 737 738 return NULL; 739 } 740 741 sal_uInt16 SAL_CALL Hyphenator::capitalType(const OUString& aTerm, CharClass * pCC) 742 { 743 sal_Int32 tlen = aTerm.getLength(); 744 if ((pCC) && (tlen)) 745 { 746 String aStr(aTerm); 747 sal_Int32 nc = 0; 748 for (xub_StrLen tindex = 0; tindex < tlen; tindex++) 749 { 750 if (pCC->getCharacterType(aStr,tindex) & ::com::sun::star::i18n::KCharacterType::UPPER) 751 nc++; 752 } 753 754 if (nc == 0) 755 return (sal_uInt16) CAPTYPE_NOCAP; 756 if (nc == tlen) 757 return (sal_uInt16) CAPTYPE_ALLCAP; 758 if ((nc == 1) && (pCC->getCharacterType(aStr,0) & ::com::sun::star::i18n::KCharacterType::UPPER)) 759 return (sal_uInt16) CAPTYPE_INITCAP; 760 761 return (sal_uInt16) CAPTYPE_MIXED; 762 } 763 return (sal_uInt16) CAPTYPE_UNKNOWN; 764 } 765 766 OUString SAL_CALL Hyphenator::makeLowerCase(const OUString& aTerm, CharClass * pCC) 767 { 768 if (pCC) 769 return pCC->toLower_rtl(aTerm, 0, aTerm.getLength()); 770 return aTerm; 771 } 772 773 OUString SAL_CALL Hyphenator::makeUpperCase(const OUString& aTerm, CharClass * pCC) 774 { 775 if (pCC) 776 return pCC->toUpper_rtl(aTerm, 0, aTerm.getLength()); 777 return aTerm; 778 } 779 780 781 OUString SAL_CALL Hyphenator::makeInitCap(const OUString& aTerm, CharClass * pCC) 782 { 783 sal_Int32 tlen = aTerm.getLength(); 784 if ((pCC) && (tlen)) 785 { 786 OUString bTemp = aTerm.copy(0,1); 787 if (tlen > 1) 788 return ( pCC->toUpper_rtl(bTemp, 0, 1) + pCC->toLower_rtl(aTerm,1,(tlen-1)) ); 789 790 return pCC->toUpper_rtl(bTemp, 0, 1); 791 } 792 return aTerm; 793 } 794 795 796 Reference< XInterface > SAL_CALL Hyphenator_CreateInstance( 797 const Reference< XMultiServiceFactory > & /*rSMgr*/ ) 798 throw(Exception) 799 { 800 Reference< XInterface > xService = (cppu::OWeakObject*) new Hyphenator; 801 return xService; 802 } 803 804 805 sal_Bool SAL_CALL Hyphenator::addLinguServiceEventListener( 806 const Reference< XLinguServiceEventListener >& rxLstnr ) 807 throw(RuntimeException) 808 { 809 MutexGuard aGuard( GetLinguMutex() ); 810 811 sal_Bool bRes = sal_False; 812 if (!bDisposing && rxLstnr.is()) 813 { 814 bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr ); 815 } 816 return bRes; 817 } 818 819 820 sal_Bool SAL_CALL Hyphenator::removeLinguServiceEventListener( 821 const Reference< XLinguServiceEventListener >& rxLstnr ) 822 throw(RuntimeException) 823 { 824 MutexGuard aGuard( GetLinguMutex() ); 825 826 sal_Bool bRes = sal_False; 827 if (!bDisposing && rxLstnr.is()) 828 { 829 DBG_ASSERT( xPropHelper.is(), "xPropHelper non existent" ); 830 bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr ); 831 } 832 return bRes; 833 } 834 835 836 OUString SAL_CALL Hyphenator::getServiceDisplayName( const Locale& /*rLocale*/ ) 837 throw(RuntimeException) 838 { 839 MutexGuard aGuard( GetLinguMutex() ); 840 return A2OU( "Libhyphen Hyphenator" ); 841 } 842 843 844 void SAL_CALL Hyphenator::initialize( const Sequence< Any >& rArguments ) 845 throw(Exception, RuntimeException) 846 { 847 MutexGuard aGuard( GetLinguMutex() ); 848 849 if (!pPropHelper) 850 { 851 sal_Int32 nLen = rArguments.getLength(); 852 if (2 == nLen) 853 { 854 Reference< XPropertySet > xPropSet; 855 rArguments.getConstArray()[0] >>= xPropSet; 856 //rArguments.getConstArray()[1] >>= xDicList; 857 858 //! Pointer allows for access of the non-UNO functions. 859 //! And the reference to the UNO-functions while increasing 860 //! the ref-count and will implicitly free the memory 861 //! when the object is not longer used. 862 pPropHelper = new PropertyHelper_Hyphen( (XHyphenator *) this, xPropSet ); 863 xPropHelper = pPropHelper; 864 pPropHelper->AddAsPropListener(); //! after a reference is established 865 } 866 else 867 { 868 DBG_ERROR( "wrong number of arguments in sequence" ); 869 } 870 } 871 } 872 873 874 void SAL_CALL Hyphenator::dispose() 875 throw(RuntimeException) 876 { 877 MutexGuard aGuard( GetLinguMutex() ); 878 879 if (!bDisposing) 880 { 881 bDisposing = sal_True; 882 EventObject aEvtObj( (XHyphenator *) this ); 883 aEvtListeners.disposeAndClear( aEvtObj ); 884 } 885 } 886 887 888 void SAL_CALL Hyphenator::addEventListener( const Reference< XEventListener >& rxListener ) 889 throw(RuntimeException) 890 { 891 MutexGuard aGuard( GetLinguMutex() ); 892 893 if (!bDisposing && rxListener.is()) 894 aEvtListeners.addInterface( rxListener ); 895 } 896 897 898 void SAL_CALL Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener ) 899 throw(RuntimeException) 900 { 901 MutexGuard aGuard( GetLinguMutex() ); 902 903 if (!bDisposing && rxListener.is()) 904 aEvtListeners.removeInterface( rxListener ); 905 } 906 907 908 /////////////////////////////////////////////////////////////////////////// 909 // Service specific part 910 // 911 912 OUString SAL_CALL Hyphenator::getImplementationName() 913 throw(RuntimeException) 914 { 915 MutexGuard aGuard( GetLinguMutex() ); 916 917 return getImplementationName_Static(); 918 } 919 920 921 sal_Bool SAL_CALL Hyphenator::supportsService( const OUString& ServiceName ) 922 throw(RuntimeException) 923 { 924 MutexGuard aGuard( GetLinguMutex() ); 925 926 Sequence< OUString > aSNL = getSupportedServiceNames(); 927 const OUString * pArray = aSNL.getConstArray(); 928 for( sal_Int32 i = 0; i < aSNL.getLength(); i++ ) 929 if( pArray[i] == ServiceName ) 930 return sal_True; 931 return sal_False; 932 } 933 934 935 Sequence< OUString > SAL_CALL Hyphenator::getSupportedServiceNames() 936 throw(RuntimeException) 937 { 938 MutexGuard aGuard( GetLinguMutex() ); 939 940 return getSupportedServiceNames_Static(); 941 } 942 943 944 Sequence< OUString > Hyphenator::getSupportedServiceNames_Static() 945 throw() 946 { 947 MutexGuard aGuard( GetLinguMutex() ); 948 949 Sequence< OUString > aSNS( 1 ); // auch mehr als 1 Service moeglich 950 aSNS.getArray()[0] = A2OU( SN_HYPHENATOR ); 951 return aSNS; 952 } 953 954 void * SAL_CALL Hyphenator_getFactory( const sal_Char * pImplName, 955 XMultiServiceFactory * pServiceManager, void * ) 956 { 957 void * pRet = 0; 958 if ( !Hyphenator::getImplementationName_Static().compareToAscii( pImplName ) ) 959 { 960 Reference< XSingleServiceFactory > xFactory = 961 cppu::createOneInstanceFactory( 962 pServiceManager, 963 Hyphenator::getImplementationName_Static(), 964 Hyphenator_CreateInstance, 965 Hyphenator::getSupportedServiceNames_Static()); 966 // acquire, because we return an interface pointer instead of a reference 967 xFactory->acquire(); 968 pRet = xFactory.get(); 969 } 970 return pRet; 971 } 972 973 974 /////////////////////////////////////////////////////////////////////////// 975 976 #undef CAPTYPE_UNKNOWN 977 #undef CAPTYPE_NOCAP 978 #undef CAPTYPE_INITCAP 979 #undef CAPTYPE_ALLCAP 980 #undef CAPTYPE_MIXED 981