1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_lingucomponent.hxx"
30 
31 
32 #include <com/sun/star/uno/Reference.h>
33 #include <com/sun/star/linguistic2/XSearchableDictionaryList.hpp>
34 
35 #include <cppuhelper/factory.hxx>	// helper for factories
36 #include <com/sun/star/registry/XRegistryKey.hpp>
37 #include <i18npool/mslangid.hxx>
38 #include <unotools/pathoptions.hxx>
39 #include <unotools/useroptions.hxx>
40 #include <tools/debug.hxx>
41 #include <unotools/processfactory.hxx>
42 #include <osl/mutex.hxx>
43 
44 #include <hyphen.h>
45 #include <hyphenimp.hxx>
46 
47 #include <linguistic/hyphdta.hxx>
48 #include <rtl/ustring.hxx>
49 #include <rtl/ustrbuf.hxx>
50 #include <rtl/textenc.h>
51 
52 #include <linguistic/lngprops.hxx>
53 #include <unotools/pathoptions.hxx>
54 #include <unotools/useroptions.hxx>
55 #include <unotools/lingucfg.hxx>
56 #include <osl/file.hxx>
57 
58 #include "dictmgr.hxx"
59 
60 #include <stdio.h>
61 #include <string.h>
62 
63 #include <list>
64 #include <set>
65 
66 using namespace utl;
67 using namespace osl;
68 using namespace rtl;
69 using namespace com::sun::star;
70 using namespace com::sun::star::beans;
71 using namespace com::sun::star::lang;
72 using namespace com::sun::star::uno;
73 using namespace com::sun::star::linguistic2;
74 using namespace linguistic;
75 
// values assigned to capitalization types
#define CAPTYPE_UNKNOWN 0
#define CAPTYPE_NOCAP   1
#define CAPTYPE_INITCAP 2
#define CAPTYPE_ALLCAP  3
#define CAPTYPE_MIXED   4

// min, max
// Function-like macros: every argument is fully parenthesized so that
// arguments containing low-precedence operators (?:, &, comparisons, ...)
// expand correctly. Each argument may still be evaluated twice, so do not
// pass expressions with side effects.

//#define Min(a,b) ((a) < (b) ? (a) : (b))
#define Max(a,b) ((a) > (b) ? (a) : (b))
87 
88 ///////////////////////////////////////////////////////////////////////////
89 
90 
91 Hyphenator::Hyphenator() :
92 	aEvtListeners	( GetLinguMutex() )
93 {
94 	bDisposing = sal_False;
95 	pPropHelper = NULL;
96     aDicts = NULL;
97     numdict = 0;
98 }
99 
100 
101 Hyphenator::~Hyphenator()
102 {
103 	if (pPropHelper)
104 		pPropHelper->RemoveAsPropListener();
105 
106     if ((numdict) && (aDicts))
107     {
108         for (int i=0; i < numdict; i++)
109         {
110             if (aDicts[i].apCC) delete aDicts[i].apCC;
111             aDicts[i].apCC = NULL;
112         }
113 	}
114     if (aDicts) delete[] aDicts;
115 	aDicts = NULL;
116     numdict = 0;
117 }
118 
119 
120 PropertyHelper_Hyphen & Hyphenator::GetPropHelper_Impl()
121 {
122 	if (!pPropHelper)
123 	{
124 		Reference< XPropertySet	>	xPropSet( GetLinguProperties(), UNO_QUERY );
125 
126 		pPropHelper	= new PropertyHelper_Hyphen ((XHyphenator *) this, xPropSet );
127 		xPropHelper = pPropHelper;
128 		pPropHelper->AddAsPropListener();	//! after a reference is established
129 	}
130 	return *pPropHelper;
131 
132 }
133 
134 
135 Sequence< Locale > SAL_CALL Hyphenator::getLocales()
136 		throw(RuntimeException)
137 {
138 	MutexGuard	aGuard( GetLinguMutex() );
139 
140     // this routine should return the locales supported by the installed
141     // dictionaries.
142 
143     if (!numdict)
144     {
145         SvtLinguConfig aLinguCfg;
146 
147         // get list of dictionaries-to-use
148 		// (or better speaking: the list of dictionaries using the
149 		// new configuration entries).
150         std::list< SvtLinguConfigDictionaryEntry > aDics;
151         uno::Sequence< rtl::OUString > aFormatList;
152         aLinguCfg.GetSupportedDictionaryFormatsFor( A2OU("Hyphenators"),
153                 A2OU("org.openoffice.lingu.LibHnjHyphenator"), aFormatList );
154         sal_Int32 nLen = aFormatList.getLength();
155         for (sal_Int32 i = 0;  i < nLen;  ++i)
156         {
157             std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
158                     aLinguCfg.GetActiveDictionariesByFormat( aFormatList[i] ) );
159             aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
160         }
161 
162         //!! for compatibility with old dictionaries (the ones not using extensions
163         //!! or new configuration entries, but still using the dictionary.lst file)
164 		//!! Get the list of old style spell checking dictionaries to use...
165         std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
166 				GetOldStyleDics( "HYPH" ) );
167 
168 		// to prefer dictionaries with configuration entries we will only
169 		// use those old style dictionaries that add a language that
170 		// is not yet supported by the list od new style dictionaries
171 		MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
172 
173         numdict = aDics.size();
174         if (numdict)
175         {
176             // get supported locales from the dictionaries-to-use...
177             sal_Int32 k = 0;
178             std::set< rtl::OUString, lt_rtl_OUString > aLocaleNamesSet;
179             std::list< SvtLinguConfigDictionaryEntry >::const_iterator aDictIt;
180             for (aDictIt = aDics.begin();  aDictIt != aDics.end();  ++aDictIt)
181             {
182                 uno::Sequence< rtl::OUString > aLocaleNames( aDictIt->aLocaleNames );
183                 sal_Int32 nLen2 = aLocaleNames.getLength();
184                 for (k = 0;  k < nLen2;  ++k)
185                 {
186                     aLocaleNamesSet.insert( aLocaleNames[k] );
187                 }
188             }
189             // ... and add them to the resulting sequence
190             aSuppLocales.realloc( aLocaleNamesSet.size() );
191             std::set< rtl::OUString, lt_rtl_OUString >::const_iterator aItB;
192             k = 0;
193             for (aItB = aLocaleNamesSet.begin();  aItB != aLocaleNamesSet.end();  ++aItB)
194             {
195                 Locale aTmp( MsLangId::convertLanguageToLocale(
196                         MsLangId::convertIsoStringToLanguage( *aItB )));
197                 aSuppLocales[k++] = aTmp;
198             }
199 
200             //! For each dictionary and each locale we need a seperate entry.
201             //! If this results in more than one dictionary per locale than (for now)
202 			//! it is undefined which dictionary gets used.
203 			//! In the future the implementation should support using several dictionaries
204 			//! for one locale.
205 			numdict = 0;
206             for (aDictIt = aDics.begin();  aDictIt != aDics.end();  ++aDictIt)
207 				numdict = numdict + aDictIt->aLocaleNames.getLength();
208 
209             // add dictionary information
210             aDicts = new HDInfo[numdict];
211 
212             k = 0;
213             for (aDictIt = aDics.begin();  aDictIt != aDics.end();  ++aDictIt)
214             {
215                 if (aDictIt->aLocaleNames.getLength() > 0 &&
216                     aDictIt->aLocations.getLength() > 0)
217                 {
218                     uno::Sequence< rtl::OUString > aLocaleNames( aDictIt->aLocaleNames );
219                     sal_Int32 nLocales = aLocaleNames.getLength();
220 
221                     // currently only one language per dictionary is supported in the actual implementation...
222                     // Thus here we work-around this by adding the same dictionary several times.
223                     // Once for each of it's supported locales.
224                     for (sal_Int32 i = 0;  i < nLocales;  ++i)
225                     {
226                         aDicts[k].aPtr = NULL;
227                         aDicts[k].eEnc = RTL_TEXTENCODING_DONTKNOW;
228                         aDicts[k].aLoc = MsLangId::convertLanguageToLocale(
229                                         MsLangId::convertIsoStringToLanguage( aDictIt->aLocaleNames[i] ));
230                         aDicts[k].apCC = new CharClass( aDicts[k].aLoc );
231                         // also both files have to be in the same directory and the
232                         // file names must only differ in the extension (.aff/.dic).
233                         // Thus we use the first location only and strip the extension part.
234                         rtl::OUString aLocation = aDictIt->aLocations[0];
235                         sal_Int32 nPos = aLocation.lastIndexOf( '.' );
236                         aLocation = aLocation.copy( 0, nPos );
237                         aDicts[k].aName = aLocation;
238 
239                         ++k;
240                     }
241                 }
242             }
243             DBG_ASSERT( k == numdict, "index mismatch?" );
244         }
245         else
246         {
247             /* no dictionary found so register no dictionaries */
248             numdict = 0;
249             aDicts = NULL;
250             aSuppLocales.realloc(0);
251         }
252     }
253 
254     return aSuppLocales;
255 }
256 
257 
258 
259 sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale)
260 		throw(RuntimeException)
261 {
262 	MutexGuard	aGuard( GetLinguMutex() );
263 
264 	sal_Bool bRes = sal_False;
265 	if (!aSuppLocales.getLength())
266 		getLocales();
267 
268 	const Locale *pLocale = aSuppLocales.getConstArray();
269 	sal_Int32 nLen = aSuppLocales.getLength();
270 	for (sal_Int32 i = 0;  i < nLen;  ++i)
271 	{
272 		if (rLocale == pLocale[i])
273 		{
274 			bRes = sal_True;
275 			break;
276 		}
277 	}
278 	return bRes;
279 }
280 
281 
// Determines a single hyphenation point for aWord in the given locale.
//
// Steps, as implemented below:
//  - look up the entry of the dictionary table whose locale equals aLocale
//    (the search loop has no 'break', so the last matching entry is used),
//  - load the libhyphen dictionary on first use and remember it in the table,
//  - replace typographic quotes by plain ones, lower-case the word and convert
//    it to the encoding of the dictionary,
//  - ignore trailing periods and call hnj_hyphen_hyphenate3(),
//  - walk over all positions and keep the LAST one that satisfies the
//    minimum word length / leading / trailing constraints and lies before the
//    value obtained from GetPosInWordToCheck( aWord, nMaxLeading ).
//
// Returns an empty reference if there is no dictionary for the locale, the
// dictionary can not be loaded, its text encoding is unknown, libhyphen
// reports a failure or no position qualifies. Otherwise a HyphenatedWord is
// returned; if libhyphen supplied a replacement for the chosen position the
// result carries the accordingly modified word.
//
// NOTE(review): unlike most other member functions no MutexGuard is taken here
// although the dictionary table gets modified when a dictionary is loaded
// - confirm whether the callers serialize access.
Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const ::rtl::OUString& aWord,
       const ::com::sun::star::lang::Locale& aLocale,
       sal_Int16 nMaxLeading,
       const ::com::sun::star::beans::PropertyValues& aProperties )
       throw (com::sun::star::uno::RuntimeException, com::sun::star::lang::IllegalArgumentException)
{
	int nHyphenationPos = -1;           // chosen position, -1: none found
    int nHyphenationPosAlt = -1;        // i - pos[i] of the chosen position (replacement case only)
    int nHyphenationPosAltHyph = -1;    // nHyphenationPosAlt plus the replacement length before '='
	int wordlen;
	char *hyphens;
    char *lcword;
    int k = 0;

    // the properties passed by the caller temporarily override the stored ones
    PropertyHelper_Hyphen & rHelper = GetPropHelper();
    rHelper.SetTmpPropVals(aProperties);
	sal_Int16 minTrail = rHelper.GetMinTrailing();
	sal_Int16 minLead = rHelper.GetMinLeading();
	sal_Int16 minLen = rHelper.GetMinWordLength();

	HyphenDict *dict = NULL;
    rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
    CharClass * pCC = NULL;

	Reference< XHyphenatedWord > xRes;

    // find the table entry for the requested locale (last match wins)
    k = -1;
    for (int j = 0; j < numdict; j++)
    {
        if (aLocale == aDicts[j].aLoc)
            k = j;
    }

    // if we have a hyphenation dictionary matching this locale
    if (k != -1)
    {
        // if this dictionary has not been loaded yet do that
        if (!aDicts[k].aPtr)
        {
            OUString DictFN = aDicts[k].aName + A2OU(".dic");
            OUString dictpath;

            osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
            OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );

#if defined(WNT)
            // workaround for a Windows specific problem: the
            // path length in calls to 'fopen' is limited to somewhat
            // about 120+ characters which will usually be exceeded when
            // using dictionaries as extensions.
            sTmp = Win_GetShortPathName( dictpath );
#endif

            if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
            {
               fprintf(stderr, "Couldn't find file %s\n", OU2ENC(dictpath, osl_getThreadTextEncoding()) );
               return NULL;
            }
            // remember the loaded dictionary and its encoding for later calls
            aDicts[k].aPtr = dict;
            aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset);
        }

        // otherwise hyphenate the word with that dictionary
        dict = aDicts[k].aPtr;
        eEnc = aDicts[k].eEnc;
        pCC =  aDicts[k].apCC;

        // we don't want to work with a default text encoding since following incorrect
        // results may occur only for specific text and thus may be hard to notice.
        // Thus better always make a clean exit here if the text encoding is in question.
        // Hopefully something not working at all will raise proper attention quickly. ;-)
        DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
        if (eEnc == RTL_TEXTENCODING_DONTKNOW)
            return NULL;

        // capitalization of the original word, needed below to adapt the
        // case of a replacement string
        sal_uInt16 ct = CAPTYPE_UNKNOWN;
        ct = capitalType(aWord, pCC);

        // first convert any smart quotes or apostrophes to normal ones
	    OUStringBuffer rBuf(aWord);
        sal_Int32 nc = rBuf.getLength();
        sal_Unicode ch;
        for (sal_Int32 ix=0; ix < nc; ix++)
        {
	        ch = rBuf.charAt(ix);
            if ((ch == 0x201C) || (ch == 0x201D))
                rBuf.setCharAt(ix,(sal_Unicode)0x0022);
            if ((ch == 0x2018) || (ch == 0x2019))
                rBuf.setCharAt(ix,(sal_Unicode)0x0027);
        }
        OUString nWord(rBuf.makeStringAndClear());

        // now convert word to all lowercase for pattern recognition
        OUString nTerm(makeLowerCase(nWord, pCC));

        // now convert word to needed encoding
        OString encWord(OU2ENC(nTerm,eEnc));

        // wordlen is the length of the ENCODED word
	    wordlen = encWord.getLength();
        lcword = new char[wordlen + 1];
	    hyphens = new char[wordlen + 5];

        // the following three arrays are filled in by hnj_hyphen_hyphenate3
        // and are released with free() below
        char ** rep = NULL; // replacements of discretionary hyphenation
        int * pos = NULL; // array of [hyphenation point] minus [deletion position]
        int * cut = NULL; // length of deletions in original word

        // copy converted word into simple char buffer
        strcpy(lcword,encWord.getStr());

        // now strip off any ending periods
        // (afterwards n is the length of the word without trailing periods)
        int n = wordlen-1;
        while((n >=0) && (lcword[n] == '.'))
            n--;
        n++;
        if (n > 0)
        {
            // a non-zero return value is treated as failure
            const bool bFailed = 0 != hnj_hyphen_hyphenate3( dict, lcword, n, hyphens, NULL,
                    &rep, &pos, &cut, minLead, minTrail,
                    Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead  - Max(dict->lhmin, 2))),
                    Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) );
            if (bFailed)
            {
                //whoops something did not work
                // release all buffers before leaving
                delete[] hyphens;
                delete[] lcword;
                if (rep)
                {
                    for(int j = 0; j < n; j++)
                    {
                        if (rep[j]) free(rep[j]);
                    }
                    free(rep);
                }
                if (pos) free(pos);
                if (cut) free(cut);
                return NULL;
            }
        }

        // now backfill hyphens[] for any removed trailing periods
        // ('0' has its lowest bit cleared, i.e. it does not mark a hyphenation point)
        for (int c = n; c < wordlen; c++) hyphens[c] = '0';
        hyphens[wordlen] = '\0';

	    sal_Int32 Leading =  GetPosInWordToCheck( aWord, nMaxLeading );

        // Check every position; since nHyphenationPos is overwritten on each
        // hit the last qualifying position is the one that gets used.
	    for (sal_Int32 i = 0; i < n; i++)
	    {
            int leftrep = 0;
            sal_Bool hit = (n >= minLen);
            // plain hyphenation point (no replacement string for position i);
            // note that (i >= n) can not be true inside this loop
            if (!rep || !rep[i] || (i >= n))
            {
                hit = hit && (hyphens[i]&1) && (i < Leading);
                hit = hit && (i >= (minLead-1) );
                hit = hit && ((n - i - 1) >= minTrail);
            }
            else
            {
                // calculate change character length before hyphenation point signed with '='
                // (for UTF-8 the continuation bytes 10xxxxxx are not counted)
                for (char * c = rep[i]; *c && (*c != '='); c++)
                {
                    if (eEnc == RTL_TEXTENCODING_UTF8)
                    {
                        if (((unsigned char) *c) >> 6 != 2)
                            leftrep++;
                    }
                    else
                        leftrep++;
                }
                hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading);
                hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) );
                hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail);
            }
            if (hit)
            {
                nHyphenationPos = i;
                if (rep && (i < n) && rep[i])
                {
                    nHyphenationPosAlt = i - pos[i];
                    nHyphenationPosAltHyph = i + leftrep - pos[i];
                }
            }
        }

        if (nHyphenationPos  == -1)
        {
            // no usable hyphenation point
            xRes = NULL;
        }
        else
        {
            if (rep && rep[nHyphenationPos])
            {
                // remove equal sign
                // (everything from the first '=' on is shifted left by one char)
                char * s = rep[nHyphenationPos];
                int eq = 0;
                for (; *s; s++)
                {
                    if (*s == '=') eq = 1;
                    if (eq) *s = *(s + 1);
                }
                OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), eEnc);
                OUString repHyph;
                // give the (lower case) replacement the capitalization of the original word
                switch (ct)
                {
                    case CAPTYPE_ALLCAP:
                    {
                        repHyph = makeUpperCase(repHyphlow, pCC);
                        break;
                    }
                    case CAPTYPE_INITCAP:
                    {
                        // only a replacement at position 0 gets an initial capital
                        if (nHyphenationPosAlt == 0)
                            repHyph = makeInitCap(repHyphlow, pCC);
                        else
                             repHyph = repHyphlow;
                        break;
                    }
                    default:
                    {
                        repHyph = repHyphlow;
                        break;
                    }
                }

                // handle shortening
                // (use the smaller one of the two positions)
                sal_Int16 nPos = (sal_Int16) ((nHyphenationPosAltHyph < nHyphenationPos) ?
                nHyphenationPosAltHyph : nHyphenationPos);
                // discretionary hyphenation
                xRes = new HyphenatedWord( aWord, LocaleToLanguage( aLocale ), nPos,
                    aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph),
                    (sal_Int16) nHyphenationPosAltHyph);
            }
            else
            {
                // standard hyphenation: the word itself stays unchanged
                xRes = new HyphenatedWord( aWord, LocaleToLanguage( aLocale ),
                    (sal_Int16)nHyphenationPos, aWord, (sal_Int16) nHyphenationPos);
            }
        }

        // release all buffers
        delete[] lcword;
	    delete[] hyphens;
        if (rep)
        {
            for(int j = 0; j < n; j++)
            {
                if (rep[j]) free(rep[j]);
            }
            free(rep);
        }
        if (pos) free(pos);
        if (cut) free(cut);
	    return xRes;
	}
    // no dictionary for this locale
    return NULL;
}
536 
537 
538 Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling(
539         const ::rtl::OUString& /*aWord*/,
540         const ::com::sun::star::lang::Locale& /*aLocale*/,
541         sal_Int16 /*nIndex*/,
542         const ::com::sun::star::beans::PropertyValues& /*aProperties*/ )
543         throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException)
544 {
545   /* alternative spelling isn't supported by tex dictionaries */
546   /* XXX: OOo's extended libhjn algorithm can support alternative spellings with extended TeX dic. */
547   /* TASK: implement queryAlternativeSpelling() */
548   return NULL;
549 }
550 
// Determines all standard hyphenation points of aWord in the given locale.
//
// The dictionary lookup, the lazy loading, the preparation of the word
// (quote replacement, lower-casing, encoding, ignoring trailing periods) and
// the call of hnj_hyphen_hyphenate3() are done the same way as in hyphenate().
// Afterwards every position that libhyphen marked (odd value in hyphens[])
// and that has no replacement string is collected; positions that would need
// a replacement are skipped.
//
// Returns an empty reference if there is no dictionary for the locale, the
// dictionary can not be loaded, its text encoding is unknown or libhyphen
// reports a failure. Otherwise a PossibleHyphens object is returned holding
// the original word, the word with '=' inserted after each found position
// and the sequence of positions.
//
// NOTE(review): no MutexGuard is taken here although the dictionary table
// gets modified when a dictionary is loaded - confirm whether the callers
// serialize access.
Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const ::rtl::OUString& aWord,
        const ::com::sun::star::lang::Locale& aLocale,
        const ::com::sun::star::beans::PropertyValues& aProperties )
        throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException)
{
    int wordlen;
    char *hyphens;
    char *lcword;
    int k;

    // the properties passed by the caller temporarily override the stored ones
    PropertyHelper_Hyphen & rHelper = GetPropHelper();
    rHelper.SetTmpPropVals(aProperties);
    sal_Int16 minTrail = rHelper.GetMinTrailing();
    sal_Int16 minLead = rHelper.GetMinLeading();

    HyphenDict *dict = NULL;
    rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
    CharClass* pCC = NULL;

    Reference< XPossibleHyphens > xRes;

    // find the table entry for the requested locale (last match wins)
    k = -1;
    for (int j = 0; j < numdict; j++)
    {
        if (aLocale == aDicts[j].aLoc) k = j;
    }

    // if we have a hyphenation dictionary matching this locale
    if (k != -1)
    {
        // if this dictionary has not been loaded yet do that
        if (!aDicts[k].aPtr)
        {
            OUString DictFN = aDicts[k].aName + A2OU(".dic");
            OUString dictpath;

            osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
            OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );

#if defined(WNT)
            // workaround for a Windows specific problem: the
            // path length in calls to 'fopen' is limited to somewhat
            // about 120+ characters which will usually be exceeded when
            // using dictionaries as extensions.
            sTmp = Win_GetShortPathName( dictpath );
#endif

            if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
            {
               fprintf(stderr, "Couldn't find file %s and %s\n", sTmp.getStr(), OU2ENC(dictpath, osl_getThreadTextEncoding()) );
               return NULL;
            }
            // remember the loaded dictionary and its encoding for later calls
            aDicts[k].aPtr = dict;
            aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset);
        }

        // otherwise hyphenate the word with that dictionary
        dict = aDicts[k].aPtr;
        eEnc = aDicts[k].eEnc;
        pCC  = aDicts[k].apCC;

        // we don't want to work with a default text encoding since following incorrect
        // results may occur only for specific text and thus may be hard to notice.
        // Thus better always make a clean exit here if the text encoding is in question.
        // Hopefully something not working at all will raise proper attention quickly. ;-)
        DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
        if (eEnc == RTL_TEXTENCODING_DONTKNOW)
            return NULL;

        // first handle smart quotes both single and double
        OUStringBuffer rBuf(aWord);
        sal_Int32 nc = rBuf.getLength();
        sal_Unicode ch;
        for (sal_Int32 ix=0; ix < nc; ix++)
        {
            ch = rBuf.charAt(ix);
            if ((ch == 0x201C) || (ch == 0x201D))
                rBuf.setCharAt(ix,(sal_Unicode)0x0022);
            if ((ch == 0x2018) || (ch == 0x2019))
                rBuf.setCharAt(ix,(sal_Unicode)0x0027);
        }
        OUString nWord(rBuf.makeStringAndClear());

        // now convert word to all lowercase for pattern recognition
        OUString nTerm(makeLowerCase(nWord, pCC));

        // now convert word to needed encoding
        OString encWord(OU2ENC(nTerm,eEnc));

        // wordlen is the length of the ENCODED word
        wordlen = encWord.getLength();
        lcword = new char[wordlen+1];
        hyphens = new char[wordlen+5];
        // the following three arrays are filled in by hnj_hyphen_hyphenate3
        // and are released with free() below
        char ** rep = NULL; // replacements of discretionary hyphenation
        int * pos = NULL; // array of [hyphenation point] minus [deletion position]
        int * cut = NULL; // length of deletions in original word

        // copy converted word into simple char buffer
        strcpy(lcword,encWord.getStr());

        // first remove any trailing periods
        // (afterwards n is the length of the word without trailing periods)
        int n = wordlen-1;
        while((n >=0) && (lcword[n] == '.'))
            n--;
        n++;
        // fprintf(stderr,"hyphenate... %s\n",lcword); fflush(stderr);
        if (n > 0)
        {
            // a non-zero return value is treated as failure
            const bool bFailed = 0 != hnj_hyphen_hyphenate3(dict, lcword, n, hyphens, NULL,
                    &rep, &pos, &cut, minLead, minTrail,
                    Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))),
                    Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) );
            if (bFailed)
            {
                // release all buffers before leaving
                delete[] hyphens;
                delete[] lcword;

                if (rep)
                {
                    for(int j = 0; j < n; j++)
                    {
                        if (rep[j]) free(rep[j]);
                    }
                    free(rep);
                }
                if (pos) free(pos);
                if (cut) free(cut);

                return NULL;
            }
        }
        // now backfill hyphens[] for any removed periods
        // ('0' has its lowest bit cleared, i.e. it does not mark a hyphenation point)
        for (int c = n; c < wordlen; c++)
            hyphens[c] = '0';
        hyphens[wordlen] = '\0';
        // fprintf(stderr,"... %s\n",hyphens); fflush(stderr);

        sal_Int16 nHyphCount = 0;
        sal_Int16 i;

        // first pass: count the standard hyphenation points in order to
        // size the result sequence
        for ( i = 0; i < encWord.getLength(); i++)
        {
            if (hyphens[i]&1 && (!rep || !rep[i]))
                nHyphCount++;
        }

        Sequence< sal_Int16 > aHyphPos(nHyphCount);
        sal_Int16 *pPos = aHyphPos.getArray();
        OUStringBuffer hyphenatedWordBuffer;
        OUString hyphenatedWord;
        nHyphCount = 0;

        // second pass: copy the original word and insert '=' after each
        // hyphenation position.
        // NOTE(review): this pass is bounded by the length of nWord whereas
        // the counting pass above is bounded by the length of the encoded
        // word; both passes only agree if these lengths are equal - confirm
        // for multi-byte encodings.
        for (i = 0; i < nWord.getLength(); i++)
        {
            hyphenatedWordBuffer.append(aWord[i]);
            // hyphenation position (not alternative)
            if (hyphens[i]&1 && (!rep || !rep[i]))
            {
                pPos[nHyphCount] = i;
                hyphenatedWordBuffer.append(sal_Unicode('='));
                nHyphCount++;
            }
        }

        hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear();
        //fprintf(stderr,"result is %s\n",OU2A(hyphenatedWord));
        //fflush(stderr);

        xRes = new PossibleHyphens( aWord, LocaleToLanguage( aLocale ),
                  hyphenatedWord, aHyphPos );

        // release all buffers
        delete[] hyphens;
        delete[] lcword;

        if (rep)
        {
            for(int j = 0; j < n; j++)
            {
                if (rep[j]) free(rep[j]);
            }
            free(rep);
        }
        if (pos) free(pos);
        if (cut) free(cut);

        return xRes;
    }

    // no dictionary for this locale
    return NULL;
}
740 
741 sal_uInt16 SAL_CALL Hyphenator::capitalType(const OUString& aTerm, CharClass * pCC)
742 {
743     sal_Int32 tlen = aTerm.getLength();
744     if ((pCC) && (tlen))
745     {
746         String aStr(aTerm);
747         sal_Int32 nc = 0;
748         for (xub_StrLen tindex = 0; tindex < tlen;  tindex++)
749         {
750             if (pCC->getCharacterType(aStr,tindex) & ::com::sun::star::i18n::KCharacterType::UPPER)
751                 nc++;
752         }
753 
754         if (nc == 0)
755             return (sal_uInt16) CAPTYPE_NOCAP;
756         if (nc == tlen)
757             return (sal_uInt16) CAPTYPE_ALLCAP;
758         if ((nc == 1) && (pCC->getCharacterType(aStr,0) & ::com::sun::star::i18n::KCharacterType::UPPER))
759             return (sal_uInt16) CAPTYPE_INITCAP;
760 
761         return (sal_uInt16) CAPTYPE_MIXED;
762     }
763     return (sal_uInt16) CAPTYPE_UNKNOWN;
764 }
765 
766 OUString SAL_CALL Hyphenator::makeLowerCase(const OUString& aTerm, CharClass * pCC)
767 {
768     if (pCC)
769         return pCC->toLower_rtl(aTerm, 0, aTerm.getLength());
770     return aTerm;
771 }
772 
773 OUString SAL_CALL Hyphenator::makeUpperCase(const OUString& aTerm, CharClass * pCC)
774 {
775     if (pCC)
776         return pCC->toUpper_rtl(aTerm, 0, aTerm.getLength());
777     return aTerm;
778 }
779 
780 
781 OUString SAL_CALL Hyphenator::makeInitCap(const OUString& aTerm, CharClass * pCC)
782 {
783     sal_Int32 tlen = aTerm.getLength();
784     if ((pCC) && (tlen))
785     {
786         OUString bTemp = aTerm.copy(0,1);
787         if (tlen > 1)
788             return ( pCC->toUpper_rtl(bTemp, 0, 1) + pCC->toLower_rtl(aTerm,1,(tlen-1)) );
789 
790         return pCC->toUpper_rtl(bTemp, 0, 1);
791 	}
792     return aTerm;
793 }
794 
795 
796 Reference< XInterface > SAL_CALL Hyphenator_CreateInstance(
797         const Reference< XMultiServiceFactory > & /*rSMgr*/ )
798         throw(Exception)
799 {
800 	Reference< XInterface > xService = (cppu::OWeakObject*) new Hyphenator;
801 	return xService;
802 }
803 
804 
805 sal_Bool SAL_CALL Hyphenator::addLinguServiceEventListener(
806         const Reference< XLinguServiceEventListener >& rxLstnr )
807         throw(RuntimeException)
808 {
809 	MutexGuard	aGuard( GetLinguMutex() );
810 
811 	sal_Bool bRes = sal_False;
812 	if (!bDisposing && rxLstnr.is())
813 	{
814 		bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr );
815 	}
816 	return bRes;
817 }
818 
819 
820 sal_Bool SAL_CALL Hyphenator::removeLinguServiceEventListener(
821         const Reference< XLinguServiceEventListener >& rxLstnr )
822         throw(RuntimeException)
823 {
824 	MutexGuard	aGuard( GetLinguMutex() );
825 
826 	sal_Bool bRes = sal_False;
827 	if (!bDisposing && rxLstnr.is())
828 	{
829 		DBG_ASSERT( xPropHelper.is(), "xPropHelper non existent" );
830 		bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr );
831 	}
832 	return bRes;
833 }
834 
835 
836 OUString SAL_CALL Hyphenator::getServiceDisplayName( const Locale& /*rLocale*/ )
837         throw(RuntimeException)
838 {
839 	MutexGuard	aGuard( GetLinguMutex() );
840 	return A2OU( "Libhyphen Hyphenator" );
841 }
842 
843 
844 void SAL_CALL Hyphenator::initialize( const Sequence< Any >& rArguments )
845 		throw(Exception, RuntimeException)
846 {
847 	MutexGuard	aGuard( GetLinguMutex() );
848 
849 	if (!pPropHelper)
850 	{
851 		sal_Int32 nLen = rArguments.getLength();
852 		if (2 == nLen)
853 		{
854 			Reference< XPropertySet	>	xPropSet;
855 			rArguments.getConstArray()[0] >>= xPropSet;
856 			//rArguments.getConstArray()[1] >>= xDicList;
857 
858 			//! Pointer allows for access of the non-UNO functions.
859 			//! And the reference to the UNO-functions while increasing
860 			//! the ref-count and will implicitly free the memory
861 			//! when the object is not longer used.
862 			pPropHelper = new PropertyHelper_Hyphen( (XHyphenator *) this, xPropSet );
863 			xPropHelper = pPropHelper;
864 			pPropHelper->AddAsPropListener();	//! after a reference is established
865 		}
866         else
867         {
868 			DBG_ERROR( "wrong number of arguments in sequence" );
869         }
870 	}
871 }
872 
873 
874 void SAL_CALL Hyphenator::dispose()
875 		throw(RuntimeException)
876 {
877 	MutexGuard	aGuard( GetLinguMutex() );
878 
879 	if (!bDisposing)
880 	{
881 		bDisposing = sal_True;
882 		EventObject	aEvtObj( (XHyphenator *) this );
883 		aEvtListeners.disposeAndClear( aEvtObj );
884 	}
885 }
886 
887 
888 void SAL_CALL Hyphenator::addEventListener( const Reference< XEventListener >& rxListener )
889 		throw(RuntimeException)
890 {
891 	MutexGuard	aGuard( GetLinguMutex() );
892 
893 	if (!bDisposing && rxListener.is())
894 		aEvtListeners.addInterface( rxListener );
895 }
896 
897 
898 void SAL_CALL Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener )
899 		throw(RuntimeException)
900 {
901 	MutexGuard	aGuard( GetLinguMutex() );
902 
903 	if (!bDisposing && rxListener.is())
904 		aEvtListeners.removeInterface( rxListener );
905 }
906 
907 
908 ///////////////////////////////////////////////////////////////////////////
909 // Service specific part
910 //
911 
912 OUString SAL_CALL Hyphenator::getImplementationName()
913 		throw(RuntimeException)
914 {
915 	MutexGuard	aGuard( GetLinguMutex() );
916 
917 	return getImplementationName_Static();
918 }
919 
920 
921 sal_Bool SAL_CALL Hyphenator::supportsService( const OUString& ServiceName )
922 		throw(RuntimeException)
923 {
924 	MutexGuard	aGuard( GetLinguMutex() );
925 
926 	Sequence< OUString > aSNL = getSupportedServiceNames();
927 	const OUString * pArray = aSNL.getConstArray();
928 	for( sal_Int32 i = 0; i < aSNL.getLength(); i++ )
929 		if( pArray[i] == ServiceName )
930 			return sal_True;
931 	return sal_False;
932 }
933 
934 
935 Sequence< OUString > SAL_CALL Hyphenator::getSupportedServiceNames()
936 		throw(RuntimeException)
937 {
938 	MutexGuard	aGuard( GetLinguMutex() );
939 
940 	return getSupportedServiceNames_Static();
941 }
942 
943 
944 Sequence< OUString > Hyphenator::getSupportedServiceNames_Static()
945 		throw()
946 {
947 	MutexGuard	aGuard( GetLinguMutex() );
948 
949 	Sequence< OUString > aSNS( 1 );	// auch mehr als 1 Service moeglich
950 	aSNS.getArray()[0] = A2OU( SN_HYPHENATOR );
951 	return aSNS;
952 }
953 
954 void * SAL_CALL Hyphenator_getFactory( const sal_Char * pImplName,
955 			XMultiServiceFactory * pServiceManager, void *  )
956 {
957 	void * pRet = 0;
958 	if ( !Hyphenator::getImplementationName_Static().compareToAscii( pImplName ) )
959 	{
960 		Reference< XSingleServiceFactory > xFactory =
961 			cppu::createOneInstanceFactory(
962 				pServiceManager,
963 				Hyphenator::getImplementationName_Static(),
964 				Hyphenator_CreateInstance,
965 				Hyphenator::getSupportedServiceNames_Static());
966 		// acquire, because we return an interface pointer instead of a reference
967 		xFactory->acquire();
968 		pRet = xFactory.get();
969 	}
970 	return pRet;
971 }
972 
973 
974 ///////////////////////////////////////////////////////////////////////////
975 
// Remove the capitalization type macros defined at the top of this file
// again, so that they are not visible to anything following this point.
#undef CAPTYPE_UNKNOWN
#undef CAPTYPE_NOCAP
#undef CAPTYPE_INITCAP
#undef CAPTYPE_ALLCAP
#undef CAPTYPE_MIXED
981