1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_lingucomponent.hxx"
26 
27 
28 #include <com/sun/star/uno/Reference.h>
29 #include <com/sun/star/linguistic2/XSearchableDictionaryList.hpp>
30 
31 #include <cppuhelper/factory.hxx>	// helper for factories
32 #include <com/sun/star/registry/XRegistryKey.hpp>
33 #include <i18npool/mslangid.hxx>
34 #include <unotools/pathoptions.hxx>
35 #include <unotools/useroptions.hxx>
36 #include <tools/debug.hxx>
37 #include <unotools/processfactory.hxx>
38 #include <osl/mutex.hxx>
39 
40 #include <hyphen.h>
41 #include <hyphenimp.hxx>
42 
43 #include <linguistic/hyphdta.hxx>
44 #include <rtl/ustring.hxx>
45 #include <rtl/ustrbuf.hxx>
46 #include <rtl/textenc.h>
47 
48 #include <linguistic/lngprops.hxx>
49 #include <unotools/pathoptions.hxx>
50 #include <unotools/useroptions.hxx>
51 #include <unotools/lingucfg.hxx>
52 #include <osl/file.hxx>
53 
54 #include "dictmgr.hxx"
55 
56 #include <stdio.h>
57 #include <string.h>
58 
59 #include <list>
60 #include <set>
61 
62 using namespace utl;
63 using namespace osl;
64 using namespace rtl;
65 using namespace com::sun::star;
66 using namespace com::sun::star::beans;
67 using namespace com::sun::star::lang;
68 using namespace com::sun::star::uno;
69 using namespace com::sun::star::linguistic2;
70 using namespace linguistic;
71 
// values assigned to capitalization types
73 #define CAPTYPE_UNKNOWN 0
74 #define CAPTYPE_NOCAP   1
75 #define CAPTYPE_INITCAP 2
76 #define CAPTYPE_ALLCAP  3
77 #define CAPTYPE_MIXED   4
78 
// min, max
//
// Both the arguments and the whole expansion are parenthesized so that
// operands containing low-precedence operators (e.g. '&', '?:') expand as
// intended. Each argument may still be evaluated more than once, so do not
// pass expressions with side effects.

//#define Min(a,b) ((a) < (b) ? (a) : (b))
#define Max(a,b) ((a) > (b) ? (a) : (b))
83 
84 ///////////////////////////////////////////////////////////////////////////
85 
86 
87 Hyphenator::Hyphenator() :
88 	aEvtListeners	( GetLinguMutex() )
89 {
90 	bDisposing = sal_False;
91 	pPropHelper = NULL;
92     aDicts = NULL;
93     numdict = 0;
94 }
95 
96 
97 Hyphenator::~Hyphenator()
98 {
99 	if (pPropHelper)
100 		pPropHelper->RemoveAsPropListener();
101 
102     if ((numdict) && (aDicts))
103     {
104         for (int i=0; i < numdict; i++)
105         {
106             if (aDicts[i].apCC) delete aDicts[i].apCC;
107             aDicts[i].apCC = NULL;
108         }
109 	}
110     if (aDicts) delete[] aDicts;
111 	aDicts = NULL;
112     numdict = 0;
113 }
114 
115 
116 PropertyHelper_Hyphen & Hyphenator::GetPropHelper_Impl()
117 {
118 	if (!pPropHelper)
119 	{
120 		Reference< XPropertySet	>	xPropSet( GetLinguProperties(), UNO_QUERY );
121 
122 		pPropHelper	= new PropertyHelper_Hyphen ((XHyphenator *) this, xPropSet );
123 		xPropHelper = pPropHelper;
124 		pPropHelper->AddAsPropListener();	//! after a reference is established
125 	}
126 	return *pPropHelper;
127 
128 }
129 
130 
131 Sequence< Locale > SAL_CALL Hyphenator::getLocales()
132 		throw(RuntimeException)
133 {
134 	MutexGuard	aGuard( GetLinguMutex() );
135 
136     // this routine should return the locales supported by the installed
137     // dictionaries.
138 
139     if (!numdict)
140     {
141         SvtLinguConfig aLinguCfg;
142 
143         // get list of dictionaries-to-use
144 		// (or better speaking: the list of dictionaries using the
145 		// new configuration entries).
146         std::list< SvtLinguConfigDictionaryEntry > aDics;
147         uno::Sequence< rtl::OUString > aFormatList;
148         aLinguCfg.GetSupportedDictionaryFormatsFor( A2OU("Hyphenators"),
149                 A2OU("org.openoffice.lingu.LibHnjHyphenator"), aFormatList );
150         sal_Int32 nLen = aFormatList.getLength();
151         for (sal_Int32 i = 0;  i < nLen;  ++i)
152         {
153             std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
154                     aLinguCfg.GetActiveDictionariesByFormat( aFormatList[i] ) );
155             aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
156         }
157 
158         //!! for compatibility with old dictionaries (the ones not using extensions
159         //!! or new configuration entries, but still using the dictionary.lst file)
160 		//!! Get the list of old style spell checking dictionaries to use...
161         std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
162 				GetOldStyleDics( "HYPH" ) );
163 
164 		// to prefer dictionaries with configuration entries we will only
165 		// use those old style dictionaries that add a language that
166 		// is not yet supported by the list od new style dictionaries
167 		MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
168 
169         numdict = aDics.size();
170         if (numdict)
171         {
172             // get supported locales from the dictionaries-to-use...
173             sal_Int32 k = 0;
174             std::set< rtl::OUString, lt_rtl_OUString > aLocaleNamesSet;
175             std::list< SvtLinguConfigDictionaryEntry >::const_iterator aDictIt;
176             for (aDictIt = aDics.begin();  aDictIt != aDics.end();  ++aDictIt)
177             {
178                 uno::Sequence< rtl::OUString > aLocaleNames( aDictIt->aLocaleNames );
179                 sal_Int32 nLen2 = aLocaleNames.getLength();
180                 for (k = 0;  k < nLen2;  ++k)
181                 {
182                     aLocaleNamesSet.insert( aLocaleNames[k] );
183                 }
184             }
185             // ... and add them to the resulting sequence
186             aSuppLocales.realloc( aLocaleNamesSet.size() );
187             std::set< rtl::OUString, lt_rtl_OUString >::const_iterator aItB;
188             k = 0;
189             for (aItB = aLocaleNamesSet.begin();  aItB != aLocaleNamesSet.end();  ++aItB)
190             {
191                 Locale aTmp( MsLangId::convertLanguageToLocale(
192                         MsLangId::convertIsoStringToLanguage( *aItB )));
193                 aSuppLocales[k++] = aTmp;
194             }
195 
196             //! For each dictionary and each locale we need a seperate entry.
197             //! If this results in more than one dictionary per locale than (for now)
198 			//! it is undefined which dictionary gets used.
199 			//! In the future the implementation should support using several dictionaries
200 			//! for one locale.
201 			numdict = 0;
202             for (aDictIt = aDics.begin();  aDictIt != aDics.end();  ++aDictIt)
203 				numdict = numdict + aDictIt->aLocaleNames.getLength();
204 
205             // add dictionary information
206             aDicts = new HDInfo[numdict];
207 
208             k = 0;
209             for (aDictIt = aDics.begin();  aDictIt != aDics.end();  ++aDictIt)
210             {
211                 if (aDictIt->aLocaleNames.getLength() > 0 &&
212                     aDictIt->aLocations.getLength() > 0)
213                 {
214                     uno::Sequence< rtl::OUString > aLocaleNames( aDictIt->aLocaleNames );
215                     sal_Int32 nLocales = aLocaleNames.getLength();
216 
217                     // currently only one language per dictionary is supported in the actual implementation...
218                     // Thus here we work-around this by adding the same dictionary several times.
219                     // Once for each of it's supported locales.
220                     for (sal_Int32 i = 0;  i < nLocales;  ++i)
221                     {
222                         aDicts[k].aPtr = NULL;
223                         aDicts[k].eEnc = RTL_TEXTENCODING_DONTKNOW;
224                         aDicts[k].aLoc = MsLangId::convertLanguageToLocale(
225                                         MsLangId::convertIsoStringToLanguage( aDictIt->aLocaleNames[i] ));
226                         aDicts[k].apCC = new CharClass( aDicts[k].aLoc );
227                         // also both files have to be in the same directory and the
228                         // file names must only differ in the extension (.aff/.dic).
229                         // Thus we use the first location only and strip the extension part.
230                         rtl::OUString aLocation = aDictIt->aLocations[0];
231                         sal_Int32 nPos = aLocation.lastIndexOf( '.' );
232                         aLocation = aLocation.copy( 0, nPos );
233                         aDicts[k].aName = aLocation;
234 
235                         ++k;
236                     }
237                 }
238             }
239             DBG_ASSERT( k == numdict, "index mismatch?" );
240         }
241         else
242         {
243             /* no dictionary found so register no dictionaries */
244             numdict = 0;
245             aDicts = NULL;
246             aSuppLocales.realloc(0);
247         }
248     }
249 
250     return aSuppLocales;
251 }
252 
253 
254 
255 sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale)
256 		throw(RuntimeException)
257 {
258 	MutexGuard	aGuard( GetLinguMutex() );
259 
260 	sal_Bool bRes = sal_False;
261 	if (!aSuppLocales.getLength())
262 		getLocales();
263 
264 	const Locale *pLocale = aSuppLocales.getConstArray();
265 	sal_Int32 nLen = aSuppLocales.getLength();
266 	for (sal_Int32 i = 0;  i < nLen;  ++i)
267 	{
268 		if (rLocale == pLocale[i])
269 		{
270 			bRes = sal_True;
271 			break;
272 		}
273 	}
274 	return bRes;
275 }
276 
277 
// Looks for a hyphenation position in aWord using the dictionary registered
// for aLocale.
//
// aWord        the word to hyphenate
// aLocale      selects the dictionary; it has to be equal to the locale of
//              an entry of the dictionary table built by getLocales()
// nMaxLeading  passed to GetPosInWordToCheck() to obtain the limit below
//              which the hyphenation position has to lie
// aProperties  temporary property values handed to the property helper
//              before the minimum leading / trailing / word length settings
//              are read
//
// Returns a HyphenatedWord for the last position that passes all checks
// (the scan runs from the start of the word and keeps overwriting the
// result). An empty reference is returned if there is no dictionary for the
// locale, the dictionary cannot be loaded, its text encoding is unknown,
// libhyphen reports a failure, or no position qualifies.
Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const ::rtl::OUString& aWord,
       const ::com::sun::star::lang::Locale& aLocale,
       sal_Int16 nMaxLeading,
       const ::com::sun::star::beans::PropertyValues& aProperties )
       throw (com::sun::star::uno::RuntimeException, com::sun::star::lang::IllegalArgumentException)
{
	int nHyphenationPos = -1;
    int nHyphenationPosAlt = -1;
    int nHyphenationPosAltHyph = -1;
	int wordlen;
	char *hyphens;
    char *lcword;
    int k = 0;

    PropertyHelper_Hyphen & rHelper = GetPropHelper();
    rHelper.SetTmpPropVals(aProperties);
	sal_Int16 minTrail = rHelper.GetMinTrailing();
	sal_Int16 minLead = rHelper.GetMinLeading();
	sal_Int16 minLen = rHelper.GetMinWordLength();

	HyphenDict *dict = NULL;
    rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
    CharClass * pCC = NULL;

	Reference< XHyphenatedWord > xRes;

    // find the table entry for the locale; there is no break, so if several
    // entries match the last one is used
    k = -1;
    for (int j = 0; j < numdict; j++)
    {
        if (aLocale == aDicts[j].aLoc)
            k = j;
    }

    // if we have a hyphenation dictionary matching this locale
    if (k != -1)
    {
        // if this dictionary has not been loaded yet do that
        if (!aDicts[k].aPtr)
        {
            OUString DictFN = aDicts[k].aName + A2OU(".dic");
            OUString dictpath;

            osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
            OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );

#if defined(WNT)
            // workaround for Windows specific problem that the
            // path length in calls to 'fopen' is limited to somewhat
            // about 120+ characters which will usually be exceeded when
            // using dictionaries as extensions.
            sTmp = Win_GetShortPathName( dictpath );
#endif

            if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
            {
               fprintf(stderr, "Couldn't find file %s\n", OU2ENC(dictpath, osl_getThreadTextEncoding()) );
               return NULL;
            }
            // cache the loaded dictionary and its encoding in the table entry
            aDicts[k].aPtr = dict;
            aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset);
        }

        // otherwise hyphenate the word with that dictionary
        dict = aDicts[k].aPtr;
        eEnc = aDicts[k].eEnc;
        pCC =  aDicts[k].apCC;

        // we don't want to work with a default text encoding since following incorrect
        // results may occur only for specific text and thus may be hard to notice.
        // Thus better always make a clean exit here if the text encoding is in question.
        // Hopefully something not working at all will raise proper attention quickly. ;-)
        DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
        if (eEnc == RTL_TEXTENCODING_DONTKNOW)
            return NULL;

        // remember the capitalization of the original word; it is applied to
        // the replacement text of a discretionary hyphenation further below
        sal_uInt16 ct = CAPTYPE_UNKNOWN;
        ct = capitalType(aWord, pCC);

        // first convert any smart quotes or apostrophes to normal ones
	    OUStringBuffer rBuf(aWord);
        sal_Int32 nc = rBuf.getLength();
        sal_Unicode ch;
        for (sal_Int32 ix=0; ix < nc; ix++)
        {
	        ch = rBuf.charAt(ix);
            if ((ch == 0x201C) || (ch == 0x201D))
                rBuf.setCharAt(ix,(sal_Unicode)0x0022);
            if ((ch == 0x2018) || (ch == 0x2019))
                rBuf.setCharAt(ix,(sal_Unicode)0x0027);
        }
        OUString nWord(rBuf.makeStringAndClear());

        // now convert word to all lowercase for pattern recognition
        OUString nTerm(makeLowerCase(nWord, pCC));

        // now convert word to needed encoding
        OString encWord(OU2ENC(nTerm,eEnc));

        // wordlen counts the characters of the encoded string
        // NOTE(review): the 5 extra bytes of hyphens[] presumably follow the
        // buffer size libhyphen asks for - confirm against hyphen.h / README
	    wordlen = encWord.getLength();
        lcword = new char[wordlen + 1];
	    hyphens = new char[wordlen + 5];

        // the following three arrays are passed by address to
        // hnj_hyphen_hyphenate3() and released with free() below
        char ** rep = NULL; // replacements of discretionary hyphenation
        int * pos = NULL; // array of [hyphenation point] minus [deletion position]
        int * cut = NULL; // length of deletions in original word

        // copy converted word into simple char buffer
        strcpy(lcword,encWord.getStr());

        // now strip off any ending periods
        // (afterwards n is the length of the word without trailing periods)
        int n = wordlen-1;
        while((n >=0) && (lcword[n] == '.'))
            n--;
        n++;
        if (n > 0)
        {
            // NOTE(review): the last two arguments look like minimum
            // distances for compound word parts, derived from the dictionary
            // values and the configured minimums - confirm against libhyphen
            const bool bFailed = 0 != hnj_hyphen_hyphenate3( dict, lcword, n, hyphens, NULL,
                    &rep, &pos, &cut, minLead, minTrail,
                    Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead  - Max(dict->lhmin, 2))),
                    Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) );
            if (bFailed)
            {
                //whoops something did not work
                delete[] hyphens;
                delete[] lcword;
                if (rep)
                {
                    for(int j = 0; j < n; j++)
                    {
                        if (rep[j]) free(rep[j]);
                    }
                    free(rep);
                }
                if (pos) free(pos);
                if (cut) free(cut);
                return NULL;
            }
        }

        // now backfill hyphens[] for any removed trailing periods
        for (int c = n; c < wordlen; c++) hyphens[c] = '0';
        hyphens[wordlen] = '\0';

	    sal_Int32 Leading =  GetPosInWordToCheck( aWord, nMaxLeading );

        // scan all positions; a later hit overwrites an earlier one, so the
        // last acceptable position is the one that gets used
	    for (sal_Int32 i = 0; i < n; i++)
	    {
            int leftrep = 0;
            sal_Bool hit = (n >= minLen);
            if (!rep || !rep[i] || (i >= n))
            {
                // plain position: an odd value in hyphens[] is treated as a
                // hyphenation point; it has to lie below 'Leading' and
                // respect the minimum leading and trailing lengths
                hit = hit && (hyphens[i]&1) && (i < Leading);
                hit = hit && (i >= (minLead-1) );
                hit = hit && ((n - i - 1) >= minTrail);
            }
            else
            {
                // calculate change character length before hyphenation point signed with '='
                // (for UTF-8 the continuation bytes, 10xxxxxx, are not counted)
                for (char * c = rep[i]; *c && (*c != '='); c++)
                {
                    if (eEnc == RTL_TEXTENCODING_UTF8)
                    {
                        if (((unsigned char) *c) >> 6 != 2)
                            leftrep++;
                    }
                    else
                        leftrep++;
                }
                hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading);
                hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) );
                hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail);
            }
            if (hit)
            {
                nHyphenationPos = i;
                if (rep && (i < n) && rep[i])
                {
                    nHyphenationPosAlt = i - pos[i];
                    nHyphenationPosAltHyph = i + leftrep - pos[i];
                }
            }
        }

        if (nHyphenationPos  == -1)
        {
            xRes = NULL;
        }
        else
        {
            if (rep && rep[nHyphenationPos])
            {
                // remove equal sign
                // (everything from the '=' on is shifted one character to the left)
                char * s = rep[nHyphenationPos];
                int eq = 0;
                for (; *s; s++)
                {
                    if (*s == '=') eq = 1;
                    if (eq) *s = *(s + 1);
                }
                OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), eEnc);
                OUString repHyph;
                // give the replacement text the capitalization of the original word
                switch (ct)
                {
                    case CAPTYPE_ALLCAP:
                    {
                        repHyph = makeUpperCase(repHyphlow, pCC);
                        break;
                    }
                    case CAPTYPE_INITCAP:
                    {
                        if (nHyphenationPosAlt == 0)
                            repHyph = makeInitCap(repHyphlow, pCC);
                        else
                             repHyph = repHyphlow;
                        break;
                    }
                    default:
                    {
                        repHyph = repHyphlow;
                        break;
                    }
                }

                // handle shortening
                sal_Int16 nPos = (sal_Int16) ((nHyphenationPosAltHyph < nHyphenationPos) ?
                nHyphenationPosAltHyph : nHyphenationPos);
                // discretionary hyphenation
                xRes = new HyphenatedWord( aWord, LocaleToLanguage( aLocale ), nPos,
                    aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph),
                    (sal_Int16) nHyphenationPosAltHyph);
            }
            else
            {
                xRes = new HyphenatedWord( aWord, LocaleToLanguage( aLocale ),
                    (sal_Int16)nHyphenationPos, aWord, (sal_Int16) nHyphenationPos);
            }
        }

        // release the work buffers and the arrays filled in by libhyphen
        delete[] lcword;
	    delete[] hyphens;
        if (rep)
        {
            for(int j = 0; j < n; j++)
            {
                if (rep[j]) free(rep[j]);
            }
            free(rep);
        }
        if (pos) free(pos);
        if (cut) free(cut);
	    return xRes;
	}
    // no dictionary for this locale
    return NULL;
}
532 
533 
534 Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling(
535         const ::rtl::OUString& /*aWord*/,
536         const ::com::sun::star::lang::Locale& /*aLocale*/,
537         sal_Int16 /*nIndex*/,
538         const ::com::sun::star::beans::PropertyValues& /*aProperties*/ )
539         throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException)
540 {
541   /* alternative spelling isn't supported by tex dictionaries */
542   /* XXX: OOo's extended libhjn algorithm can support alternative spellings with extended TeX dic. */
543   /* TASK: implement queryAlternativeSpelling() */
544   return NULL;
545 }
546 
547 Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const ::rtl::OUString& aWord,
548         const ::com::sun::star::lang::Locale& aLocale,
549         const ::com::sun::star::beans::PropertyValues& aProperties )
550         throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException)
551 {
552     int wordlen;
553     char *hyphens;
554     char *lcword;
555     int k;
556 
557     PropertyHelper_Hyphen & rHelper = GetPropHelper();
558     rHelper.SetTmpPropVals(aProperties);
559     sal_Int16 minTrail = rHelper.GetMinTrailing();
560     sal_Int16 minLead = rHelper.GetMinLeading();
561 
562     HyphenDict *dict = NULL;
563     rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
564     CharClass* pCC = NULL;
565 
566     Reference< XPossibleHyphens > xRes;
567 
568     k = -1;
569     for (int j = 0; j < numdict; j++)
570     {
571         if (aLocale == aDicts[j].aLoc) k = j;
572     }
573 
574     // if we have a hyphenation dictionary matching this locale
575     if (k != -1)
576     {
577         // if this dictioanry has not been loaded yet do that
578         if (!aDicts[k].aPtr)
579         {
580             OUString DictFN = aDicts[k].aName + A2OU(".dic");
581             OUString dictpath;
582 
583             osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
584             OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );
585 
586 #if defined(WNT)
587             // workaround for Windows specifc problem that the
588             // path length in calls to 'fopen' is limted to somewhat
589             // about 120+ characters which will usually be exceed when
590             // using dictionaries as extensions.
591             sTmp = Win_GetShortPathName( dictpath );
592 #endif
593 
594             if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
595             {
596                fprintf(stderr, "Couldn't find file %s and %s\n", sTmp.getStr(), OU2ENC(dictpath, osl_getThreadTextEncoding()) );
597                return NULL;
598             }
599             aDicts[k].aPtr = dict;
600             aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset);
601         }
602 
603         // other wise hyphenate the word with that dictionary
604         dict = aDicts[k].aPtr;
605         eEnc = aDicts[k].eEnc;
606         pCC  = aDicts[k].apCC;
607 
608         // we don't want to work with a default text encoding since following incorrect
609         // results may occur only for specific text and thus may be hard to notice.
610         // Thus better always make a clean exit here if the text encoding is in question.
611         // Hopefully something not working at all will raise proper attention quickly. ;-)
612         DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
613         if (eEnc == RTL_TEXTENCODING_DONTKNOW)
614             return NULL;
615 
616         // first handle smart quotes both single and double
617         OUStringBuffer rBuf(aWord);
618         sal_Int32 nc = rBuf.getLength();
619         sal_Unicode ch;
620         for (sal_Int32 ix=0; ix < nc; ix++)
621         {
622             ch = rBuf.charAt(ix);
623             if ((ch == 0x201C) || (ch == 0x201D))
624                 rBuf.setCharAt(ix,(sal_Unicode)0x0022);
625             if ((ch == 0x2018) || (ch == 0x2019))
626                 rBuf.setCharAt(ix,(sal_Unicode)0x0027);
627         }
628         OUString nWord(rBuf.makeStringAndClear());
629 
630         // now convert word to all lowercase for pattern recognition
631         OUString nTerm(makeLowerCase(nWord, pCC));
632 
633         // now convert word to needed encoding
634         OString encWord(OU2ENC(nTerm,eEnc));
635 
636         wordlen = encWord.getLength();
637         lcword = new char[wordlen+1];
638         hyphens = new char[wordlen+5];
639         char ** rep = NULL; // replacements of discretionary hyphenation
640         int * pos = NULL; // array of [hyphenation point] minus [deletion position]
641         int * cut = NULL; // length of deletions in original word
642 
643         // copy converted word into simple char buffer
644         strcpy(lcword,encWord.getStr());
645 
646         // first remove any trailing periods
647         int n = wordlen-1;
648         while((n >=0) && (lcword[n] == '.'))
649             n--;
650         n++;
651         // fprintf(stderr,"hyphenate... %s\n",lcword); fflush(stderr);
652         if (n > 0)
653         {
654             const bool bFailed = 0 != hnj_hyphen_hyphenate3(dict, lcword, n, hyphens, NULL,
655                     &rep, &pos, &cut, minLead, minTrail,
656                     Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))),
657                     Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) );
658             if (bFailed)
659             {
660                 delete[] hyphens;
661                 delete[] lcword;
662 
663                 if (rep)
664                 {
665                     for(int j = 0; j < n; j++)
666                     {
667                         if (rep[j]) free(rep[j]);
668                     }
669                     free(rep);
670                 }
671                 if (pos) free(pos);
672                 if (cut) free(cut);
673 
674                 return NULL;
675             }
676         }
677         // now backfill hyphens[] for any removed periods
678         for (int c = n; c < wordlen; c++)
679             hyphens[c] = '0';
680         hyphens[wordlen] = '\0';
681         // fprintf(stderr,"... %s\n",hyphens); fflush(stderr);
682 
683         sal_Int16 nHyphCount = 0;
684         sal_Int16 i;
685 
686         for ( i = 0; i < encWord.getLength(); i++)
687         {
688             if (hyphens[i]&1 && (!rep || !rep[i]))
689                 nHyphCount++;
690         }
691 
692         Sequence< sal_Int16 > aHyphPos(nHyphCount);
693         sal_Int16 *pPos = aHyphPos.getArray();
694         OUStringBuffer hyphenatedWordBuffer;
695         OUString hyphenatedWord;
696         nHyphCount = 0;
697 
698         for (i = 0; i < nWord.getLength(); i++)
699         {
700             hyphenatedWordBuffer.append(aWord[i]);
701             // hyphenation position (not alternative)
702             if (hyphens[i]&1 && (!rep || !rep[i]))
703             {
704                 pPos[nHyphCount] = i;
705                 hyphenatedWordBuffer.append(sal_Unicode('='));
706                 nHyphCount++;
707             }
708         }
709 
710         hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear();
711         //fprintf(stderr,"result is %s\n",OU2A(hyphenatedWord));
712         //fflush(stderr);
713 
714         xRes = new PossibleHyphens( aWord, LocaleToLanguage( aLocale ),
715                   hyphenatedWord, aHyphPos );
716 
717         delete[] hyphens;
718         delete[] lcword;
719 
720         if (rep)
721         {
722             for(int j = 0; j < n; j++)
723             {
724                 if (rep[j]) free(rep[j]);
725             }
726             free(rep);
727         }
728         if (pos) free(pos);
729         if (cut) free(cut);
730 
731         return xRes;
732     }
733 
734     return NULL;
735 }
736 
737 sal_uInt16 SAL_CALL Hyphenator::capitalType(const OUString& aTerm, CharClass * pCC)
738 {
739     sal_Int32 tlen = aTerm.getLength();
740     if ((pCC) && (tlen))
741     {
742         String aStr(aTerm);
743         sal_Int32 nc = 0;
744         for (xub_StrLen tindex = 0; tindex < tlen;  tindex++)
745         {
746             if (pCC->getCharacterType(aStr,tindex) & ::com::sun::star::i18n::KCharacterType::UPPER)
747                 nc++;
748         }
749 
750         if (nc == 0)
751             return (sal_uInt16) CAPTYPE_NOCAP;
752         if (nc == tlen)
753             return (sal_uInt16) CAPTYPE_ALLCAP;
754         if ((nc == 1) && (pCC->getCharacterType(aStr,0) & ::com::sun::star::i18n::KCharacterType::UPPER))
755             return (sal_uInt16) CAPTYPE_INITCAP;
756 
757         return (sal_uInt16) CAPTYPE_MIXED;
758     }
759     return (sal_uInt16) CAPTYPE_UNKNOWN;
760 }
761 
762 OUString SAL_CALL Hyphenator::makeLowerCase(const OUString& aTerm, CharClass * pCC)
763 {
764     if (pCC)
765         return pCC->toLower_rtl(aTerm, 0, aTerm.getLength());
766     return aTerm;
767 }
768 
769 OUString SAL_CALL Hyphenator::makeUpperCase(const OUString& aTerm, CharClass * pCC)
770 {
771     if (pCC)
772         return pCC->toUpper_rtl(aTerm, 0, aTerm.getLength());
773     return aTerm;
774 }
775 
776 
777 OUString SAL_CALL Hyphenator::makeInitCap(const OUString& aTerm, CharClass * pCC)
778 {
779     sal_Int32 tlen = aTerm.getLength();
780     if ((pCC) && (tlen))
781     {
782         OUString bTemp = aTerm.copy(0,1);
783         if (tlen > 1)
784             return ( pCC->toUpper_rtl(bTemp, 0, 1) + pCC->toLower_rtl(aTerm,1,(tlen-1)) );
785 
786         return pCC->toUpper_rtl(bTemp, 0, 1);
787 	}
788     return aTerm;
789 }
790 
791 
792 Reference< XInterface > SAL_CALL Hyphenator_CreateInstance(
793         const Reference< XMultiServiceFactory > & /*rSMgr*/ )
794         throw(Exception)
795 {
796 	Reference< XInterface > xService = (cppu::OWeakObject*) new Hyphenator;
797 	return xService;
798 }
799 
800 
801 sal_Bool SAL_CALL Hyphenator::addLinguServiceEventListener(
802         const Reference< XLinguServiceEventListener >& rxLstnr )
803         throw(RuntimeException)
804 {
805 	MutexGuard	aGuard( GetLinguMutex() );
806 
807 	sal_Bool bRes = sal_False;
808 	if (!bDisposing && rxLstnr.is())
809 	{
810 		bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr );
811 	}
812 	return bRes;
813 }
814 
815 
816 sal_Bool SAL_CALL Hyphenator::removeLinguServiceEventListener(
817         const Reference< XLinguServiceEventListener >& rxLstnr )
818         throw(RuntimeException)
819 {
820 	MutexGuard	aGuard( GetLinguMutex() );
821 
822 	sal_Bool bRes = sal_False;
823 	if (!bDisposing && rxLstnr.is())
824 	{
825 		DBG_ASSERT( xPropHelper.is(), "xPropHelper non existent" );
826 		bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr );
827 	}
828 	return bRes;
829 }
830 
831 
832 OUString SAL_CALL Hyphenator::getServiceDisplayName( const Locale& /*rLocale*/ )
833         throw(RuntimeException)
834 {
835 	MutexGuard	aGuard( GetLinguMutex() );
836 	return A2OU( "Libhyphen Hyphenator" );
837 }
838 
839 
840 void SAL_CALL Hyphenator::initialize( const Sequence< Any >& rArguments )
841 		throw(Exception, RuntimeException)
842 {
843 	MutexGuard	aGuard( GetLinguMutex() );
844 
845 	if (!pPropHelper)
846 	{
847 		sal_Int32 nLen = rArguments.getLength();
848 		if (2 == nLen)
849 		{
850 			Reference< XPropertySet	>	xPropSet;
851 			rArguments.getConstArray()[0] >>= xPropSet;
852 			//rArguments.getConstArray()[1] >>= xDicList;
853 
854 			//! Pointer allows for access of the non-UNO functions.
855 			//! And the reference to the UNO-functions while increasing
856 			//! the ref-count and will implicitly free the memory
857 			//! when the object is not longer used.
858 			pPropHelper = new PropertyHelper_Hyphen( (XHyphenator *) this, xPropSet );
859 			xPropHelper = pPropHelper;
860 			pPropHelper->AddAsPropListener();	//! after a reference is established
861 		}
862         else
863         {
864 			DBG_ERROR( "wrong number of arguments in sequence" );
865         }
866 	}
867 }
868 
869 
870 void SAL_CALL Hyphenator::dispose()
871 		throw(RuntimeException)
872 {
873 	MutexGuard	aGuard( GetLinguMutex() );
874 
875 	if (!bDisposing)
876 	{
877 		bDisposing = sal_True;
878 		EventObject	aEvtObj( (XHyphenator *) this );
879 		aEvtListeners.disposeAndClear( aEvtObj );
880 	}
881 }
882 
883 
884 void SAL_CALL Hyphenator::addEventListener( const Reference< XEventListener >& rxListener )
885 		throw(RuntimeException)
886 {
887 	MutexGuard	aGuard( GetLinguMutex() );
888 
889 	if (!bDisposing && rxListener.is())
890 		aEvtListeners.addInterface( rxListener );
891 }
892 
893 
894 void SAL_CALL Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener )
895 		throw(RuntimeException)
896 {
897 	MutexGuard	aGuard( GetLinguMutex() );
898 
899 	if (!bDisposing && rxListener.is())
900 		aEvtListeners.removeInterface( rxListener );
901 }
902 
903 
904 ///////////////////////////////////////////////////////////////////////////
905 // Service specific part
906 //
907 
908 OUString SAL_CALL Hyphenator::getImplementationName()
909 		throw(RuntimeException)
910 {
911 	MutexGuard	aGuard( GetLinguMutex() );
912 
913 	return getImplementationName_Static();
914 }
915 
916 
917 sal_Bool SAL_CALL Hyphenator::supportsService( const OUString& ServiceName )
918 		throw(RuntimeException)
919 {
920 	MutexGuard	aGuard( GetLinguMutex() );
921 
922 	Sequence< OUString > aSNL = getSupportedServiceNames();
923 	const OUString * pArray = aSNL.getConstArray();
924 	for( sal_Int32 i = 0; i < aSNL.getLength(); i++ )
925 		if( pArray[i] == ServiceName )
926 			return sal_True;
927 	return sal_False;
928 }
929 
930 
931 Sequence< OUString > SAL_CALL Hyphenator::getSupportedServiceNames()
932 		throw(RuntimeException)
933 {
934 	MutexGuard	aGuard( GetLinguMutex() );
935 
936 	return getSupportedServiceNames_Static();
937 }
938 
939 
940 Sequence< OUString > Hyphenator::getSupportedServiceNames_Static()
941 		throw()
942 {
943 	MutexGuard	aGuard( GetLinguMutex() );
944 
945 	Sequence< OUString > aSNS( 1 );	// auch mehr als 1 Service moeglich
946 	aSNS.getArray()[0] = A2OU( SN_HYPHENATOR );
947 	return aSNS;
948 }
949 
950 void * SAL_CALL Hyphenator_getFactory( const sal_Char * pImplName,
951 			XMultiServiceFactory * pServiceManager, void *  )
952 {
953 	void * pRet = 0;
954 	if ( !Hyphenator::getImplementationName_Static().compareToAscii( pImplName ) )
955 	{
956 		Reference< XSingleServiceFactory > xFactory =
957 			cppu::createOneInstanceFactory(
958 				pServiceManager,
959 				Hyphenator::getImplementationName_Static(),
960 				Hyphenator_CreateInstance,
961 				Hyphenator::getSupportedServiceNames_Static());
962 		// acquire, because we return an interface pointer instead of a reference
963 		xFactory->acquire();
964 		pRet = xFactory.get();
965 	}
966 	return pRet;
967 }
968 
969 
970 ///////////////////////////////////////////////////////////////////////////
971 
972 #undef CAPTYPE_UNKNOWN
973 #undef CAPTYPE_NOCAP
974 #undef CAPTYPE_INITCAP
975 #undef CAPTYPE_ALLCAP
976 #undef CAPTYPE_MIXED
977