1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_hyphen.hxx"
26
27
28 #include <com/sun/star/uno/Reference.h>
29 #include <com/sun/star/linguistic2/XSearchableDictionaryList.hpp>
30
31 #include <cppuhelper/factory.hxx> // helper for factories
32 #include <com/sun/star/registry/XRegistryKey.hpp>
33 #include <i18npool/mslangid.hxx>
34 #include <unotools/pathoptions.hxx>
35 #include <unotools/useroptions.hxx>
36 #include <tools/debug.hxx>
37 #include <unotools/processfactory.hxx>
38 #include <osl/mutex.hxx>
39
40 #include <hyphen.h>
41 #include <hyphenimp.hxx>
42
43 #include <linguistic/hyphdta.hxx>
44 #include <rtl/ustring.hxx>
45 #include <rtl/ustrbuf.hxx>
46 #include <rtl/textenc.h>
47
48 #include <linguistic/lngprops.hxx>
49 #include <unotools/pathoptions.hxx>
50 #include <unotools/useroptions.hxx>
51 #include <unotools/lingucfg.hxx>
52 #include <osl/file.hxx>
53
54 #include <stdio.h>
55 #include <string.h>
56
57 #include <list>
58 #include <set>
59
60 using namespace utl;
61 using namespace osl;
62 using namespace rtl;
63 using namespace com::sun::star;
64 using namespace com::sun::star::beans;
65 using namespace com::sun::star::lang;
66 using namespace com::sun::star::uno;
67 using namespace com::sun::star::linguistic2;
68 using namespace linguistic;
69
// Capitalization classes as returned by Hyphenator::capitalType().
#define CAPTYPE_UNKNOWN 0   // no character classification available or empty word
#define CAPTYPE_NOCAP   1   // the word contains no upper-case character
#define CAPTYPE_INITCAP 2   // only the first character is upper case
#define CAPTYPE_ALLCAP  3   // every character is upper case
#define CAPTYPE_MIXED   4   // any other distribution of upper-case characters

// min, max

//#define Min(a,b) ((a) < (b) ? (a) : (b))

// Both arguments are parenthesized so that operands containing operators of
// lower precedence than '>' (e.g. '|', '&', '?:') expand as intended.
// Note that, being a macro, each argument is still evaluated twice.
#define Max(a,b) ((a) > (b) ? (a) : (b))
81
82 ///////////////////////////////////////////////////////////////////////////
83
84
Hyphenator()85 Hyphenator::Hyphenator() :
86 aEvtListeners ( GetLinguMutex() )
87 {
88 bDisposing = sal_False;
89 pPropHelper = NULL;
90 aDicts = NULL;
91 numdict = 0;
92 }
93
94
~Hyphenator()95 Hyphenator::~Hyphenator()
96 {
97 if (pPropHelper)
98 pPropHelper->RemoveAsPropListener();
99
100 if ((numdict) && (aDicts))
101 {
102 for (int i=0; i < numdict; i++)
103 {
104 if (aDicts[i].apCC) delete aDicts[i].apCC;
105 aDicts[i].apCC = NULL;
106 }
107 }
108 if (aDicts) delete[] aDicts;
109 aDicts = NULL;
110 numdict = 0;
111 delete pPropHelper;
112 }
113
114
GetPropHelper_Impl()115 PropertyHelper_Hyphenation& Hyphenator::GetPropHelper_Impl()
116 {
117 if (!pPropHelper)
118 {
119 Reference< XPropertySet > xPropSet( GetLinguProperties(), UNO_QUERY );
120
121 pPropHelper = new PropertyHelper_Hyphenation ((XHyphenator *) this, xPropSet );
122 pPropHelper->AddAsPropListener(); //! after a reference is established
123 }
124 return *pPropHelper;
125
126 }
127
128
getLocales()129 Sequence< Locale > SAL_CALL Hyphenator::getLocales()
130 throw(RuntimeException)
131 {
132 MutexGuard aGuard( GetLinguMutex() );
133
134 // this routine should return the locales supported by the installed
135 // dictionaries.
136
137 if (!numdict)
138 {
139 SvtLinguConfig aLinguCfg;
140
141 // get list of dictionaries-to-use
142 // (or better speaking: the list of dictionaries using the
143 // new configuration entries).
144 std::list< SvtLinguConfigDictionaryEntry > aDics;
145 uno::Sequence< rtl::OUString > aFormatList;
146 aLinguCfg.GetSupportedDictionaryFormatsFor( A2OU("Hyphenators"),
147 A2OU("org.openoffice.lingu.LibHnjHyphenator"), aFormatList );
148 sal_Int32 nLen = aFormatList.getLength();
149 for (sal_Int32 i = 0; i < nLen; ++i)
150 {
151 std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
152 aLinguCfg.GetActiveDictionariesByFormat( aFormatList[i] ) );
153 aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
154 }
155
156 //!! for compatibility with old dictionaries (the ones not using extensions
157 //!! or new configuration entries, but still using the dictionary.lst file)
158 //!! Get the list of old style spell checking dictionaries to use...
159 std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
160 GetOldStyleDics( "HYPH" ) );
161
162 // to prefer dictionaries with configuration entries we will only
163 // use those old style dictionaries that add a language that
164 // is not yet supported by the list od new style dictionaries
165 MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
166
167 numdict = aDics.size();
168 if (numdict)
169 {
170 // get supported locales from the dictionaries-to-use...
171 sal_Int32 k = 0;
172 std::set< rtl::OUString, lt_rtl_OUString > aLocaleNamesSet;
173 std::list< SvtLinguConfigDictionaryEntry >::const_iterator aDictIt;
174 for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
175 {
176 uno::Sequence< rtl::OUString > aLocaleNames( aDictIt->aLocaleNames );
177 sal_Int32 nLen2 = aLocaleNames.getLength();
178 for (k = 0; k < nLen2; ++k)
179 {
180 aLocaleNamesSet.insert( aLocaleNames[k] );
181 }
182 }
183 // ... and add them to the resulting sequence
184 aSuppLocales.realloc( aLocaleNamesSet.size() );
185 std::set< rtl::OUString, lt_rtl_OUString >::const_iterator aItB;
186 k = 0;
187 for (aItB = aLocaleNamesSet.begin(); aItB != aLocaleNamesSet.end(); ++aItB)
188 {
189 Locale aTmp( MsLangId::convertLanguageToLocale(
190 MsLangId::convertIsoStringToLanguage( *aItB )));
191 aSuppLocales[k++] = aTmp;
192 }
193
194 //! For each dictionary and each locale we need a separate entry.
195 //! If this results in more than one dictionary per locale than (for now)
196 //! it is undefined which dictionary gets used.
197 //! In the future the implementation should support using several dictionaries
198 //! for one locale.
199 numdict = 0;
200 for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
201 numdict = numdict + aDictIt->aLocaleNames.getLength();
202
203 // add dictionary information
204 aDicts = new HDInfo[numdict];
205
206 k = 0;
207 for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
208 {
209 if (aDictIt->aLocaleNames.getLength() > 0 &&
210 aDictIt->aLocations.getLength() > 0)
211 {
212 uno::Sequence< rtl::OUString > aLocaleNames( aDictIt->aLocaleNames );
213 sal_Int32 nLocales = aLocaleNames.getLength();
214
215 // currently only one language per dictionary is supported in the actual implementation...
216 // Thus here we work-around this by adding the same dictionary several times.
217 // Once for each of it's supported locales.
218 for (sal_Int32 i = 0; i < nLocales; ++i)
219 {
220 aDicts[k].aPtr = NULL;
221 aDicts[k].eEnc = RTL_TEXTENCODING_DONTKNOW;
222 aDicts[k].aLoc = MsLangId::convertLanguageToLocale(
223 MsLangId::convertIsoStringToLanguage( aDictIt->aLocaleNames[i] ));
224 aDicts[k].apCC = new CharClass( aDicts[k].aLoc );
225 // also both files have to be in the same directory and the
226 // file names must only differ in the extension (.aff/.dic).
227 // Thus we use the first location only and strip the extension part.
228 rtl::OUString aLocation = aDictIt->aLocations[0];
229 sal_Int32 nPos = aLocation.lastIndexOf( '.' );
230 aLocation = aLocation.copy( 0, nPos );
231 aDicts[k].aName = aLocation;
232
233 ++k;
234 }
235 }
236 }
237 DBG_ASSERT( k == numdict, "index mismatch?" );
238 }
239 else
240 {
241 /* no dictionary found so register no dictionaries */
242 numdict = 0;
243 aDicts = NULL;
244 aSuppLocales.realloc(0);
245 }
246 }
247
248 return aSuppLocales;
249 }
250
251
252
hasLocale(const Locale & rLocale)253 sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale)
254 throw(RuntimeException)
255 {
256 MutexGuard aGuard( GetLinguMutex() );
257
258 sal_Bool bRes = sal_False;
259 if (!aSuppLocales.getLength())
260 getLocales();
261
262 const Locale *pLocale = aSuppLocales.getConstArray();
263 sal_Int32 nLen = aSuppLocales.getLength();
264 for (sal_Int32 i = 0; i < nLen; ++i)
265 {
266 if (rLocale == pLocale[i])
267 {
268 bRes = sal_True;
269 break;
270 }
271 }
272 return bRes;
273 }
274
275
hyphenate(const::rtl::OUString & aWord,const::com::sun::star::lang::Locale & aLocale,sal_Int16 nMaxLeading,const::com::sun::star::beans::PropertyValues & aProperties)276 Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const ::rtl::OUString& aWord,
277 const ::com::sun::star::lang::Locale& aLocale,
278 sal_Int16 nMaxLeading,
279 const ::com::sun::star::beans::PropertyValues& aProperties )
280 throw (com::sun::star::uno::RuntimeException, com::sun::star::lang::IllegalArgumentException)
281 {
282 int nHyphenationPos = -1;
283 int nHyphenationPosAlt = -1;
284 int nHyphenationPosAltHyph = -1;
285 int wordlen;
286 char *hyphens;
287 char *lcword;
288 int k = 0;
289
290 PropertyHelper_Hyphenation& rHelper = GetPropHelper();
291 rHelper.SetTmpPropVals(aProperties);
292 sal_Int16 minTrail = rHelper.GetMinTrailing();
293 sal_Int16 minLead = rHelper.GetMinLeading();
294 sal_Int16 minLen = rHelper.GetMinWordLength();
295
296 HyphenDict *dict = NULL;
297 rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
298 CharClass * pCC = NULL;
299
300 Reference< XHyphenatedWord > xRes;
301
302 k = -1;
303 for (int j = 0; j < numdict; j++)
304 {
305 if (aLocale == aDicts[j].aLoc)
306 k = j;
307 }
308
309 // if we have a hyphenation dictionary matching this locale
310 if (k != -1)
311 {
312 // if this dictinary has not been loaded yet do that
313 if (!aDicts[k].aPtr)
314 {
315 OUString DictFN = aDicts[k].aName + A2OU(".dic");
316 OUString dictpath;
317
318 osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
319 OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );
320
321 #if defined(WNT)
322 // workaround for Windows specifc problem that the
323 // path length in calls to 'fopen' is limted to somewhat
324 // about 120+ characters which will usually be exceed when
325 // using dictionaries as extensions.
326 sTmp = Win_GetShortPathName( dictpath );
327 #endif
328
329 if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
330 {
331 fprintf(stderr, "Couldn't find file %s\n", OU2ENC(dictpath, osl_getThreadTextEncoding()) );
332 return NULL;
333 }
334 aDicts[k].aPtr = dict;
335 aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset);
336 }
337
338 // other wise hyphenate the word with that dictionary
339 dict = aDicts[k].aPtr;
340 eEnc = aDicts[k].eEnc;
341 pCC = aDicts[k].apCC;
342
343 // we don't want to work with a default text encoding since following incorrect
344 // results may occur only for specific text and thus may be hard to notice.
345 // Thus better always make a clean exit here if the text encoding is in question.
346 // Hopefully something not working at all will raise proper attention quickly. ;-)
347 DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
348 if (eEnc == RTL_TEXTENCODING_DONTKNOW)
349 return NULL;
350
351 sal_uInt16 ct = CAPTYPE_UNKNOWN;
352 ct = capitalType(aWord, pCC);
353
354 // first convert any smart quotes or apostrophes to normal ones
355 OUStringBuffer rBuf(aWord);
356 sal_Int32 nc = rBuf.getLength();
357 sal_Unicode ch;
358 for (sal_Int32 ix=0; ix < nc; ix++)
359 {
360 ch = rBuf.charAt(ix);
361 if ((ch == 0x201C) || (ch == 0x201D))
362 rBuf.setCharAt(ix,(sal_Unicode)0x0022);
363 if ((ch == 0x2018) || (ch == 0x2019))
364 rBuf.setCharAt(ix,(sal_Unicode)0x0027);
365 }
366 OUString nWord(rBuf.makeStringAndClear());
367
368 // now convert word to all lowercase for pattern recognition
369 OUString nTerm(makeLowerCase(nWord, pCC));
370
371 // now convert word to needed encoding
372 OString encWord(OU2ENC(nTerm,eEnc));
373
374 wordlen = encWord.getLength();
375 lcword = new char[wordlen + 1];
376 hyphens = new char[wordlen + 5];
377
378 char ** rep = NULL; // replacements of discretionary hyphenation
379 int * pos = NULL; // array of [hyphenation point] minus [deletion position]
380 int * cut = NULL; // length of deletions in original word
381
382 // copy converted word into simple char buffer
383 strcpy(lcword,encWord.getStr());
384
385 // now strip off any ending periods
386 int n = wordlen-1;
387 while((n >=0) && (lcword[n] == '.'))
388 n--;
389 n++;
390 if (n > 0)
391 {
392 const bool bFailed = 0 != hnj_hyphen_hyphenate3( dict, lcword, n, hyphens, NULL,
393 &rep, &pos, &cut, minLead, minTrail,
394 Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))),
395 Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) );
396 if (bFailed)
397 {
398 //whoops something did not work
399 delete[] hyphens;
400 delete[] lcword;
401 if (rep)
402 {
403 for(int j = 0; j < n; j++)
404 {
405 if (rep[j]) free(rep[j]);
406 }
407 free(rep);
408 }
409 if (pos) free(pos);
410 if (cut) free(cut);
411 return NULL;
412 }
413 }
414
415 // now backfill hyphens[] for any removed trailing periods
416 for (int c = n; c < wordlen; c++) hyphens[c] = '0';
417 hyphens[wordlen] = '\0';
418
419 sal_Int32 Leading = GetPosInWordToCheck( aWord, nMaxLeading );
420
421 for (sal_Int32 i = 0; i < n; i++)
422 {
423 int leftrep = 0;
424 sal_Bool hit = (n >= minLen);
425 if (!rep || !rep[i] || (i >= n))
426 {
427 hit = hit && (hyphens[i]&1) && (i < Leading);
428 hit = hit && (i >= (minLead-1) );
429 hit = hit && ((n - i - 1) >= minTrail);
430 }
431 else
432 {
433 // calculate change character length before hyphenation point signed with '='
434 for (char * c = rep[i]; *c && (*c != '='); c++)
435 {
436 if (eEnc == RTL_TEXTENCODING_UTF8)
437 {
438 if (((unsigned char) *c) >> 6 != 2)
439 leftrep++;
440 }
441 else
442 leftrep++;
443 }
444 hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading);
445 hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) );
446 hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail);
447 }
448 if (hit)
449 {
450 nHyphenationPos = i;
451 if (rep && (i < n) && rep[i])
452 {
453 nHyphenationPosAlt = i - pos[i];
454 nHyphenationPosAltHyph = i + leftrep - pos[i];
455 }
456 }
457 }
458
459 if (nHyphenationPos == -1)
460 {
461 xRes = NULL;
462 }
463 else
464 {
465 if (rep && rep[nHyphenationPos])
466 {
467 // remove equal sign
468 char * s = rep[nHyphenationPos];
469 int eq = 0;
470 for (; *s; s++)
471 {
472 if (*s == '=') eq = 1;
473 if (eq) *s = *(s + 1);
474 }
475 OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), eEnc);
476 OUString repHyph;
477 switch (ct)
478 {
479 case CAPTYPE_ALLCAP:
480 {
481 repHyph = makeUpperCase(repHyphlow, pCC);
482 break;
483 }
484 case CAPTYPE_INITCAP:
485 {
486 if (nHyphenationPosAlt == 0)
487 repHyph = makeInitCap(repHyphlow, pCC);
488 else
489 repHyph = repHyphlow;
490 break;
491 }
492 default:
493 {
494 repHyph = repHyphlow;
495 break;
496 }
497 }
498
499 // handle shortening
500 sal_Int16 nPos = (sal_Int16) ((nHyphenationPosAltHyph < nHyphenationPos) ?
501 nHyphenationPosAltHyph : nHyphenationPos);
502 // dicretionary hyphenation
503 xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LocaleToLanguage( aLocale ), nPos,
504 aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph),
505 (sal_Int16) nHyphenationPosAltHyph);
506 }
507 else
508 {
509 xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LocaleToLanguage( aLocale ),
510 (sal_Int16)nHyphenationPos, aWord, (sal_Int16) nHyphenationPos);
511 }
512 }
513
514 delete[] lcword;
515 delete[] hyphens;
516 if (rep)
517 {
518 for(int j = 0; j < n; j++)
519 {
520 if (rep[j]) free(rep[j]);
521 }
522 free(rep);
523 }
524 if (pos) free(pos);
525 if (cut) free(cut);
526 return xRes;
527 }
528 return NULL;
529 }
530
531
queryAlternativeSpelling(const::rtl::OUString &,const::com::sun::star::lang::Locale &,sal_Int16,const::com::sun::star::beans::PropertyValues &)532 Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling(
533 const ::rtl::OUString& /*aWord*/,
534 const ::com::sun::star::lang::Locale& /*aLocale*/,
535 sal_Int16 /*nIndex*/,
536 const ::com::sun::star::beans::PropertyValues& /*aProperties*/ )
537 throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException)
538 {
539 /* alternative spelling isn't supported by tex dictionaries */
540 /* XXX: OOo's extended libhjn algorithm can support alternative spellings with extended TeX dic. */
541 /* TASK: implement queryAlternativeSpelling() */
542 return NULL;
543 }
544
createPossibleHyphens(const::rtl::OUString & aWord,const::com::sun::star::lang::Locale & aLocale,const::com::sun::star::beans::PropertyValues & aProperties)545 Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const ::rtl::OUString& aWord,
546 const ::com::sun::star::lang::Locale& aLocale,
547 const ::com::sun::star::beans::PropertyValues& aProperties )
548 throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException)
549 {
550 int wordlen;
551 char *hyphens;
552 char *lcword;
553 int k;
554
555 PropertyHelper_Hyphenation& rHelper = GetPropHelper();
556 rHelper.SetTmpPropVals(aProperties);
557 sal_Int16 minTrail = rHelper.GetMinTrailing();
558 sal_Int16 minLead = rHelper.GetMinLeading();
559
560 HyphenDict *dict = NULL;
561 rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
562 CharClass* pCC = NULL;
563
564 Reference< XPossibleHyphens > xRes;
565
566 k = -1;
567 for (int j = 0; j < numdict; j++)
568 {
569 if (aLocale == aDicts[j].aLoc) k = j;
570 }
571
572 // if we have a hyphenation dictionary matching this locale
573 if (k != -1)
574 {
575 // if this dictioanry has not been loaded yet do that
576 if (!aDicts[k].aPtr)
577 {
578 OUString DictFN = aDicts[k].aName + A2OU(".dic");
579 OUString dictpath;
580
581 osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
582 OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );
583
584 #if defined(WNT)
585 // workaround for Windows specifc problem that the
586 // path length in calls to 'fopen' is limted to somewhat
587 // about 120+ characters which will usually be exceed when
588 // using dictionaries as extensions.
589 sTmp = Win_GetShortPathName( dictpath );
590 #endif
591
592 if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
593 {
594 fprintf(stderr, "Couldn't find file %s and %s\n", sTmp.getStr(), OU2ENC(dictpath, osl_getThreadTextEncoding()) );
595 return NULL;
596 }
597 aDicts[k].aPtr = dict;
598 aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset);
599 }
600
601 // other wise hyphenate the word with that dictionary
602 dict = aDicts[k].aPtr;
603 eEnc = aDicts[k].eEnc;
604 pCC = aDicts[k].apCC;
605
606 // we don't want to work with a default text encoding since following incorrect
607 // results may occur only for specific text and thus may be hard to notice.
608 // Thus better always make a clean exit here if the text encoding is in question.
609 // Hopefully something not working at all will raise proper attention quickly. ;-)
610 DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
611 if (eEnc == RTL_TEXTENCODING_DONTKNOW)
612 return NULL;
613
614 // first handle smart quotes both single and double
615 OUStringBuffer rBuf(aWord);
616 sal_Int32 nc = rBuf.getLength();
617 sal_Unicode ch;
618 for (sal_Int32 ix=0; ix < nc; ix++)
619 {
620 ch = rBuf.charAt(ix);
621 if ((ch == 0x201C) || (ch == 0x201D))
622 rBuf.setCharAt(ix,(sal_Unicode)0x0022);
623 if ((ch == 0x2018) || (ch == 0x2019))
624 rBuf.setCharAt(ix,(sal_Unicode)0x0027);
625 }
626 OUString nWord(rBuf.makeStringAndClear());
627
628 // now convert word to all lowercase for pattern recognition
629 OUString nTerm(makeLowerCase(nWord, pCC));
630
631 // now convert word to needed encoding
632 OString encWord(OU2ENC(nTerm,eEnc));
633
634 wordlen = encWord.getLength();
635 lcword = new char[wordlen+1];
636 hyphens = new char[wordlen+5];
637 char ** rep = NULL; // replacements of discretionary hyphenation
638 int * pos = NULL; // array of [hyphenation point] minus [deletion position]
639 int * cut = NULL; // length of deletions in original word
640
641 // copy converted word into simple char buffer
642 strcpy(lcword,encWord.getStr());
643
644 // first remove any trailing periods
645 int n = wordlen-1;
646 while((n >=0) && (lcword[n] == '.'))
647 n--;
648 n++;
649 // fprintf(stderr,"hyphenate... %s\n",lcword); fflush(stderr);
650 if (n > 0)
651 {
652 const bool bFailed = 0 != hnj_hyphen_hyphenate3(dict, lcword, n, hyphens, NULL,
653 &rep, &pos, &cut, minLead, minTrail,
654 Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))),
655 Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) );
656 if (bFailed)
657 {
658 delete[] hyphens;
659 delete[] lcword;
660
661 if (rep)
662 {
663 for(int j = 0; j < n; j++)
664 {
665 if (rep[j]) free(rep[j]);
666 }
667 free(rep);
668 }
669 if (pos) free(pos);
670 if (cut) free(cut);
671
672 return NULL;
673 }
674 }
675 // now backfill hyphens[] for any removed periods
676 for (int c = n; c < wordlen; c++)
677 hyphens[c] = '0';
678 hyphens[wordlen] = '\0';
679 // fprintf(stderr,"... %s\n",hyphens); fflush(stderr);
680
681 sal_Int16 nHyphCount = 0;
682 sal_Int16 i;
683
684 for ( i = 0; i < encWord.getLength(); i++)
685 {
686 if (hyphens[i]&1 && (!rep || !rep[i]))
687 nHyphCount++;
688 }
689
690 Sequence< sal_Int16 > aHyphPos(nHyphCount);
691 sal_Int16 *pPos = aHyphPos.getArray();
692 OUStringBuffer hyphenatedWordBuffer;
693 OUString hyphenatedWord;
694 nHyphCount = 0;
695
696 for (i = 0; i < nWord.getLength(); i++)
697 {
698 hyphenatedWordBuffer.append(aWord[i]);
699 // hyphenation position (not alternative)
700 if (hyphens[i]&1 && (!rep || !rep[i]))
701 {
702 pPos[nHyphCount] = i;
703 hyphenatedWordBuffer.append(sal_Unicode('='));
704 nHyphCount++;
705 }
706 }
707
708 hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear();
709 //fprintf(stderr,"result is %s\n",OU2A(hyphenatedWord));
710 //fflush(stderr);
711
712 xRes = PossibleHyphens::CreatePossibleHyphens( aWord, LocaleToLanguage( aLocale ),
713 hyphenatedWord, aHyphPos );
714
715 delete[] hyphens;
716 delete[] lcword;
717
718 if (rep)
719 {
720 for(int j = 0; j < n; j++)
721 {
722 if (rep[j]) free(rep[j]);
723 }
724 free(rep);
725 }
726 if (pos) free(pos);
727 if (cut) free(cut);
728
729 return xRes;
730 }
731
732 return NULL;
733 }
734
capitalType(const OUString & aTerm,CharClass * pCC)735 sal_uInt16 SAL_CALL Hyphenator::capitalType(const OUString& aTerm, CharClass * pCC)
736 {
737 sal_Int32 tlen = aTerm.getLength();
738 if ((pCC) && (tlen))
739 {
740 String aStr(aTerm);
741 sal_Int32 nc = 0;
742 for (xub_StrLen tindex = 0; tindex < tlen; tindex++)
743 {
744 if (pCC->getCharacterType(aStr,tindex) & ::com::sun::star::i18n::KCharacterType::UPPER)
745 nc++;
746 }
747
748 if (nc == 0)
749 return (sal_uInt16) CAPTYPE_NOCAP;
750 if (nc == tlen)
751 return (sal_uInt16) CAPTYPE_ALLCAP;
752 if ((nc == 1) && (pCC->getCharacterType(aStr,0) & ::com::sun::star::i18n::KCharacterType::UPPER))
753 return (sal_uInt16) CAPTYPE_INITCAP;
754
755 return (sal_uInt16) CAPTYPE_MIXED;
756 }
757 return (sal_uInt16) CAPTYPE_UNKNOWN;
758 }
759
// Returns aTerm converted to lower case by pCC, or aTerm unchanged if pCC is NULL.
OUString SAL_CALL Hyphenator::makeLowerCase(const OUString& aTerm, CharClass * pCC)
{
    if (!pCC)
        return aTerm;

    const sal_Int32 nTermLen = aTerm.getLength();
    return pCC->toLower_rtl(aTerm, 0, nTermLen);
}
766
// Returns aTerm converted to upper case by pCC, or aTerm unchanged if pCC is NULL.
OUString SAL_CALL Hyphenator::makeUpperCase(const OUString& aTerm, CharClass * pCC)
{
    if (!pCC)
        return aTerm;

    const sal_Int32 nTermLen = aTerm.getLength();
    return pCC->toUpper_rtl(aTerm, 0, nTermLen);
}
773
774
// Returns aTerm with its first character converted to upper case and the
// remaining characters converted to lower case by pCC. aTerm is returned
// unchanged if pCC is NULL or the term is empty.
OUString SAL_CALL Hyphenator::makeInitCap(const OUString& aTerm, CharClass * pCC)
{
    const sal_Int32 nTermLen = aTerm.getLength();
    if (!pCC || !nTermLen)
        return aTerm;

    OUString aFirst( aTerm.copy(0,1) );
    OUString aResult( pCC->toUpper_rtl(aFirst, 0, 1) );
    if (nTermLen > 1)
        aResult += pCC->toLower_rtl(aTerm, 1, (nTermLen-1));
    return aResult;
}
788
789
// Factory function: creates a new Hyphenator and returns it as XInterface.
// The service manager argument is not used.
Reference< XInterface > SAL_CALL Hyphenator_CreateInstance(
        const Reference< XMultiServiceFactory > & /*rSMgr*/ )
        throw(Exception)
{
    cppu::OWeakObject *pNewHyphenator = (cppu::OWeakObject*) new Hyphenator;
    Reference< XInterface > xService = pNewHyphenator;
    return xService;
}
797
798
addLinguServiceEventListener(const Reference<XLinguServiceEventListener> & rxLstnr)799 sal_Bool SAL_CALL Hyphenator::addLinguServiceEventListener(
800 const Reference< XLinguServiceEventListener >& rxLstnr )
801 throw(RuntimeException)
802 {
803 MutexGuard aGuard( GetLinguMutex() );
804
805 sal_Bool bRes = sal_False;
806 if (!bDisposing && rxLstnr.is())
807 {
808 bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr );
809 }
810 return bRes;
811 }
812
813
removeLinguServiceEventListener(const Reference<XLinguServiceEventListener> & rxLstnr)814 sal_Bool SAL_CALL Hyphenator::removeLinguServiceEventListener(
815 const Reference< XLinguServiceEventListener >& rxLstnr )
816 throw(RuntimeException)
817 {
818 MutexGuard aGuard( GetLinguMutex() );
819
820 sal_Bool bRes = sal_False;
821 if (!bDisposing && rxLstnr.is())
822 {
823 bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr );
824 }
825 return bRes;
826 }
827
828
getServiceDisplayName(const Locale &)829 OUString SAL_CALL Hyphenator::getServiceDisplayName( const Locale& /*rLocale*/ )
830 throw(RuntimeException)
831 {
832 MutexGuard aGuard( GetLinguMutex() );
833 return A2OU( "Libhyphen Hyphenator" );
834 }
835
836
initialize(const Sequence<Any> & rArguments)837 void SAL_CALL Hyphenator::initialize( const Sequence< Any >& rArguments )
838 throw(Exception, RuntimeException)
839 {
840 MutexGuard aGuard( GetLinguMutex() );
841
842 if (!pPropHelper)
843 {
844 sal_Int32 nLen = rArguments.getLength();
845 if (2 == nLen)
846 {
847 Reference< XPropertySet > xPropSet;
848 rArguments.getConstArray()[0] >>= xPropSet;
849 //rArguments.getConstArray()[1] >>= xDicList;
850
851 //! Pointer allows for access of the non-UNO functions.
852 //! And the reference to the UNO-functions while increasing
853 //! the ref-count and will implicitly free the memory
854 //! when the object is not longer used.
855 pPropHelper = new PropertyHelper_Hyphenation( (XHyphenator *) this, xPropSet );
856 pPropHelper->AddAsPropListener(); //! after a reference is established
857 }
858 else
859 {
860 DBG_ERROR( "wrong number of arguments in sequence" );
861 }
862 }
863 }
864
865
dispose()866 void SAL_CALL Hyphenator::dispose()
867 throw(RuntimeException)
868 {
869 MutexGuard aGuard( GetLinguMutex() );
870
871 if (!bDisposing)
872 {
873 bDisposing = sal_True;
874 EventObject aEvtObj( (XHyphenator *) this );
875 aEvtListeners.disposeAndClear( aEvtObj );
876 }
877 }
878
879
addEventListener(const Reference<XEventListener> & rxListener)880 void SAL_CALL Hyphenator::addEventListener( const Reference< XEventListener >& rxListener )
881 throw(RuntimeException)
882 {
883 MutexGuard aGuard( GetLinguMutex() );
884
885 if (!bDisposing && rxListener.is())
886 aEvtListeners.addInterface( rxListener );
887 }
888
889
removeEventListener(const Reference<XEventListener> & rxListener)890 void SAL_CALL Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener )
891 throw(RuntimeException)
892 {
893 MutexGuard aGuard( GetLinguMutex() );
894
895 if (!bDisposing && rxListener.is())
896 aEvtListeners.removeInterface( rxListener );
897 }
898
899
900 ///////////////////////////////////////////////////////////////////////////
901 // Service specific part
902 //
903
getImplementationName()904 OUString SAL_CALL Hyphenator::getImplementationName()
905 throw(RuntimeException)
906 {
907 MutexGuard aGuard( GetLinguMutex() );
908
909 return getImplementationName_Static();
910 }
911
912
supportsService(const OUString & ServiceName)913 sal_Bool SAL_CALL Hyphenator::supportsService( const OUString& ServiceName )
914 throw(RuntimeException)
915 {
916 MutexGuard aGuard( GetLinguMutex() );
917
918 Sequence< OUString > aSNL = getSupportedServiceNames();
919 const OUString * pArray = aSNL.getConstArray();
920 for( sal_Int32 i = 0; i < aSNL.getLength(); i++ )
921 if( pArray[i] == ServiceName )
922 return sal_True;
923 return sal_False;
924 }
925
926
getSupportedServiceNames()927 Sequence< OUString > SAL_CALL Hyphenator::getSupportedServiceNames()
928 throw(RuntimeException)
929 {
930 MutexGuard aGuard( GetLinguMutex() );
931
932 return getSupportedServiceNames_Static();
933 }
934
935
getSupportedServiceNames_Static()936 Sequence< OUString > Hyphenator::getSupportedServiceNames_Static()
937 throw()
938 {
939 MutexGuard aGuard( GetLinguMutex() );
940
941 Sequence< OUString > aSNS( 1 ); // auch mehr als 1 Service moeglich
942 aSNS.getArray()[0] = A2OU( SN_HYPHENATOR );
943 return aSNS;
944 }
945
Hyphenator_getFactory(const sal_Char * pImplName,XMultiServiceFactory * pServiceManager,void *)946 void * SAL_CALL Hyphenator_getFactory( const sal_Char * pImplName,
947 XMultiServiceFactory * pServiceManager, void * )
948 {
949 void * pRet = 0;
950 if ( !Hyphenator::getImplementationName_Static().compareToAscii( pImplName ) )
951 {
952 Reference< XSingleServiceFactory > xFactory =
953 cppu::createOneInstanceFactory(
954 pServiceManager,
955 Hyphenator::getImplementationName_Static(),
956 Hyphenator_CreateInstance,
957 Hyphenator::getSupportedServiceNames_Static());
958 // acquire, because we return an interface pointer instead of a reference
959 xFactory->acquire();
960 pRet = xFactory.get();
961 }
962 return pRet;
963 }
964
965
966 ///////////////////////////////////////////////////////////////////////////
967
// the capitalization type values are only needed within this file
#undef CAPTYPE_MIXED
#undef CAPTYPE_ALLCAP
#undef CAPTYPE_INITCAP
#undef CAPTYPE_NOCAP
#undef CAPTYPE_UNKNOWN
973