1*4c5491eaSAndrew Rist /************************************************************** 2cdf0e10cSrcweir * 3*4c5491eaSAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one 4*4c5491eaSAndrew Rist * or more contributor license agreements. See the NOTICE file 5*4c5491eaSAndrew Rist * distributed with this work for additional information 6*4c5491eaSAndrew Rist * regarding copyright ownership. The ASF licenses this file 7*4c5491eaSAndrew Rist * to you under the Apache License, Version 2.0 (the 8*4c5491eaSAndrew Rist * "License"); you may not use this file except in compliance 9*4c5491eaSAndrew Rist * with the License. You may obtain a copy of the License at 10*4c5491eaSAndrew Rist * 11*4c5491eaSAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0 12*4c5491eaSAndrew Rist * 13*4c5491eaSAndrew Rist * Unless required by applicable law or agreed to in writing, 14*4c5491eaSAndrew Rist * software distributed under the License is distributed on an 15*4c5491eaSAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16*4c5491eaSAndrew Rist * KIND, either express or implied. See the License for the 17*4c5491eaSAndrew Rist * specific language governing permissions and limitations 18*4c5491eaSAndrew Rist * under the License. 19*4c5491eaSAndrew Rist * 20*4c5491eaSAndrew Rist *************************************************************/ 21*4c5491eaSAndrew Rist 22*4c5491eaSAndrew Rist 23cdf0e10cSrcweir #ifndef SVX_HANGUL_HANJA_CONVERSION_HXX 24cdf0e10cSrcweir #define SVX_HANGUL_HANJA_CONVERSION_HXX 25cdf0e10cSrcweir 26cdf0e10cSrcweir #include <vcl/window.hxx> 27cdf0e10cSrcweir #include <memory> 28cdf0e10cSrcweir #include <com/sun/star/lang/XMultiServiceFactory.hpp> 29cdf0e10cSrcweir #include <com/sun/star/lang/Locale.hpp> 30cdf0e10cSrcweir #include <com/sun/star/uno/Sequence.hxx> 31cdf0e10cSrcweir #include "editeng/editengdllapi.h" 32cdf0e10cSrcweir 33cdf0e10cSrcweir //............................................................................. 34cdf0e10cSrcweir namespace editeng 35cdf0e10cSrcweir { 36cdf0e10cSrcweir //............................................................................. 37cdf0e10cSrcweir 38cdf0e10cSrcweir class HangulHanjaConversion_Impl; 39cdf0e10cSrcweir 40cdf0e10cSrcweir //========================================================================= 41cdf0e10cSrcweir //= HangulHanjaConversion 42cdf0e10cSrcweir //========================================================================= 43cdf0e10cSrcweir /** encapsulates Hangul-Hanja conversion functionality 44cdf0e10cSrcweir 45cdf0e10cSrcweir <p>terminology: 46cdf0e10cSrcweir <ul><li>A <b>text <em>portion</em></b> is some (potentially large) piece of text 47cdf0e10cSrcweir which is to be analyzed for convertible sub-strings.</li> 48cdf0e10cSrcweir <li>A <b>text <em>unit</em></b> is a sub string in a text portion, which is 49cdf0e10cSrcweir to be converted as a whole.</li> 50cdf0e10cSrcweir </ul> 51cdf0e10cSrcweir For instance, you could have two independent selections within your document, which are then 52cdf0e10cSrcweir two text portions. A text unit would be single Hangul/Hanja words within a portion, or even 53cdf0e10cSrcweir single Hangul syllabills when "replace by character" is enabled. 54cdf0e10cSrcweir </p> 55cdf0e10cSrcweir */ 56cdf0e10cSrcweir class EDITENG_DLLPUBLIC HangulHanjaConversion 57cdf0e10cSrcweir { 58cdf0e10cSrcweir friend class HangulHanjaConversion_Impl; 59cdf0e10cSrcweir 60cdf0e10cSrcweir public: 61cdf0e10cSrcweir enum ReplacementAction 62cdf0e10cSrcweir { 63cdf0e10cSrcweir eExchange, // simply exchange one text with another 64cdf0e10cSrcweir eReplacementBracketed, // keep the original, and put the replacement in brackets after it 65cdf0e10cSrcweir eOriginalBracketed, // replace the original text, but put it in brackeds after the replacement 66cdf0e10cSrcweir eReplacementAbove, // keep the original, and put the replacement text as ruby text above it 67cdf0e10cSrcweir eOriginalAbove, // replace the original text, but put it as ruby text above it 68cdf0e10cSrcweir eReplacementBelow, // keep the original, and put the replacement text as ruby text below it 69cdf0e10cSrcweir eOriginalBelow // replace the original text, but put it as ruby text below it 70cdf0e10cSrcweir }; 71cdf0e10cSrcweir 72cdf0e10cSrcweir enum ConversionType // does not specify direction... 73cdf0e10cSrcweir { 74cdf0e10cSrcweir eConvHangulHanja, // Korean Hangul/Hanja conversion 75cdf0e10cSrcweir eConvSimplifiedTraditional // Chinese simplified / Chinese traditional conversion 76cdf0e10cSrcweir }; 77cdf0e10cSrcweir 78cdf0e10cSrcweir // Note: conversion direction for eConvSimplifiedTraditional is 79cdf0e10cSrcweir // specified by source language. 80cdf0e10cSrcweir // This one is for Hangul/Hanja where source and target language 81cdf0e10cSrcweir // are the same. 82cdf0e10cSrcweir enum ConversionDirection 83cdf0e10cSrcweir { 84cdf0e10cSrcweir eHangulToHanja, 85cdf0e10cSrcweir eHanjaToHangul 86cdf0e10cSrcweir }; 87cdf0e10cSrcweir 88cdf0e10cSrcweir enum ConversionFormat 89cdf0e10cSrcweir { 90cdf0e10cSrcweir eSimpleConversion, // used for simplified / traditional Chinese as well 91cdf0e10cSrcweir eHangulBracketed, 92cdf0e10cSrcweir eHanjaBracketed, 93cdf0e10cSrcweir eRubyHanjaAbove, 94cdf0e10cSrcweir eRubyHanjaBelow, 95cdf0e10cSrcweir eRubyHangulAbove, 96cdf0e10cSrcweir eRubyHangulBelow 97cdf0e10cSrcweir }; 98cdf0e10cSrcweir 99cdf0e10cSrcweir private: 100cdf0e10cSrcweir ::std::auto_ptr< HangulHanjaConversion_Impl > m_pImpl; 101cdf0e10cSrcweir 102cdf0e10cSrcweir // used to set initial values of m_pImpl object from saved ones 103cdf0e10cSrcweir static sal_Bool m_bUseSavedValues; // defines if the followng two values should be used for initialization 104cdf0e10cSrcweir static sal_Bool m_bTryBothDirectionsSave; 105cdf0e10cSrcweir static ConversionDirection m_ePrimaryConversionDirectionSave; 106cdf0e10cSrcweir 107cdf0e10cSrcweir // Forbidden and not implemented. 108cdf0e10cSrcweir HangulHanjaConversion (const HangulHanjaConversion &); 109cdf0e10cSrcweir HangulHanjaConversion & operator= (const HangulHanjaConversion &); 110cdf0e10cSrcweir 111cdf0e10cSrcweir public: 112cdf0e10cSrcweir HangulHanjaConversion( 113cdf0e10cSrcweir Window* _pUIParent, 114cdf0e10cSrcweir const ::com::sun::star::uno::Reference< ::com::sun::star::lang::XMultiServiceFactory >& _rxORB, 115cdf0e10cSrcweir const ::com::sun::star::lang::Locale& _rSourceLocale, 116cdf0e10cSrcweir const ::com::sun::star::lang::Locale& _rTargetLocale, 117cdf0e10cSrcweir const Font* _pTargetFont, 118cdf0e10cSrcweir sal_Int32 nOptions, 119cdf0e10cSrcweir sal_Bool _bIsInteractive 120cdf0e10cSrcweir ); 121cdf0e10cSrcweir 122cdf0e10cSrcweir virtual ~HangulHanjaConversion( ); 123cdf0e10cSrcweir 124cdf0e10cSrcweir // converts the whole document 125cdf0e10cSrcweir void ConvertDocument(); 126cdf0e10cSrcweir 127cdf0e10cSrcweir LanguageType GetSourceLanguage() const; 128cdf0e10cSrcweir LanguageType GetTargetLanguage() const; 129cdf0e10cSrcweir const Font * GetTargetFont() const; 130cdf0e10cSrcweir sal_Int32 GetConversionOptions() const; 131cdf0e10cSrcweir sal_Bool IsInteractive() const; 132cdf0e10cSrcweir 133cdf0e10cSrcweir // chinese text conversion 134cdf0e10cSrcweir static inline sal_Bool IsSimplified( LanguageType nLang ); 135cdf0e10cSrcweir static inline sal_Bool IsTraditional( LanguageType nLang ); 136cdf0e10cSrcweir static inline sal_Bool IsChinese( LanguageType nLang ); 137cdf0e10cSrcweir static inline sal_Bool IsSimilarChinese( LanguageType nLang1, LanguageType nLang2 ); 138cdf0e10cSrcweir 139cdf0e10cSrcweir // used to specify that the conversion direction states from the 140cdf0e10cSrcweir // last incarnation should be used as 141cdf0e10cSrcweir // initial conversion direction for the next incarnation. 142cdf0e10cSrcweir // (A hack used to transport a state information from 143cdf0e10cSrcweir // one incarnation to the next. Used in Writers text conversion...) 144cdf0e10cSrcweir static void SetUseSavedConversionDirectionState( sal_Bool bVal ); 145cdf0e10cSrcweir static sal_Bool IsUseSavedConversionDirectionState(); 146cdf0e10cSrcweir 147cdf0e10cSrcweir protected: 148cdf0e10cSrcweir /** retrieves the next text portion which is to be analyzed 149cdf0e10cSrcweir 150cdf0e10cSrcweir <p>pseudo-abstract, needs to be overridden</p> 151cdf0e10cSrcweir 152cdf0e10cSrcweir @param _rNextPortion 153cdf0e10cSrcweir upon return, this must contain the next text portion 154cdf0e10cSrcweir @param _rLangOfPortion 155cdf0e10cSrcweir upon return, this must contain the language for the found text portion. 156cdf0e10cSrcweir (necessary for Chinese translation since there are 5 language variants 157cdf0e10cSrcweir too look for even if the 'source' language usually is only 'simplified' 158cdf0e10cSrcweir or 'traditional'.) 159cdf0e10cSrcweir */ 160cdf0e10cSrcweir virtual void GetNextPortion( 161cdf0e10cSrcweir ::rtl::OUString& /* [out] */ _rNextPortion, 162cdf0e10cSrcweir LanguageType& /* [out] */ _rLangOfPortion, 163cdf0e10cSrcweir sal_Bool /* [in] */ _bAllowImplicitChangesForNotConvertibleText ); 164cdf0e10cSrcweir 165cdf0e10cSrcweir /** announces a new "current unit" 166cdf0e10cSrcweir 167cdf0e10cSrcweir <p>This will be called whenever it is necessary to interactively ask the user for 168cdf0e10cSrcweir a conversion. In such a case, a range within the current portion (see <member>GetNextPortion</member>) 169cdf0e10cSrcweir is presented to the user for chosing a substitution. Additionally, this method is called, 170cdf0e10cSrcweir so that derived classes can e.g. highlight this text range in a document view.</p> 171cdf0e10cSrcweir 172cdf0e10cSrcweir <p>Note that the indexes are relative to the most recent replace action. See 173cdf0e10cSrcweir <member>ReplaceUnit</member> for details.</p> 174cdf0e10cSrcweir 175cdf0e10cSrcweir @param _nUnitStart 176cdf0e10cSrcweir the start index of the unit 177cdf0e10cSrcweir 178cdf0e10cSrcweir @param _nUnitEnd 179cdf0e10cSrcweir the start index (exclusively!) of the unit. 180cdf0e10cSrcweir 181cdf0e10cSrcweir @param _bAllowImplicitChangesForNotConvertibleText 182cdf0e10cSrcweir allows implicit changes other than the text itself for the 183cdf0e10cSrcweir text parts not being convertible. 184cdf0e10cSrcweir Used for chinese translation to attribute all not convertible 185cdf0e10cSrcweir text (e.g. western text, empty paragraphs, spaces, ...) to 186cdf0e10cSrcweir the target language and target font of the conversion. 187cdf0e10cSrcweir This is to ensure that after the conversion any new text entered 188cdf0e10cSrcweir anywhere in the document will have the target language (of course 189cdf0e10cSrcweir CJK Language only) and target font (CJK font only) set. 190cdf0e10cSrcweir 191cdf0e10cSrcweir @see GetNextPortion 192cdf0e10cSrcweir */ 193cdf0e10cSrcweir virtual void HandleNewUnit( const sal_Int32 _nUnitStart, const sal_Int32 _nUnitEnd ); 194cdf0e10cSrcweir 195cdf0e10cSrcweir /** replaces a text unit within a text portion with a new text 196cdf0e10cSrcweir 197cdf0e10cSrcweir <p>pseudo-abstract, needs to be overridden</p> 198cdf0e10cSrcweir 199cdf0e10cSrcweir <p>Note an important thing about the indicies: They are always relative to the <em>previous 200cdf0e10cSrcweir call</em> of ReplaceUnit. This means whe you get a call to ReplaceUnit, and replace some text 201cdf0e10cSrcweir in your document, than you have to remember the document position immediately <em>behind</em> 202cdf0e10cSrcweir the changed text. In a next call to ReplaceUnit, an index of <em>0</em> will denote exactly 203cdf0e10cSrcweir this position behind the previous replacement<br/> 204cdf0e10cSrcweir The reaons for this is that this class here does not know anything about your document structure, 205cdf0e10cSrcweir so after a replacement took place, it's impossible to address anything in the range from the 206cdf0e10cSrcweir beginning of the portion up to the replaced text.<br/> 207cdf0e10cSrcweir In the very first call to ReplaceUnit, an index of <em>0</em> denotes the very first position of 208cdf0e10cSrcweir the current portion.</p> 209cdf0e10cSrcweir 210cdf0e10cSrcweir <p>If the language of the text to be replaced is different from 211cdf0e10cSrcweir the target language (as given by 'GetTargetLanguage') for example 212cdf0e10cSrcweir when converting simplified Chinese from/to traditional Chinese 213cdf0e10cSrcweir the language attribute of the new text has to be changed as well, 214cdf0e10cSrcweir **and** the font is to be set to the default (document) font for 215cdf0e10cSrcweir that language.</p> 216cdf0e10cSrcweir 217cdf0e10cSrcweir @param _nUnitStart 218cdf0e10cSrcweir the start index of the range to replace 219cdf0e10cSrcweir 220cdf0e10cSrcweir @param _nUnitEnd 221cdf0e10cSrcweir the end index (exclusively!) of the range to replace. E.g., an index 222cdf0e10cSrcweir pair (4,5) indicates a range of length 1. 223cdf0e10cSrcweir 224cdf0e10cSrcweir @param _rOrigText 225cdf0e10cSrcweir the original text to be replaced (as returned by GetNextPortion). 226cdf0e10cSrcweir Since in Chinese conversion the original text is needed as well 227cdf0e10cSrcweir in order to only do the minimal necassry text changes and to keep 228cdf0e10cSrcweir as much attributes as possible this is supplied here as well. 229cdf0e10cSrcweir 230cdf0e10cSrcweir @param _rReplaceWith 231cdf0e10cSrcweir The replacement text 232cdf0e10cSrcweir 233cdf0e10cSrcweir @param _rOffsets 234cdf0e10cSrcweir An sequence matching the indices (characters) of _rReplaceWith 235cdf0e10cSrcweir to the indices of the characters in the original text they are 236cdf0e10cSrcweir replacing. 237cdf0e10cSrcweir This is necessary since some portions of the text may get 238cdf0e10cSrcweir converted in portions of different length than the original. 239cdf0e10cSrcweir The sequence will be empty if all conversions in the text are 240cdf0e10cSrcweir of equal length. That is if always the character at index i in 241cdf0e10cSrcweir _rOffsets is replacing the character at index i in the original 242cdf0e10cSrcweir text for all valid index values of i. 243cdf0e10cSrcweir 244cdf0e10cSrcweir @param _eAction 245cdf0e10cSrcweir replacement action to take 246cdf0e10cSrcweir 247cdf0e10cSrcweir @param pNewUnitLanguage 248cdf0e10cSrcweir if the replacement unit is required to have a new language that 249cdf0e10cSrcweir is specified here. If the language is to be left unchanged this 250cdf0e10cSrcweir is the 0 pointer. 251cdf0e10cSrcweir */ 252cdf0e10cSrcweir virtual void ReplaceUnit( 253cdf0e10cSrcweir const sal_Int32 _nUnitStart, const sal_Int32 _nUnitEnd, 254cdf0e10cSrcweir const ::rtl::OUString& _rOrigText, 255cdf0e10cSrcweir const ::rtl::OUString& _rReplaceWith, 256cdf0e10cSrcweir const ::com::sun::star::uno::Sequence< sal_Int32 > &_rOffsets, 257cdf0e10cSrcweir ReplacementAction _eAction, 258cdf0e10cSrcweir LanguageType *pNewUnitLanguage 259cdf0e10cSrcweir ); 260cdf0e10cSrcweir 261cdf0e10cSrcweir /** specifies if rubies are supported by the document implementing 262cdf0e10cSrcweir this class. 263cdf0e10cSrcweir 264cdf0e10cSrcweir @return 265cdf0e10cSrcweir <TRUE/> if rubies are supported. 266cdf0e10cSrcweir */ 267cdf0e10cSrcweir virtual sal_Bool HasRubySupport() const; 268cdf0e10cSrcweir }; 269cdf0e10cSrcweir IsSimplified(LanguageType nLang)270cdf0e10cSrcweir sal_Bool HangulHanjaConversion::IsSimplified( LanguageType nLang ) 271cdf0e10cSrcweir { 272cdf0e10cSrcweir return nLang == LANGUAGE_CHINESE_SIMPLIFIED || 273cdf0e10cSrcweir nLang == LANGUAGE_CHINESE_SINGAPORE; 274cdf0e10cSrcweir } 275cdf0e10cSrcweir IsTraditional(LanguageType nLang)276cdf0e10cSrcweir sal_Bool HangulHanjaConversion::IsTraditional( LanguageType nLang ) 277cdf0e10cSrcweir { 278cdf0e10cSrcweir return nLang == LANGUAGE_CHINESE_TRADITIONAL || 279cdf0e10cSrcweir nLang == LANGUAGE_CHINESE_HONGKONG || 280cdf0e10cSrcweir nLang == LANGUAGE_CHINESE_MACAU; 281cdf0e10cSrcweir } 282cdf0e10cSrcweir IsChinese(LanguageType nLang)283cdf0e10cSrcweir sal_Bool HangulHanjaConversion::IsChinese( LanguageType nLang ) 284cdf0e10cSrcweir { 285cdf0e10cSrcweir return IsTraditional( nLang ) || IsSimplified( nLang ); 286cdf0e10cSrcweir } 287cdf0e10cSrcweir IsSimilarChinese(LanguageType nLang1,LanguageType nLang2)288cdf0e10cSrcweir sal_Bool HangulHanjaConversion::IsSimilarChinese( LanguageType nLang1, LanguageType nLang2 ) 289cdf0e10cSrcweir { 290cdf0e10cSrcweir return (IsTraditional(nLang1) && IsTraditional(nLang2)) || 291cdf0e10cSrcweir (IsSimplified(nLang1) && IsSimplified(nLang2)); 292cdf0e10cSrcweir } 293cdf0e10cSrcweir 294cdf0e10cSrcweir //............................................................................. 295cdf0e10cSrcweir } // namespace svx 296cdf0e10cSrcweir //............................................................................. 297cdf0e10cSrcweir 298cdf0e10cSrcweir #endif // SVX_HANGUL_HANJA_CONVERSION_HXX 299