1*4c5491eaSAndrew Rist /**************************************************************
2cdf0e10cSrcweir  *
3*4c5491eaSAndrew Rist  * Licensed to the Apache Software Foundation (ASF) under one
4*4c5491eaSAndrew Rist  * or more contributor license agreements.  See the NOTICE file
5*4c5491eaSAndrew Rist  * distributed with this work for additional information
6*4c5491eaSAndrew Rist  * regarding copyright ownership.  The ASF licenses this file
7*4c5491eaSAndrew Rist  * to you under the Apache License, Version 2.0 (the
8*4c5491eaSAndrew Rist  * "License"); you may not use this file except in compliance
9*4c5491eaSAndrew Rist  * with the License.  You may obtain a copy of the License at
10*4c5491eaSAndrew Rist  *
11*4c5491eaSAndrew Rist  *   http://www.apache.org/licenses/LICENSE-2.0
12*4c5491eaSAndrew Rist  *
13*4c5491eaSAndrew Rist  * Unless required by applicable law or agreed to in writing,
14*4c5491eaSAndrew Rist  * software distributed under the License is distributed on an
15*4c5491eaSAndrew Rist  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*4c5491eaSAndrew Rist  * KIND, either express or implied.  See the License for the
17*4c5491eaSAndrew Rist  * specific language governing permissions and limitations
18*4c5491eaSAndrew Rist  * under the License.
19*4c5491eaSAndrew Rist  *
20*4c5491eaSAndrew Rist  *************************************************************/
21*4c5491eaSAndrew Rist 
22*4c5491eaSAndrew Rist 
23cdf0e10cSrcweir #ifndef SVX_HANGUL_HANJA_CONVERSION_HXX
24cdf0e10cSrcweir #define SVX_HANGUL_HANJA_CONVERSION_HXX
25cdf0e10cSrcweir 
26cdf0e10cSrcweir #include <vcl/window.hxx>
27cdf0e10cSrcweir #include <memory>
28cdf0e10cSrcweir #include <com/sun/star/lang/XMultiServiceFactory.hpp>
29cdf0e10cSrcweir #include <com/sun/star/lang/Locale.hpp>
30cdf0e10cSrcweir #include <com/sun/star/uno/Sequence.hxx>
31cdf0e10cSrcweir #include "editeng/editengdllapi.h"
32cdf0e10cSrcweir 
33cdf0e10cSrcweir //.............................................................................
34cdf0e10cSrcweir namespace editeng
35cdf0e10cSrcweir {
36cdf0e10cSrcweir //.............................................................................
37cdf0e10cSrcweir 
38cdf0e10cSrcweir 	class HangulHanjaConversion_Impl;	// implementation class, held through m_pImpl below (not defined in this header)
39cdf0e10cSrcweir 
40cdf0e10cSrcweir 	//=========================================================================
41cdf0e10cSrcweir 	//= HangulHanjaConversion
42cdf0e10cSrcweir 	//=========================================================================
43cdf0e10cSrcweir 	/** encapsulates Hangul/Hanja conversion functionality (also used for Chinese simplified/traditional conversion, see ConversionType)
44cdf0e10cSrcweir 
45cdf0e10cSrcweir 		<p>terminology:
46cdf0e10cSrcweir 			<ul><li>A <b>text <em>portion</em></b> is some (potentially large) piece of text
47cdf0e10cSrcweir 				which is to be analyzed for convertible sub-strings.</li>
48cdf0e10cSrcweir 				<li>A <b>text <em>unit</em></b> is a sub string in a text portion, which is
49cdf0e10cSrcweir 				to be converted as a whole.</li>
50cdf0e10cSrcweir 			</ul>
51cdf0e10cSrcweir 			For instance, you could have two independent selections within your document, which are then
52cdf0e10cSrcweir 			two text portions. A text unit would be single Hangul/Hanja words within a portion, or even
53cdf0e10cSrcweir 			single Hangul syllables when "replace by character" is enabled.
54cdf0e10cSrcweir 		</p>
55cdf0e10cSrcweir 	*/
56cdf0e10cSrcweir 	class EDITENG_DLLPUBLIC HangulHanjaConversion
57cdf0e10cSrcweir 	{
58cdf0e10cSrcweir 		friend class HangulHanjaConversion_Impl;
59cdf0e10cSrcweir 
60cdf0e10cSrcweir 	public:
61cdf0e10cSrcweir 		enum ReplacementAction
62cdf0e10cSrcweir 		{
63cdf0e10cSrcweir 			eExchange,				// simply exchange one text with another
64cdf0e10cSrcweir 			eReplacementBracketed,	// keep the original, and put the replacement in brackets after it
65cdf0e10cSrcweir 			eOriginalBracketed,		// replace the original text, but put it in brackets after the replacement
66cdf0e10cSrcweir 			eReplacementAbove,		// keep the original, and put the replacement text as ruby text above it
67cdf0e10cSrcweir 			eOriginalAbove,			// replace the original text, but put it as ruby text above it
68cdf0e10cSrcweir 			eReplacementBelow,		// keep the original, and put the replacement text as ruby text below it
69cdf0e10cSrcweir 			eOriginalBelow			// replace the original text, but put it as ruby text below it
70cdf0e10cSrcweir 		};
71cdf0e10cSrcweir 
72cdf0e10cSrcweir         enum ConversionType             // does not specify direction...
73cdf0e10cSrcweir         {
74cdf0e10cSrcweir             eConvHangulHanja,           // Korean Hangul/Hanja conversion
75cdf0e10cSrcweir             eConvSimplifiedTraditional  // Chinese simplified / Chinese traditional conversion
76cdf0e10cSrcweir         };
77cdf0e10cSrcweir 
78cdf0e10cSrcweir         // Note: conversion direction for eConvSimplifiedTraditional is
79cdf0e10cSrcweir         // specified by source language.
80cdf0e10cSrcweir         // This one is for Hangul/Hanja where source and target language
81cdf0e10cSrcweir         // are the same.
82cdf0e10cSrcweir         enum ConversionDirection
83cdf0e10cSrcweir 		{
84cdf0e10cSrcweir 			eHangulToHanja,
85cdf0e10cSrcweir 			eHanjaToHangul
86cdf0e10cSrcweir 		};
87cdf0e10cSrcweir 
88cdf0e10cSrcweir 		enum ConversionFormat
89cdf0e10cSrcweir 		{
90cdf0e10cSrcweir             eSimpleConversion,          // used for simplified / traditional Chinese as well
91cdf0e10cSrcweir 			eHangulBracketed,
92cdf0e10cSrcweir 			eHanjaBracketed,
93cdf0e10cSrcweir 			eRubyHanjaAbove,
94cdf0e10cSrcweir 			eRubyHanjaBelow,
95cdf0e10cSrcweir 			eRubyHangulAbove,
96cdf0e10cSrcweir 			eRubyHangulBelow
97cdf0e10cSrcweir 		};
98cdf0e10cSrcweir 
99cdf0e10cSrcweir 	private:
100cdf0e10cSrcweir 		::std::auto_ptr< HangulHanjaConversion_Impl >	m_pImpl;
101cdf0e10cSrcweir 
102cdf0e10cSrcweir 		// used to set initial values of m_pImpl object from saved ones
103cdf0e10cSrcweir 		static sal_Bool				m_bUseSavedValues;	// defines if the following two values should be used for initialization
104cdf0e10cSrcweir 		static sal_Bool				m_bTryBothDirectionsSave;
105cdf0e10cSrcweir         static ConversionDirection	m_ePrimaryConversionDirectionSave;
106cdf0e10cSrcweir 
107cdf0e10cSrcweir 		// Forbidden and not implemented.
108cdf0e10cSrcweir 		HangulHanjaConversion (const HangulHanjaConversion &);
109cdf0e10cSrcweir 		HangulHanjaConversion & operator= (const HangulHanjaConversion &);
110cdf0e10cSrcweir 
111cdf0e10cSrcweir     public:
112cdf0e10cSrcweir         HangulHanjaConversion(
113cdf0e10cSrcweir             Window* _pUIParent,
114cdf0e10cSrcweir             const ::com::sun::star::uno::Reference< ::com::sun::star::lang::XMultiServiceFactory >& _rxORB,
115cdf0e10cSrcweir             const ::com::sun::star::lang::Locale& _rSourceLocale,
116cdf0e10cSrcweir             const ::com::sun::star::lang::Locale& _rTargetLocale,
117cdf0e10cSrcweir             const Font* _pTargetFont,
118cdf0e10cSrcweir             sal_Int32 nOptions,
119cdf0e10cSrcweir             sal_Bool _bIsInteractive
120cdf0e10cSrcweir         );
121cdf0e10cSrcweir 
122cdf0e10cSrcweir         virtual ~HangulHanjaConversion( );
123cdf0e10cSrcweir 
124cdf0e10cSrcweir         // converts the whole document
125cdf0e10cSrcweir         void    ConvertDocument();
126cdf0e10cSrcweir 
127cdf0e10cSrcweir         LanguageType    GetSourceLanguage() const;
128cdf0e10cSrcweir         LanguageType    GetTargetLanguage() const;
129cdf0e10cSrcweir         const Font *    GetTargetFont() const;
130cdf0e10cSrcweir         sal_Int32       GetConversionOptions() const;
131cdf0e10cSrcweir         sal_Bool        IsInteractive() const;
132cdf0e10cSrcweir 
133cdf0e10cSrcweir         // Chinese text conversion: language classification helpers (defined inline below the class)
134cdf0e10cSrcweir         static inline sal_Bool IsSimplified( LanguageType nLang );
135cdf0e10cSrcweir         static inline sal_Bool IsTraditional( LanguageType nLang );
136cdf0e10cSrcweir         static inline sal_Bool IsChinese( LanguageType nLang );
137cdf0e10cSrcweir         static inline sal_Bool IsSimilarChinese( LanguageType nLang1, LanguageType nLang2 );
138cdf0e10cSrcweir 
139cdf0e10cSrcweir 		// used to specify that the conversion direction states from the
140cdf0e10cSrcweir 		// last incarnation should be used as
141cdf0e10cSrcweir 		// initial conversion direction for the next incarnation.
142cdf0e10cSrcweir 		// (A hack used to transport a state information from
143cdf0e10cSrcweir 		// one incarnation to the next. Used in Writer's text conversion...)
144cdf0e10cSrcweir 		static void		SetUseSavedConversionDirectionState( sal_Bool bVal );
145cdf0e10cSrcweir 		static sal_Bool IsUseSavedConversionDirectionState();
146cdf0e10cSrcweir 
147cdf0e10cSrcweir 	protected:
148cdf0e10cSrcweir 		/** retrieves the next text portion which is to be analyzed
149cdf0e10cSrcweir 
150cdf0e10cSrcweir 			<p>pseudo-abstract, needs to be overridden</p>
151cdf0e10cSrcweir 
152cdf0e10cSrcweir 			@param _rNextPortion
153cdf0e10cSrcweir 				upon return, this must contain the next text portion
154cdf0e10cSrcweir             @param _rLangOfPortion
155cdf0e10cSrcweir                 upon return, this must contain the language for the found text portion.
156cdf0e10cSrcweir                 (necessary for Chinese translation since there are 5 language variants
157cdf0e10cSrcweir                 to look for even if the 'source' language usually is only 'simplified'
158cdf0e10cSrcweir                 or 'traditional'.)
            @param _bAllowImplicitChangesForNotConvertibleText
                allows implicit changes other than the text itself for the
                text parts not being convertible.
                Used for Chinese translation to attribute all not convertible
                text (e.g. western text, empty paragraphs, spaces, ...) to
                the target language and target font of the conversion.
                This is to ensure that after the conversion any new text entered
                anywhere in the document will have the target language (of course
                CJK Language only) and target font (CJK font only) set.
159cdf0e10cSrcweir 		*/
160cdf0e10cSrcweir         virtual void    GetNextPortion(
161cdf0e10cSrcweir                 ::rtl::OUString& /* [out] */ _rNextPortion,
162cdf0e10cSrcweir                 LanguageType& /* [out] */ _rLangOfPortion,
163cdf0e10cSrcweir                 sal_Bool /* [in] */ _bAllowImplicitChangesForNotConvertibleText );
164cdf0e10cSrcweir 
165cdf0e10cSrcweir 		/** announces a new "current unit"
166cdf0e10cSrcweir 
167cdf0e10cSrcweir 			<p>This will be called whenever it is necessary to interactively ask the user for
168cdf0e10cSrcweir 			a conversion. In such a case, a range within the current portion (see <member>GetNextPortion</member>)
169cdf0e10cSrcweir 			is presented to the user for choosing a substitution. Additionally, this method is called,
170cdf0e10cSrcweir 			so that derived classes can e.g. highlight this text range in a document view.</p>
171cdf0e10cSrcweir 
172cdf0e10cSrcweir 			<p>Note that the indexes are relative to the most recent replace action. See
173cdf0e10cSrcweir 			<member>ReplaceUnit</member> for details.</p>
174cdf0e10cSrcweir 
175cdf0e10cSrcweir 			@param _nUnitStart
176cdf0e10cSrcweir 				the start index of the unit
177cdf0e10cSrcweir 
178cdf0e10cSrcweir 			@param _nUnitEnd
179cdf0e10cSrcweir 				the end index (exclusively!) of the unit.
180cdf0e10cSrcweir 
191cdf0e10cSrcweir 			@see GetNextPortion
192cdf0e10cSrcweir 		*/
193cdf0e10cSrcweir 		virtual void	HandleNewUnit( const sal_Int32 _nUnitStart, const sal_Int32 _nUnitEnd );
194cdf0e10cSrcweir 
195cdf0e10cSrcweir 		/** replaces a text unit within a text portion with a new text
196cdf0e10cSrcweir 
197cdf0e10cSrcweir 			<p>pseudo-abstract, needs to be overridden</p>
198cdf0e10cSrcweir 
199cdf0e10cSrcweir 			<p>Note an important thing about the indices: They are always relative to the <em>previous
200cdf0e10cSrcweir 			call</em> of ReplaceUnit. This means when you get a call to ReplaceUnit, and replace some text
201cdf0e10cSrcweir 			in your document, then you have to remember the document position immediately <em>behind</em>
202cdf0e10cSrcweir 			the changed text. In a next call to ReplaceUnit, an index of <em>0</em> will denote exactly
203cdf0e10cSrcweir 			this position behind the previous replacement<br/>
204cdf0e10cSrcweir 			The reason for this is that this class here does not know anything about your document structure,
205cdf0e10cSrcweir 			so after a replacement took place, it's impossible to address anything in the range from the
206cdf0e10cSrcweir 			beginning of the portion up to the replaced text.<br/>
207cdf0e10cSrcweir 			In the very first call to ReplaceUnit, an index of <em>0</em> denotes the very first position of
208cdf0e10cSrcweir 			the current portion.</p>
209cdf0e10cSrcweir 
210cdf0e10cSrcweir             <p>If the language of the text to be replaced is different from
211cdf0e10cSrcweir             the target language (as given by 'GetTargetLanguage') for example
212cdf0e10cSrcweir             when converting simplified Chinese from/to traditional Chinese
213cdf0e10cSrcweir             the language attribute of the new text has to be changed as well,
214cdf0e10cSrcweir 			**and** the font is to be set to the default (document) font for
215cdf0e10cSrcweir 			that language.</p>
216cdf0e10cSrcweir 
217cdf0e10cSrcweir 			@param _nUnitStart
218cdf0e10cSrcweir 				the start index of the range to replace
219cdf0e10cSrcweir 
220cdf0e10cSrcweir 			@param _nUnitEnd
221cdf0e10cSrcweir 				the end index (exclusively!) of the range to replace. E.g., an index
222cdf0e10cSrcweir 				pair (4,5) indicates a range of length 1.
223cdf0e10cSrcweir 
224cdf0e10cSrcweir             @param _rOrigText
225cdf0e10cSrcweir                 the original text to be replaced (as returned by GetNextPortion).
226cdf0e10cSrcweir                 Since in Chinese conversion the original text is needed as well
227cdf0e10cSrcweir                 in order to only do the minimal necessary text changes and to keep
228cdf0e10cSrcweir                 as many attributes as possible this is supplied here as well.
229cdf0e10cSrcweir 
230cdf0e10cSrcweir 			@param _rReplaceWith
231cdf0e10cSrcweir 				The replacement text
232cdf0e10cSrcweir 
233cdf0e10cSrcweir             @param _rOffsets
234cdf0e10cSrcweir                 A sequence matching the indices (characters) of _rReplaceWith
235cdf0e10cSrcweir                 to the indices of the characters in the original text they are
236cdf0e10cSrcweir                 replacing.
237cdf0e10cSrcweir                 This is necessary since some portions of the text may get
238cdf0e10cSrcweir                 converted in portions of different length than the original.
239cdf0e10cSrcweir                 The sequence will be empty if all conversions in the text are
240cdf0e10cSrcweir                 of equal length. That is if always the character at index i in
241cdf0e10cSrcweir                 _rReplaceWith is replacing the character at index i in the original
242cdf0e10cSrcweir                 text for all valid index values of i.
243cdf0e10cSrcweir 
244cdf0e10cSrcweir             @param _eAction
245cdf0e10cSrcweir 				replacement action to take
246cdf0e10cSrcweir 
247cdf0e10cSrcweir             @param pNewUnitLanguage
248cdf0e10cSrcweir                 if the replacement unit is required to have a new language that
249cdf0e10cSrcweir                 is specified here. If the language is to be left unchanged this
250cdf0e10cSrcweir                 is the 0 pointer.
251cdf0e10cSrcweir 		*/
252cdf0e10cSrcweir 		virtual void	ReplaceUnit(
253cdf0e10cSrcweir 							const sal_Int32 _nUnitStart, const sal_Int32 _nUnitEnd,
254cdf0e10cSrcweir                             const ::rtl::OUString& _rOrigText,
255cdf0e10cSrcweir                             const ::rtl::OUString& _rReplaceWith,
256cdf0e10cSrcweir                             const ::com::sun::star::uno::Sequence< sal_Int32 > &_rOffsets,
257cdf0e10cSrcweir                             ReplacementAction _eAction,
258cdf0e10cSrcweir                             LanguageType *pNewUnitLanguage
259cdf0e10cSrcweir 						);
260cdf0e10cSrcweir 
261cdf0e10cSrcweir         /** specifies if rubies are supported by the document implementing
262cdf0e10cSrcweir             this class.
263cdf0e10cSrcweir 
264cdf0e10cSrcweir             @return
265cdf0e10cSrcweir                 <TRUE/> if rubies are supported.
266cdf0e10cSrcweir         */
267cdf0e10cSrcweir         virtual sal_Bool    HasRubySupport() const;
268cdf0e10cSrcweir 	};
269cdf0e10cSrcweir 
/** tells whether a language identifier is one of the simplified Chinese ones

    @param nLang
        the language identifier to classify
    @return
        a true value if and only if nLang equals LANGUAGE_CHINESE_SIMPLIFIED
        or LANGUAGE_CHINESE_SINGAPORE
*/
IsSimplified(LanguageType nLang)270cdf0e10cSrcweir     sal_Bool HangulHanjaConversion::IsSimplified( LanguageType nLang )
271cdf0e10cSrcweir     {
272cdf0e10cSrcweir         const bool bSingapore = ( LANGUAGE_CHINESE_SINGAPORE == nLang );
273cdf0e10cSrcweir         return ( LANGUAGE_CHINESE_SIMPLIFIED == nLang ) || bSingapore;
274cdf0e10cSrcweir     }
275cdf0e10cSrcweir 
/** tells whether a language identifier is one of the traditional Chinese ones

    @param nLang
        the language identifier to classify
    @return
        a true value if and only if nLang equals LANGUAGE_CHINESE_TRADITIONAL,
        LANGUAGE_CHINESE_HONGKONG or LANGUAGE_CHINESE_MACAU
*/
IsTraditional(LanguageType nLang)276cdf0e10cSrcweir     sal_Bool HangulHanjaConversion::IsTraditional( LanguageType nLang )
277cdf0e10cSrcweir     {
278cdf0e10cSrcweir         const bool bHongKong = ( LANGUAGE_CHINESE_HONGKONG == nLang );
279cdf0e10cSrcweir         const bool bMacau = ( LANGUAGE_CHINESE_MACAU == nLang );
280cdf0e10cSrcweir         return ( LANGUAGE_CHINESE_TRADITIONAL == nLang ) || bHongKong || bMacau;
281cdf0e10cSrcweir     }
282cdf0e10cSrcweir 
/** tells whether a language identifier is any of the Chinese ones known to this class

    @param nLang
        the language identifier to classify
    @return
        a true value if and only if IsSimplified or IsTraditional accepts nLang
*/
IsChinese(LanguageType nLang)283cdf0e10cSrcweir     sal_Bool HangulHanjaConversion::IsChinese( LanguageType nLang )
284cdf0e10cSrcweir     {
285cdf0e10cSrcweir         return IsSimplified( nLang ) || IsTraditional( nLang );
286cdf0e10cSrcweir     }
287cdf0e10cSrcweir 
/** tells whether two language identifiers belong to the same Chinese group

    @param nLang1
        the first language identifier
    @param nLang2
        the second language identifier
    @return
        a true value if and only if both identifiers are accepted by
        IsTraditional, or both are accepted by IsSimplified
*/
IsSimilarChinese(LanguageType nLang1,LanguageType nLang2)288cdf0e10cSrcweir     sal_Bool HangulHanjaConversion::IsSimilarChinese( LanguageType nLang1, LanguageType nLang2 )
289cdf0e10cSrcweir     {
290cdf0e10cSrcweir         const bool bBothTraditional = IsTraditional( nLang1 ) && IsTraditional( nLang2 );
291cdf0e10cSrcweir         return bBothTraditional || ( IsSimplified( nLang1 ) && IsSimplified( nLang2 ) );
292cdf0e10cSrcweir     }
293cdf0e10cSrcweir 
294cdf0e10cSrcweir //.............................................................................
295cdf0e10cSrcweir }	// namespace editeng
296cdf0e10cSrcweir //.............................................................................
297cdf0e10cSrcweir 
298cdf0e10cSrcweir #endif // SVX_HANGUL_HANJA_CONVERSION_HXX
299