xref: /trunk/test/testuno/source/api/i18n/XCharacterClassificationTest.java (revision 2f709283d1bd576d3b419fe5eab3c9c4e094bc79)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 package api.i18n;
25 
26 import com.sun.star.i18n.KParseTokens;
27 import com.sun.star.i18n.KParseType;
28 import com.sun.star.i18n.ParseResult;
29 import com.sun.star.i18n.XCharacterClassification;
30 import com.sun.star.lang.Locale;
31 import com.sun.star.uno.UnoRuntime;
32 import com.sun.star.uno.XComponentContext;
33 import org.junit.After;
34 import org.junit.AfterClass;
35 import org.junit.Before;
36 import org.junit.BeforeClass;
37 import org.junit.Assert;
38 import org.junit.Test;
39 import org.openoffice.test.uno.UnoApp;
40 
41 /**
42  * Testing <code>com.sun.star.i18n.XCharacterClassification</code>
43  * interface methods:
44  * <ul>
45  *  <li><code> toUpper() </code></li>
46  *  <li><code> toLower() </code></li>
47  *  <li><code> toTitle() </code></li>
48  *  <li><code> getType() </code></li>
49  *  <li><code> getCharacterType() </code></li>
50  *  <li><code> getStringType() </code></li>
51  *  <li><code> getCharacterDirection() </code></li>
52  *  <li><code> getScript() </code></li>
53  *  <li><code> parseAnyToken() </code></li>
54  *  <li><code> parsePredefinedToken() </code></li>
55  * </ul><p>
56  * Test is <b> NOT </b> multithread compliant. <p>
57  * @see com.sun.star.i18n.XCharacterClassification
58  */
59 public class XCharacterClassificationTest {
60     private static final UnoApp app = new UnoApp();
61 
62     private XComponentContext xContext = null;
63     public XCharacterClassification oObj = null;
64     public String[] languages = new String[]{"de","en","es","fr","ja","ko","zh"};
65     public String[] countries = new String[]{"DE","US","ES","FR","JP","KR","CN"};
66 
67     public String[] charstyles_java = new String[] {"UNASSIGNED","UPPERCASE_LETTER",
68         "LOWERCASE_LETTER","TITLECASE_LETTER","MODIFIER_LETTER","OTHER_LETTER",
69         "NON_SPACING_MARK","ENCLOSING_MARK","COMBINING_SPACING_MARK",
70         "DECIMAL_DIGIT_NUMBER","LETTER_NUMBER","OTHER_NUMBER","SPACE_SEPARATOR",
71         "LINE_SEPARATOR","PARAGRAPH_SEPARATOR","CONTROL","FORMAT","none17",
72         "PRIVATE_USE","none19","DASH_PUNCTUATION","START_PUNCTUATION","END_PUNCTUATION",
73         "CONNECTOR_PUNCTUATION","OTHER_PUNCTUATION","MATH_SYMBOL","CURRENCY_SYMBOL",
74         "MODIFIER_SYMBOL","OTHER_SYMBOL"};
75 
76     public String[] charstyles_office = new String[] {"UNASSIGNED","UPPERCASE_LETTER",
77         "LOWERCASE_LETTER","TITLECASE_LETTER","MODIFIER_LETTER","OTHER_LETTER",
78         "NON_SPACING_MARK","ENCLOSING_MARK","COMBINING_SPACING_MARK",
79         "DECIMAL_DIGIT_NUMBER","LETTER_NUMBER","OTHER_NUMBER","SPACE_SEPARATOR",
80         "LINE_SEPARATOR","PARAGRAPH_SEPARATOR","CONTROL","FORMAT","PRIVATE_USE",
81         "OTHER_PUNCTUATION","DASH_PUNCTUATION","START_PUNCTUATION","END_PUNCTUATION",
82         "CONNECTOR_PUNCTUATION",
83         "OTHER_PUNCTUATION","MATH_SYMBOL","CURRENCY_SYMBOL","MODIFIER_SYMBOL",
84         "OTHER_SYMBOL","INITIAL_PUNCTUATION","FINAL_PUNCTUATION","GENERAL_TYPES_COUNT"};
85 
86     public String[] unicode_script = new String[] {"U_BASIC_LATIN","U_LATIN_1_SUPPLEMENT",
87         "U_LATIN_EXTENDED_A","U_LATIN_EXTENDED_B","U_IPA_EXTENSIONS","U_SPACING_MODIFIER_LETTERS",
88         "U_COMBINING_DIACRITICAL_MARKS","U_GREEK","U_CYRILLIC","U_ARMENIAN","U_HEBREW",
89         "U_ARABIC","U_SYRIAC","U_THAANA","U_DEVANAGARI","U_BENGALI","U_GURMUKHI",
90         "U_GUJARATI","U_ORIYA","U_TAMIL","U_TELUGU","U_KANNADA","U_MALAYALAM",
91         "U_SINHALA","U_THAI","U_LAO","U_TIBETAN","U_MYANMAR","U_GEORGIAN",
92         "U_HANGUL_JAMO","U_ETHIOPIC","U_CHEROKEE","U_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
93         "U_OGHAM","U_RUNIC","U_KHMER","U_MONGOLIAN","U_LATIN_EXTENDED_ADDITIONAL",
94         "U_GREEK_EXTENDED","U_GENERAL_PUNCTUATION","U_SUPERSCRIPTS_AND_SUBSCRIPTS",
95         "U_CURRENCY_SYMBOLS","U_COMBINING_MARKS_FOR_SYMBOLS","U_LETTERLIKE_SYMBOLS",
96         "U_NUMBER_FORMS","U_ARROWS","U_MATHEMATICAL_OPERATORS","U_MISCELLANEOUS_TECHNICAL",
97         "U_CONTROL_PICTURES","U_OPTICAL_CHARACTER_RECOGNITION","U_ENCLOSED_ALPHANUMERICS",
98         "U_BOX_DRAWING","U_BLOCK_ELEMENTS","U_GEOMETRIC_SHAPES","U_MISCELLANEOUS_SYMBOLS",
99         "U_DINGBATS","U_BRAILLE_PATTERNS","U_CJK_RADICALS_SUPPLEMENT","U_KANGXI_RADICALS",
100         "U_IDEOGRAPHIC_DESCRIPTION_CHARACTERS","U_CJK_SYMBOLS_AND_PUNCTUATION",
101         "U_HIRAGANA","U_KATAKANA","U_BOPOMOFO","U_HANGUL_COMPATIBILITY_JAMO","U_KANBUN",
102         "U_BOPOMOFO_EXTENDED","U_ENCLOSED_CJK_LETTERS_AND_MONTHS","U_CJK_COMPATIBILITY",
103         "U_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A","U_CJK_UNIFIED_IDEOGRAPHS","U_YI_SYLLABLES",
104         "U_YI_RADICALS","U_HANGUL_SYLLABLES","U_HIGH_SURROGATES","U_HIGH_PRIVATE_USE_SURROGATES",
105         "U_LOW_SURROGATES","U_PRIVATE_USE_AREA","U_CJK_COMPATIBILITY_IDEOGRAPHS",
106         "U_ALPHABETIC_PRESENTATION_FORMS","U_ARABIC_PRESENTATION_FORMS_A","U_COMBINING_HALF_MARKS",
107         "U_CJK_COMPATIBILITY_FORMS","U_SMALL_FORM_VARIANTS","U_ARABIC_PRESENTATION_FORMS_B",
108         "U_SPECIALS","U_HALFWIDTH_AND_FULLWIDTH_FORMS","U_CHAR_SCRIPT_COUNT","U_NO_SCRIPT"};
109 
110     // setup and close connections
111     @BeforeClass
112     public static void setUpConnection() throws Exception
113     {
114         app.start();
115     }
116 
117     @AfterClass
118     public static void tearDownConnection() throws InterruptedException, com.sun.star.uno.Exception
119     {
120         app.close();
121     }
122 
123     @Before
124     public void before() throws Exception {
125         xContext = app.getComponentContext();
126         oObj = UnoRuntime.queryInterface(
127             XCharacterClassification.class,
128             xContext.getServiceManager().createInstanceWithContext("com.sun.star.i18n.CharacterClassification", xContext)
129         );
130     }
131 
132     /**
133     * Test calls the method for different locales. Then each result is compared
134     * with a string, converted to a upper case using
135     * <code>java.lang.String</code> method <code>toUpperCase()</code>.<p>
136     * Has <b> OK </b> status if string, returned by the method is equal to
137     * a string that is returned by String.toUpperCase() for all locales.
138     */
139     @Test
140     public void _toUpper() {
141         boolean res = true;
142         char[] characters = new char[]{586,65,97,498,721,4588,772,8413,3404};
143         String toCheck = new String(characters);
144         String get = "";
145         String exp = "";
146 
147         for (int i=0;i<7;i++) {
148             get = oObj.toUpper(toCheck, 0, toCheck.length(), getLocale(i));
149             exp = toCheck.toUpperCase(
150                 new java.util.Locale(languages[i], countries[i]));
151             res &= get.equals(exp);
152             if (!res) {
153                 System.out.println("FAILED for: language=" + languages[i] +
154                     " ; country=" + countries[i]);
155                 System.out.println("Expected: " + exp);
156                 System.out.println("Gained : " + get);
157             }
158         }
159         Assert.assertTrue("toUpper()", res);
160     }
161 
162     /**
163     * Test calls the method for different locales. Then each result is compared
164     * with a string, converted to a lower case using
165     * <code>java.lang.String</code> method <code>toLowerCase()</code>.<p>
166     * Has <b> OK </b> status if string, returned by the method is equal to
167     * a string that is returned by String.toLowerCase() for all locales.
168     */
169     @Test
170     public void _toLower() {
171         boolean res = true;
172         char[] characters = new char[]{586,65,97,498,721,4588,772,8413,3404};
173         String toCheck = new String(characters);
174         String get = "";
175         String exp = "";
176 
177         for (int i=0;i<7;i++) {
178             get = oObj.toLower(toCheck,0,toCheck.length(),getLocale(i));
179             exp = toCheck.toLowerCase(
180                 new java.util.Locale(languages[i],countries[i]));
181             res &= get.equals(exp);
182             if (!res) {
183                 System.out.println("FAILED for: language=" + languages[i]
184                     + " ; country=" + countries[i]);
185                 System.out.println("Expected: " + exp);
186                 System.out.println("Gained : " + get);
187             }
188         }
189         Assert.assertTrue("toLower()", res);
190     }
191 
192     /**
193     * Test calls the method for different locales. Then each result is compared
194     * with a string, converted to a title case using
195     * <code>java.lang.Character</code> method <code>toTitleCase()</code>.<p>
196     * Has <b> OK </b> status if string, returned by the method is equal to
197     * a string that was converted using Character.toTitleCase() for all locales.
198     */
199     @Test
200     public void _toTitle() {
201         boolean res = true;
202         String toCheck = new String(new char[]{8112});
203         String get = "";
204         String exp = "";
205 
206         for (int i=0;i<7;i++) {
207             get = oObj.toTitle(toCheck, 0, 1, getLocale(i));
208             exp = new String(
209                 new char[]{Character.toTitleCase(toCheck.toCharArray()[0])});
210             res &= get.equals(exp);
211             if (!res) {
212                 System.out.println("FAILED for: language=" + languages[i]
213                     + " ; country=" + countries[i]);
214                 System.out.println("Expected: " + exp);
215                 System.out.println("Gained : " + get);
216             }
217         }
218         Assert.assertTrue("toTitle()", res);
219     }
220 
221     /**
222     * At first we define <code>int[]</code> and <code>char[]</code> arrays of
223     * unicode symbol numbers, arranged as sequences, where symbols are sorted
224     * by type, so the character of <code>i<sup><small>th</small></sup></code>
225     * type is located on <code>i<sup><small>th</small></sup></code> position.<p>
226     * Has <b> OK </b> status if for all 30 types the method returns value, that
227     * is equal to an element number.<p>
228     * @see com.sun.star.i18n.CharType
229     */
230     @Test
231     public void _getType() {
232         boolean res = true;
233         char[] characters = new char[]{586,65,97,498,721,4588,772,8413,3404,
234             48,8544,179,32,8232,8233,144,8204,57344,56320,173,40,41,95,3852,247,
235             3647,901,3896,171,187};
236         int[] charsInt = new int[]{586,65,97,498,721,4588,772,8413,3404,48,
237             8544,179,32,8232,8233,144,8204,57344,56320,173,40,41,95,3852,247,
238             3647,901,3896,171,187};
239         String toCheck = new String(characters);
240 
241         for (int i=0;i<characters.length;i++) {
242             int get = oObj.getType(toCheck, i);
243             res &= (charstyles_office[get] == charstyles_office[i]);
244             if (!res) {
245                 System.out.println("Code :" + Integer.toHexString(charsInt[i]));
246                 System.out.println("Gained: " + charstyles_office[get]);
247                 System.out.println("Expected : " + charstyles_office[i]);
248             }
249         }
250         Assert.assertTrue("getType()", res);
251     }
252 
253     /**
254     * After defining string to be checked and array of expected types, test
255     * calls the method for each character of a string and for all locales.<p>
256     * Has <b> OK </b> status if the method returns type, expected for a given
257     * character and locale.
258     */
259     @Test
260     public void _getCharacterType() {
261         boolean res = true;
262         String toCheck = "Ab0)";
263         int[] expected = new int[]{226,228,97,32};
264 
265         for (int i=0;i<toCheck.length();i++) {
266             for (int j=1;j<7;j++) {
267                 int get = oObj.getCharacterType(toCheck, i, getLocale(j));
268                 res &= (get == expected[i]);
269                 if (!res) {
270                     System.out.println("FAILED for: language=" + languages[j] +
271                         " ; country=" + countries[j]);
272                     System.out.println("Sysmbol :" + toCheck.toCharArray()[i]);
273                     System.out.println("Gained: " + get);
274                     System.out.println("Expected : " + expected[i]);
275                 }
276             }
277         }
278         Assert.assertTrue("getCharacterType()", res);
279     }
280 
281     /**
282     * After defining array of strings to be checked and array of expected types,
283     * test calls the method for each string of an array and for all locales.<p>
284     * Has <b> OK </b> status if the method returns type, expected for a given
285     * string and locale.
286     */
287     @Test
288     public void _getStringType() {
289         boolean res = true;
290         String[] toCheck = new String[]{"01234","AAAAA","bbbbb","AA()bb"};
291         int[] exp = new int[]{97,226,228,230};
292 
293         for (int j=0;j<toCheck.length;j++) {
294             for (int i=0;i<7;i++) {
295                 int get = oObj.getStringType(toCheck[j], 0,
296                     toCheck[j].length(), getLocale(i));
297                 res &= (get == exp[j]);
298                 if (!res) {
299                     System.out.println("FAILED for: language=" + languages[i] +
300                         " ; country=" + countries[i]);
301                     System.out.println("Expected: " + exp[j]);
302                     System.out.println("Gained : " + get);
303                 }
304             }
305         }
306         Assert.assertTrue("getStringType()", res);
307     }
308 
309     /**
310     * After string to be checked is initialized (all symbols are sorted
311     * by direction, so the character of <code>i<sup><small>th</small></sup></code>
312     * direction is located on <code>i<sup><small>th</small></sup></code>
313     * position), test calls the method for every character of that string. <p>
314     * Has <b> OK </b> status if the method returns direction, that's equal to
315     * a symbol position in the string.
316     */
317     @Test
318     public void _getCharacterDirection() {
319         boolean res = true;
320         String toCheck = new String(new char[]{65,1470,48,47,35,1632,44,10,
321                                 9,12,33,8234,8237,1563,8235,8238,8236,768,1});
322         for (short i=0;i<19;i++) {
323             short get = oObj.getCharacterDirection(toCheck, i);
324             res &= (get == i);
325             if (!res) {
326                 System.out.println("Code :" + toCheck.toCharArray()[i]);
327                 System.out.println("Gained: " + get);
328                 System.out.println("Expected: " + i);
329             }
330         }
331         Assert.assertTrue("getCharacterDirection()", res);
332     }
333 
334     /**
335     * At first we define <code>int[]</code> and <code>char[]</code> arrays of
336     * unicode symbol numbers, arranged as sequences, where symbols are sorted
337     * by type, so the character of <code>i<sup><small>th</small></sup></code>
338     * type is located on <code>i<sup><small>th</small></sup></code> position.<p>
339     * Has <b> OK </b> status if for each character method returns value, that
340     * is equal to a number where element is located in array. Also method has
341     * <b> OK </b> status for symbol with code 55296, because it doesn't work
342     * since it hasn't the right neighborhood.<p>
343     * @see "http://ppewww.ph.gla.ac.uk/~flavell/unicode/unidata.html"
344     */
345     @Test
346     public void _getScript() {
347         boolean res = true;
348         char[] characters = new char[]{65,128,256,384,592,750,773,924,1030,1331,1448,
349             1569,1792,1936,2313,2465,2570,2707,2822,2972,3079,3240,3337,3464,3590,
350             3745,3906,4097,4274,4357,4621,5040,5200,5776,5806,6030,6155,7683,7943,
351             8202,8319,8352,8413,8452,8545,8616,8715,8965,9217,9281,9336,9474,9608,9719,
352             9734,9999,10247,11911,12034,12274,12294,12358,12456,12552,12605,12688,12727,
353             12806,13065,13312,19968,40964,42152,44032,55296,56192,56320,57344,63744,
354             64257,64370,65056,65073,65131,65146,65532,65288};
355         int[] charsInt = new int[]{65,128,256,384,592,750,773,924,1030,1331,1448,
356             1569,1792,1936,2313,2465,2570,2707,2822,2972,3079,3240,3337,3464,3590,
357             3745,3906,4097,4274,4357,4621,5040,5200,5776,5806,6030,6155,7683,7943,
358             8202,8319,8352,8413,8452,8545,8616,8715,8965,9217,9281,9336,9474,9608,9719,
359             9734,9999,10247,11911,12034,12274,12294,12358,12456,12552,12605,12688,12727,
360             12806,13065,13312,19968,40964,42152,44032,55296,56192,56320,57344,63744,
361             64257,64370,65056,65073,65131,65146,65532,65288};
362         String toCheck = new String(characters);
363 
364         for (int i=0;i<characters.length;i++) {
365             int get = oObj.getScript(toCheck, i);
366             res &= (get == i);
367             //The HIGH_SURROGATE 55296 doesn't work since it hasn't the right
368             //neighborhood
369             if (toCheck.substring(i, i + 1).hashCode() == 55296) res = true;
370             if (!res) {
371                 System.out.println("-- " + toCheck.substring(i, i + 1).hashCode());
372                 System.out.println("Code: " + Integer.toHexString(charsInt[i]));
373                 System.out.println("Gained: " + unicode_script[get]);
374                 System.out.println("Expected: " + unicode_script[i]);
375             }
376         }
377         Assert.assertTrue("getScript()", res);
378     }
379 
380     /**
381     * After defining a string to be parsed and parse conditions (flags), test
382     * calls the method for different locales three times with different parameters,
383     * checking result after every call.  <p>
384     * Has <b> OK </b> status if the method returns right results all three
385     * times.
386     */
387     @Test
388     public void _parseAnyToken() {
389         int nStartFlags = KParseTokens.ANY_ALPHA | KParseTokens.ASC_UNDERSCORE;
390         int nContFlags = KParseTokens.ANY_ALNUM | KParseTokens.ASC_UNDERSCORE
391                         | KParseTokens.ASC_DOT;
392         String toCheck = " 18 i18n ^";
393         ParseResult pRes = null;
394         boolean res = true;
395 
396         for (int i=0;i<7;i++) {
397             pRes = oObj.parseAnyToken(toCheck, 1, getLocale(i),
398                 nStartFlags, "", nContFlags, "");
399             res = ( (pRes.CharLen==2)
400                  && (pRes.TokenType==32)
401                  && (pRes.Value==18.0) );
402             pRes = oObj.parseAnyToken(toCheck, 4, getLocale(i),
403                 nStartFlags, "", nContFlags, "");
404             res &= ( (pRes.CharLen==4)
405                   && (pRes.TokenType==4)
406                   && (pRes.Value==0.0) );
407             pRes = oObj.parseAnyToken(toCheck, 9, getLocale(i),
408                 nStartFlags, "", nContFlags, "");
409             res &= ( (pRes.CharLen==1)
410                   && (pRes.TokenType==1)
411                   && (pRes.Value==0.0) );
412         }
413         Assert.assertTrue("parseAnyToken()", res);
414     }
415 
416     /**
417     * After defining a string to be parsed and parse conditions (flags), test
418     * calls the method for different locales two times with different parameters,
419     * checking result after every call. <p>
420     * Has <b> OK </b> status if the method returns right results.
421     */
422     @Test
423     public void _parsePredefinedToken() {
424         int nStartFlags = KParseTokens.ANY_ALPHA | KParseTokens.ASC_UNDERSCORE;
425         int nContFlags = nStartFlags;
426         String toCheck = " 18 int";
427         ParseResult pRes = null;
428         boolean res = true;
429 
430         for (int i=0;i<7;i++) {
431             pRes = oObj.parsePredefinedToken(KParseType.IDENTNAME, toCheck,
432                 1, getLocale(i), nStartFlags, "", nContFlags, "");
433             res = (pRes.CharLen==0);
434             pRes = oObj.parsePredefinedToken(KParseType.IDENTNAME, toCheck,
435                 4, getLocale(i), nStartFlags, "", nContFlags, "");
436             res &= ( (pRes.CharLen==3)
437                   && (pRes.TokenType==4)
438                   && (pRes.Value==0.0) );
439         }
440         Assert.assertTrue("parsePredefinedToken()", res);
441     }
442 
443 
444     /**
445     * Method returns locale for a given language and country.
446     * @param k index of needed locale.
447     */
448     private Locale getLocale(int k) {
449         return new Locale(languages[k],countries[k],"");
450     }
451 
452 
453 } // end XCharacterClassification
454