1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 package ifc.i18n;
29 
30 import lib.MultiMethodTest;
31 
32 import com.sun.star.i18n.KParseTokens;
33 import com.sun.star.i18n.KParseType;
34 import com.sun.star.i18n.ParseResult;
35 import com.sun.star.i18n.XCharacterClassification;
36 import com.sun.star.lang.Locale;
37 
38 /**
39  * Testing <code>com.sun.star.i18n.XCharacterClassification</code>
40  * interface methods:
41  * <ul>
42  *  <li><code> toUpper() </code></li>
43  *  <li><code> toLower() </code></li>
44  *  <li><code> toTitle() </code></li>
45  *  <li><code> getType() </code></li>
46  *  <li><code> getCharacterType() </code></li>
47  *  <li><code> getStringType() </code></li>
48  *  <li><code> getCharacterDirection() </code></li>
49  *  <li><code> getScript() </code></li>
50  *  <li><code> parseAnyToken() </code></li>
51  *  <li><code> parsePredefinedToken() </code></li>
52  * </ul><p>
 * Test is <b> NOT </b> multithread compliant. <p>
54  * @see com.sun.star.i18n.XCharacterClassification
55  */
56 public class _XCharacterClassification extends MultiMethodTest {
57     public XCharacterClassification oObj = null;
58     public String[] languages = new String[]{"de","en","es","fr","ja","ko","zh"};
59     public String[] countries = new String[]{"DE","US","ES","FR","JP","KR","CN"};
60 
61     public String[] charstyles_java = new String[] {"UNASSIGNED","UPPERCASE_LETTER",
62         "LOWERCASE_LETTER","TITLECASE_LETTER","MODIFIER_LETTER","OTHER_LETTER",
63         "NON_SPACING_MARK","ENCLOSING_MARK","COMBINING_SPACING_MARK",
64         "DECIMAL_DIGIT_NUMBER","LETTER_NUMBER","OTHER_NUMBER","SPACE_SEPARATOR",
65         "LINE_SEPARATOR","PARAGRAPH_SEPARATOR","CONTROL","FORMAT","none17",
66         "PRIVATE_USE","none19","DASH_PUNCTUATION","START_PUNCTUATION","END_PUNCTUATION",
67         "CONNECTOR_PUNCTUATION","OTHER_PUNCTUATION","MATH_SYMBOL","CURRENCY_SYMBOL",
68         "MODIFIER_SYMBOL","OTHER_SYMBOL"};
69 
70     public String[] charstyles_office = new String[] {"UNASSIGNED","UPPERCASE_LETTER",
71         "LOWERCASE_LETTER","TITLECASE_LETTER","MODIFIER_LETTER","OTHER_LETTER",
72         "NON_SPACING_MARK","ENCLOSING_MARK","COMBINING_SPACING_MARK",
73         "DECIMAL_DIGIT_NUMBER","LETTER_NUMBER","OTHER_NUMBER","SPACE_SEPARATOR",
74         "LINE_SEPARATOR","PARAGRAPH_SEPARATOR","CONTROL","FORMAT","PRIVATE_USE",
75         "OTHER_PUNCTUATION","DASH_PUNCTUATION","START_PUNCTUATION","END_PUNCTUATION",
76         "CONNECTOR_PUNCTUATION",
77         "OTHER_PUNCTUATION","MATH_SYMBOL","CURRENCY_SYMBOL","MODIFIER_SYMBOL",
78         "OTHER_SYMBOL","INITIAL_PUNCTUATION","FINAL_PUNCTUATION","GENERAL_TYPES_COUNT"};
79 
80     public String[] unicode_script = new String[] {"U_BASIC_LATIN","U_LATIN_1_SUPPLEMENT",
81         "U_LATIN_EXTENDED_A","U_LATIN_EXTENDED_B","U_IPA_EXTENSIONS","U_SPACING_MODIFIER_LETTERS",
82         "U_COMBINING_DIACRITICAL_MARKS","U_GREEK","U_CYRILLIC","U_ARMENIAN","U_HEBREW",
83         "U_ARABIC","U_SYRIAC","U_THAANA","U_DEVANAGARI","U_BENGALI","U_GURMUKHI",
84         "U_GUJARATI","U_ORIYA","U_TAMIL","U_TELUGU","U_KANNADA","U_MALAYALAM",
85         "U_SINHALA","U_THAI","U_LAO","U_TIBETAN","U_MYANMAR","U_GEORGIAN",
86         "U_HANGUL_JAMO","U_ETHIOPIC","U_CHEROKEE","U_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
87         "U_OGHAM","U_RUNIC","U_KHMER","U_MONGOLIAN","U_LATIN_EXTENDED_ADDITIONAL",
88         "U_GREEK_EXTENDED","U_GENERAL_PUNCTUATION","U_SUPERSCRIPTS_AND_SUBSCRIPTS",
89         "U_CURRENCY_SYMBOLS","U_COMBINING_MARKS_FOR_SYMBOLS","U_LETTERLIKE_SYMBOLS",
90         "U_NUMBER_FORMS","U_ARROWS","U_MATHEMATICAL_OPERATORS","U_MISCELLANEOUS_TECHNICAL",
91         "U_CONTROL_PICTURES","U_OPTICAL_CHARACTER_RECOGNITION","U_ENCLOSED_ALPHANUMERICS",
92         "U_BOX_DRAWING","U_BLOCK_ELEMENTS","U_GEOMETRIC_SHAPES","U_MISCELLANEOUS_SYMBOLS",
93         "U_DINGBATS","U_BRAILLE_PATTERNS","U_CJK_RADICALS_SUPPLEMENT","U_KANGXI_RADICALS",
94         "U_IDEOGRAPHIC_DESCRIPTION_CHARACTERS","U_CJK_SYMBOLS_AND_PUNCTUATION",
95         "U_HIRAGANA","U_KATAKANA","U_BOPOMOFO","U_HANGUL_COMPATIBILITY_JAMO","U_KANBUN",
96         "U_BOPOMOFO_EXTENDED","U_ENCLOSED_CJK_LETTERS_AND_MONTHS","U_CJK_COMPATIBILITY",
97         "U_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A","U_CJK_UNIFIED_IDEOGRAPHS","U_YI_SYLLABLES",
98         "U_YI_RADICALS","U_HANGUL_SYLLABLES","U_HIGH_SURROGATES","U_HIGH_PRIVATE_USE_SURROGATES",
99         "U_LOW_SURROGATES","U_PRIVATE_USE_AREA","U_CJK_COMPATIBILITY_IDEOGRAPHS",
100         "U_ALPHABETIC_PRESENTATION_FORMS","U_ARABIC_PRESENTATION_FORMS_A","U_COMBINING_HALF_MARKS",
101         "U_CJK_COMPATIBILITY_FORMS","U_SMALL_FORM_VARIANTS","U_ARABIC_PRESENTATION_FORMS_B",
102         "U_SPECIALS","U_HALFWIDTH_AND_FULLWIDTH_FORMS","U_CHAR_SCRIPT_COUNT","U_NO_SCRIPT"};
103 
104     /**
105     * Test calls the method for different locales. Then each result is compared
106     * with a string, converted to a upper case using
107     * <code>java.lang.String</code> method <code>toUpperCase()</code>.<p>
108     * Has <b> OK </b> status if string, returned by the method is equal to
109     * a string that is returned by String.toUpperCase() for all locales.
110     */
111     public void _toUpper() {
112         boolean res = true;
113         char[] characters = new char[]{586,65,97,498,721,4588,772,8413,3404};
114         String toCheck = new String(characters);
115         String get = "";
116         String exp = "";
117 
118         for (int i=0;i<7;i++) {
119             get = oObj.toUpper(toCheck, 0, toCheck.length(), getLocale(i));
120             exp = toCheck.toUpperCase(
121                 new java.util.Locale(languages[i], countries[i]));
122             res &= get.equals(exp);
123             if (!res) {
124                 log.println("FAILED for: language=" + languages[i] +
125                     " ; country=" + countries[i]);
126                 log.println("Expected: " + exp);
127                 log.println("Gained : " + get);
128             }
129         }
130         tRes.tested("toUpper()", res);
131     }
132 
133     /**
134     * Test calls the method for different locales. Then each result is compared
135     * with a string, converted to a lower case using
136     * <code>java.lang.String</code> method <code>toLowerCase()</code>.<p>
137     * Has <b> OK </b> status if string, returned by the method is equal to
138     * a string that is returned by String.toLowerCase() for all locales.
139     */
140     public void _toLower() {
141         boolean res = true;
142         char[] characters = new char[]{586,65,97,498,721,4588,772,8413,3404};
143         String toCheck = new String(characters);
144         String get = "";
145         String exp = "";
146 
147         for (int i=0;i<7;i++) {
148             get = oObj.toLower(toCheck,0,toCheck.length(),getLocale(i));
149             exp = toCheck.toLowerCase(
150                 new java.util.Locale(languages[i],countries[i]));
151             res &= get.equals(exp);
152             if (!res) {
153                 log.println("FAILED for: language=" + languages[i]
154                     + " ; country=" + countries[i]);
155                 log.println("Expected: " + exp);
156                 log.println("Gained : " + get);
157             }
158         }
159         tRes.tested("toLower()", res);
160     }
161 
162     /**
163     * Test calls the method for different locales. Then each result is compared
164     * with a string, converted to a title case using
165     * <code>java.lang.Character</code> method <code>toTitleCase()</code>.<p>
166     * Has <b> OK </b> status if string, returned by the method is equal to
167     * a string that was converted using Character.toTitleCase() for all locales.
168     */
169     public void _toTitle() {
170         boolean res = true;
171         String toCheck = new String(new char[]{8112});
172         String get = "";
173         String exp = "";
174 
175         for (int i=0;i<7;i++) {
176             get = oObj.toTitle(toCheck, 0, 1, getLocale(i));
177             exp = new String(
178                 new char[]{Character.toTitleCase(toCheck.toCharArray()[0])});
179             res &= get.equals(exp);
180             if (!res) {
181                 log.println("FAILED for: language=" + languages[i]
182                     + " ; country=" + countries[i]);
183                 log.println("Expected: " + exp);
184                 log.println("Gained : " + get);
185             }
186         }
187         tRes.tested("toTitle()", res);
188     }
189 
190     /**
191     * At first we define <code>int[]</code> and <code>char[]</code> arrays of
192     * unicode symbol numbers, arranged as sequences, where symbols are sorted
193     * by type, so the character of <code>i<sup><small>th</small></sup></code>
194     * type is located on <code>i<sup><small>th</small></sup></code> position.<p>
195     * Has <b> OK </b> status if for all 30 types the method returns value, that
196     * is equal to an element number.<p>
197     * @see com.sun.star.i18n.CharType
198     */
199     public void _getType() {
200         boolean res = true;
201         char[] characters = new char[]{586,65,97,498,721,4588,772,8413,3404,
202             48,8544,179,32,8232,8233,144,8204,57344,56320,173,40,41,95,3852,247,
203             3647,901,3896,171,187};
204         int[] charsInt = new int[]{586,65,97,498,721,4588,772,8413,3404,48,
205             8544,179,32,8232,8233,144,8204,57344,56320,173,40,41,95,3852,247,
206             3647,901,3896,171,187};
207         String toCheck = new String(characters);
208 
209         for (int i=0;i<characters.length;i++) {
210             int get = oObj.getType(toCheck, i);
211             res &= (charstyles_office[get] == charstyles_office[i]);
212             if (!res) {
213                 log.println("Code :" + Integer.toHexString(charsInt[i]));
214                 log.println("Gained: " + charstyles_office[get]);
215                 log.println("Expected : " + charstyles_office[i]);
216             }
217         }
218         tRes.tested("getType()", res);
219     }
220 
221     /**
222     * After defining string to be checked and array of expected types, test
223     * calls the method for each character of a string and for all locales.<p>
224     * Has <b> OK </b> status if the method returns type, expected for a given
225     * character and locale.
226     */
227     public void _getCharacterType() {
228         boolean res = true;
229         String toCheck = "Ab0)";
230         int[] expected = new int[]{226,228,97,32};
231 
232         for (int i=0;i<toCheck.length();i++) {
233             for (int j=1;j<7;j++) {
234                 int get = oObj.getCharacterType(toCheck, i, getLocale(j));
235                 res &= (get == expected[i]);
236                 if (!res) {
237                     log.println("FAILED for: language=" + languages[j] +
238                         " ; country=" + countries[j]);
239                     log.println("Sysmbol :" + toCheck.toCharArray()[i]);
240                     log.println("Gained: " + get);
241                     log.println("Expected : " + expected[i]);
242                 }
243             }
244         }
245         tRes.tested("getCharacterType()", res);
246     }
247 
248     /**
249     * After defining array of strings to be checked and array of expected types,
250     * test calls the method for each string of an array and for all locales.<p>
251     * Has <b> OK </b> status if the method returns type, expected for a given
252     * string and locale.
253     */
254     public void _getStringType() {
255         boolean res = true;
256         String[] toCheck = new String[]{"01234","AAAAA","bbbbb","AA()bb"};
257         int[] exp = new int[]{97,226,228,230};
258 
259         for (int j=0;j<toCheck.length;j++) {
260             for (int i=0;i<7;i++) {
261                 int get = oObj.getStringType(toCheck[j], 0,
262                     toCheck[j].length(), getLocale(i));
263                 res &= (get == exp[j]);
264                 if (!res) {
265                     log.println("FAILED for: language=" + languages[i] +
266                         " ; country=" + countries[i]);
267                     log.println("Expected: " + exp[j]);
268                     log.println("Gained : " + get);
269                 }
270             }
271         }
272         tRes.tested("getStringType()", res);
273     }
274 
275     /**
276     * After string to be checked is initialized (all symbols are sorted
277     * by direction, so the character of <code>i<sup><small>th</small></sup></code>
278     * direction is located on <code>i<sup><small>th</small></sup></code>
279     * position), test calls the method for every character of that string. <p>
280     * Has <b> OK </b> status if the method returns direction, that's equal to
281     * a symbol position in the string.
282     */
283     public void _getCharacterDirection() {
284         boolean res = true;
285         String toCheck = new String(new char[]{65,1470,48,47,35,1632,44,10,
286                                 9,12,33,8234,8237,1563,8235,8238,8236,768,1});
287         for (short i=0;i<19;i++) {
288             short get = oObj.getCharacterDirection(toCheck, i);
289             res &= (get == i);
290             if (!res) {
291                 log.println("Code :" + toCheck.toCharArray()[i]);
292                 log.println("Gained: " + get);
293                 log.println("Expected: " + i);
294             }
295         }
296         tRes.tested("getCharacterDirection()", res);
297     }
298 
299     /**
300     * At first we define <code>int[]</code> and <code>char[]</code> arrays of
301     * unicode symbol numbers, arranged as sequences, where symbols are sorted
302     * by type, so the character of <code>i<sup><small>th</small></sup></code>
303     * type is located on <code>i<sup><small>th</small></sup></code> position.<p>
304     * Has <b> OK </b> status if for each character method returns value, that
305     * is equal to a number where element is located in array. Also method has
306     * <b> OK </b> status for symbol with code 55296, because it doesn't work
307     * since it hasn't the right neighborhood.<p>
308     * @see http://ppewww.ph.gla.ac.uk/~flavell/unicode/unidata.html
309     */
310     public void _getScript() {
311         boolean res = true;
312         char[] characters = new char[]{65,128,256,384,592,750,773,924,1030,1331,1448,
313             1569,1792,1936,2313,2465,2570,2707,2822,2972,3079,3240,3337,3464,3590,
314             3745,3906,4097,4274,4357,4621,5040,5200,5776,5806,6030,6155,7683,7943,
315             8202,8319,8352,8413,8452,8545,8616,8715,8965,9217,9281,9336,9474,9608,9719,
316             9734,9999,10247,11911,12034,12274,12294,12358,12456,12552,12605,12688,12727,
317             12806,13065,13312,19968,40964,42152,44032,55296,56192,56320,57344,63744,
318             64257,64370,65056,65073,65131,65146,65532,65288};
319         int[] charsInt = new int[]{65,128,256,384,592,750,773,924,1030,1331,1448,
320             1569,1792,1936,2313,2465,2570,2707,2822,2972,3079,3240,3337,3464,3590,
321             3745,3906,4097,4274,4357,4621,5040,5200,5776,5806,6030,6155,7683,7943,
322             8202,8319,8352,8413,8452,8545,8616,8715,8965,9217,9281,9336,9474,9608,9719,
323             9734,9999,10247,11911,12034,12274,12294,12358,12456,12552,12605,12688,12727,
324             12806,13065,13312,19968,40964,42152,44032,55296,56192,56320,57344,63744,
325             64257,64370,65056,65073,65131,65146,65532,65288};
326         String toCheck = new String(characters);
327 
328         for (int i=0;i<characters.length;i++) {
329             int get = oObj.getScript(toCheck, i);
330             res &= (get == i);
331             //The HIGH_SURROGATE 55296 doesn't work since it hasn't the right
332             //neighborhood
333             if (toCheck.substring(i, i + 1).hashCode() == 55296) res = true;
334             if (!res) {
335                 log.println("-- " + toCheck.substring(i, i + 1).hashCode());
336                 log.println("Code: " + Integer.toHexString(charsInt[i]));
337                 log.println("Gained: " + unicode_script[get]);
338                 log.println("Expected: " + unicode_script[i]);
339             }
340         }
341         tRes.tested("getScript()", res);
342     }
343 
344     /**
345     * After defining a string to be parsed and parse conditions (flags), test
346     * calls the method for different locales three times with different parameters,
347     * checking result after every call.  <p>
348     * Has <b> OK </b> status if the method returns right results all three
349     * times.
350     */
351     public void _parseAnyToken() {
352         int nStartFlags = KParseTokens.ANY_ALPHA | KParseTokens.ASC_UNDERSCORE;
353         int nContFlags = KParseTokens.ANY_ALNUM | KParseTokens.ASC_UNDERSCORE
354                         | KParseTokens.ASC_DOT;
355         String toCheck = " 18 i18n ^";
356         ParseResult pRes = null;
357         boolean res = true;
358 
359         for (int i=0;i<7;i++) {
360             pRes = oObj.parseAnyToken(toCheck, 1, getLocale(i),
361                 nStartFlags, "", nContFlags, "");
362             res = ( (pRes.CharLen==2)
363                  && (pRes.TokenType==32)
364                  && (pRes.Value==18.0) );
365             pRes = oObj.parseAnyToken(toCheck, 4, getLocale(i),
366                 nStartFlags, "", nContFlags, "");
367             res &= ( (pRes.CharLen==4)
368                   && (pRes.TokenType==4)
369                   && (pRes.Value==0.0) );
370             pRes = oObj.parseAnyToken(toCheck, 9, getLocale(i),
371                 nStartFlags, "", nContFlags, "");
372             res &= ( (pRes.CharLen==1)
373                   && (pRes.TokenType==1)
374                   && (pRes.Value==0.0) );
375         }
376         tRes.tested("parseAnyToken()", res);
377     }
378 
379     /**
380     * After defining a string to be parsed and parse conditions (flags), test
381     * calls the method for different locales two times with different parameters,
382     * checking result after every call. <p>
383     * Has <b> OK </b> status if the method returns right results.
384     */
385     public void _parsePredefinedToken() {
386         int nStartFlags = KParseTokens.ANY_ALPHA | KParseTokens.ASC_UNDERSCORE;
387         int nContFlags = nStartFlags;
388         String toCheck = " 18 int";
389         ParseResult pRes = null;
390         boolean res = true;
391 
392         for (int i=0;i<7;i++) {
393             pRes = oObj.parsePredefinedToken(KParseType.IDENTNAME, toCheck,
394                 1, getLocale(i), nStartFlags, "", nContFlags, "");
395             res = (pRes.CharLen==0);
396             pRes = oObj.parsePredefinedToken(KParseType.IDENTNAME, toCheck,
397                 4, getLocale(i), nStartFlags, "", nContFlags, "");
398             res &= ( (pRes.CharLen==3)
399                   && (pRes.TokenType==4)
400                   && (pRes.Value==0.0) );
401         }
402         tRes.tested("parsePredefinedToken()", res);
403     }
404 
405 
406     /**
407     * Method returns locale for a given language and country.
408     * @param localeIndex index of needed locale.
409     */
410     private Locale getLocale(int k) {
411         return new Locale(languages[k],countries[k],"");
412     }
413 
414 
415 } // end XCharacterClassification
416 
417