1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 package ifc.i18n;
25 
26 import lib.MultiMethodTest;
27 
28 import com.sun.star.i18n.KParseTokens;
29 import com.sun.star.i18n.KParseType;
30 import com.sun.star.i18n.ParseResult;
31 import com.sun.star.i18n.XCharacterClassification;
32 import com.sun.star.lang.Locale;
33 
34 /**
35  * Testing <code>com.sun.star.i18n.XCharacterClassification</code>
36  * interface methods:
37  * <ul>
38  *  <li><code> toUpper() </code></li>
39  *  <li><code> toLower() </code></li>
40  *  <li><code> toTitle() </code></li>
41  *  <li><code> getType() </code></li>
42  *  <li><code> getCharacterType() </code></li>
43  *  <li><code> getStringType() </code></li>
44  *  <li><code> getCharacterDirection() </code></li>
45  *  <li><code> getScript() </code></li>
46  *  <li><code> parseAnyToken() </code></li>
47  *  <li><code> parsePredefinedToken() </code></li>
48  * </ul><p>
49  * Test is <b> NOT </b> multithread compliant. <p>
50  * @see com.sun.star.i18n.XCharacterClassification
51  */
52 public class _XCharacterClassification extends MultiMethodTest {
53     public XCharacterClassification oObj = null;
54     public String[] languages = new String[]{"de","en","es","fr","ja","ko","zh"};
55     public String[] countries = new String[]{"DE","US","ES","FR","JP","KR","CN"};
56 
57     public String[] charstyles_java = new String[] {"UNASSIGNED","UPPERCASE_LETTER",
58         "LOWERCASE_LETTER","TITLECASE_LETTER","MODIFIER_LETTER","OTHER_LETTER",
59         "NON_SPACING_MARK","ENCLOSING_MARK","COMBINING_SPACING_MARK",
60         "DECIMAL_DIGIT_NUMBER","LETTER_NUMBER","OTHER_NUMBER","SPACE_SEPARATOR",
61         "LINE_SEPARATOR","PARAGRAPH_SEPARATOR","CONTROL","FORMAT","none17",
62         "PRIVATE_USE","none19","DASH_PUNCTUATION","START_PUNCTUATION","END_PUNCTUATION",
63         "CONNECTOR_PUNCTUATION","OTHER_PUNCTUATION","MATH_SYMBOL","CURRENCY_SYMBOL",
64         "MODIFIER_SYMBOL","OTHER_SYMBOL"};
65 
66     public String[] charstyles_office = new String[] {"UNASSIGNED","UPPERCASE_LETTER",
67         "LOWERCASE_LETTER","TITLECASE_LETTER","MODIFIER_LETTER","OTHER_LETTER",
68         "NON_SPACING_MARK","ENCLOSING_MARK","COMBINING_SPACING_MARK",
69         "DECIMAL_DIGIT_NUMBER","LETTER_NUMBER","OTHER_NUMBER","SPACE_SEPARATOR",
70         "LINE_SEPARATOR","PARAGRAPH_SEPARATOR","CONTROL","FORMAT","PRIVATE_USE",
71         "OTHER_PUNCTUATION","DASH_PUNCTUATION","START_PUNCTUATION","END_PUNCTUATION",
72         "CONNECTOR_PUNCTUATION",
73         "OTHER_PUNCTUATION","MATH_SYMBOL","CURRENCY_SYMBOL","MODIFIER_SYMBOL",
74         "OTHER_SYMBOL","INITIAL_PUNCTUATION","FINAL_PUNCTUATION","GENERAL_TYPES_COUNT"};
75 
76     public String[] unicode_script = new String[] {"U_BASIC_LATIN","U_LATIN_1_SUPPLEMENT",
77         "U_LATIN_EXTENDED_A","U_LATIN_EXTENDED_B","U_IPA_EXTENSIONS","U_SPACING_MODIFIER_LETTERS",
78         "U_COMBINING_DIACRITICAL_MARKS","U_GREEK","U_CYRILLIC","U_ARMENIAN","U_HEBREW",
79         "U_ARABIC","U_SYRIAC","U_THAANA","U_DEVANAGARI","U_BENGALI","U_GURMUKHI",
80         "U_GUJARATI","U_ORIYA","U_TAMIL","U_TELUGU","U_KANNADA","U_MALAYALAM",
81         "U_SINHALA","U_THAI","U_LAO","U_TIBETAN","U_MYANMAR","U_GEORGIAN",
82         "U_HANGUL_JAMO","U_ETHIOPIC","U_CHEROKEE","U_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
83         "U_OGHAM","U_RUNIC","U_KHMER","U_MONGOLIAN","U_LATIN_EXTENDED_ADDITIONAL",
84         "U_GREEK_EXTENDED","U_GENERAL_PUNCTUATION","U_SUPERSCRIPTS_AND_SUBSCRIPTS",
85         "U_CURRENCY_SYMBOLS","U_COMBINING_MARKS_FOR_SYMBOLS","U_LETTERLIKE_SYMBOLS",
86         "U_NUMBER_FORMS","U_ARROWS","U_MATHEMATICAL_OPERATORS","U_MISCELLANEOUS_TECHNICAL",
87         "U_CONTROL_PICTURES","U_OPTICAL_CHARACTER_RECOGNITION","U_ENCLOSED_ALPHANUMERICS",
88         "U_BOX_DRAWING","U_BLOCK_ELEMENTS","U_GEOMETRIC_SHAPES","U_MISCELLANEOUS_SYMBOLS",
89         "U_DINGBATS","U_BRAILLE_PATTERNS","U_CJK_RADICALS_SUPPLEMENT","U_KANGXI_RADICALS",
90         "U_IDEOGRAPHIC_DESCRIPTION_CHARACTERS","U_CJK_SYMBOLS_AND_PUNCTUATION",
91         "U_HIRAGANA","U_KATAKANA","U_BOPOMOFO","U_HANGUL_COMPATIBILITY_JAMO","U_KANBUN",
92         "U_BOPOMOFO_EXTENDED","U_ENCLOSED_CJK_LETTERS_AND_MONTHS","U_CJK_COMPATIBILITY",
93         "U_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A","U_CJK_UNIFIED_IDEOGRAPHS","U_YI_SYLLABLES",
94         "U_YI_RADICALS","U_HANGUL_SYLLABLES","U_HIGH_SURROGATES","U_HIGH_PRIVATE_USE_SURROGATES",
95         "U_LOW_SURROGATES","U_PRIVATE_USE_AREA","U_CJK_COMPATIBILITY_IDEOGRAPHS",
96         "U_ALPHABETIC_PRESENTATION_FORMS","U_ARABIC_PRESENTATION_FORMS_A","U_COMBINING_HALF_MARKS",
97         "U_CJK_COMPATIBILITY_FORMS","U_SMALL_FORM_VARIANTS","U_ARABIC_PRESENTATION_FORMS_B",
98         "U_SPECIALS","U_HALFWIDTH_AND_FULLWIDTH_FORMS","U_CHAR_SCRIPT_COUNT","U_NO_SCRIPT"};
99 
100     /**
101     * Test calls the method for different locales. Then each result is compared
102     * with a string, converted to a upper case using
103     * <code>java.lang.String</code> method <code>toUpperCase()</code>.<p>
104     * Has <b> OK </b> status if string, returned by the method is equal to
105     * a string that is returned by String.toUpperCase() for all locales.
106     */
107     public void _toUpper() {
108         boolean res = true;
109         char[] characters = new char[]{586,65,97,498,721,4588,772,8413,3404};
110         String toCheck = new String(characters);
111         String get = "";
112         String exp = "";
113 
114         for (int i=0;i<7;i++) {
115             get = oObj.toUpper(toCheck, 0, toCheck.length(), getLocale(i));
116             exp = toCheck.toUpperCase(
117                 new java.util.Locale(languages[i], countries[i]));
118             res &= get.equals(exp);
119             if (!res) {
120                 log.println("FAILED for: language=" + languages[i] +
121                     " ; country=" + countries[i]);
122                 log.println("Expected: " + exp);
123                 log.println("Gained : " + get);
124             }
125         }
126         tRes.tested("toUpper()", res);
127     }
128 
129     /**
130     * Test calls the method for different locales. Then each result is compared
131     * with a string, converted to a lower case using
132     * <code>java.lang.String</code> method <code>toLowerCase()</code>.<p>
133     * Has <b> OK </b> status if string, returned by the method is equal to
134     * a string that is returned by String.toLowerCase() for all locales.
135     */
136     public void _toLower() {
137         boolean res = true;
138         char[] characters = new char[]{586,65,97,498,721,4588,772,8413,3404};
139         String toCheck = new String(characters);
140         String get = "";
141         String exp = "";
142 
143         for (int i=0;i<7;i++) {
144             get = oObj.toLower(toCheck,0,toCheck.length(),getLocale(i));
145             exp = toCheck.toLowerCase(
146                 new java.util.Locale(languages[i],countries[i]));
147             res &= get.equals(exp);
148             if (!res) {
149                 log.println("FAILED for: language=" + languages[i]
150                     + " ; country=" + countries[i]);
151                 log.println("Expected: " + exp);
152                 log.println("Gained : " + get);
153             }
154         }
155         tRes.tested("toLower()", res);
156     }
157 
158     /**
159     * Test calls the method for different locales. Then each result is compared
160     * with a string, converted to a title case using
161     * <code>java.lang.Character</code> method <code>toTitleCase()</code>.<p>
162     * Has <b> OK </b> status if string, returned by the method is equal to
163     * a string that was converted using Character.toTitleCase() for all locales.
164     */
165     public void _toTitle() {
166         boolean res = true;
167         String toCheck = new String(new char[]{8112});
168         String get = "";
169         String exp = "";
170 
171         for (int i=0;i<7;i++) {
172             get = oObj.toTitle(toCheck, 0, 1, getLocale(i));
173             exp = new String(
174                 new char[]{Character.toTitleCase(toCheck.toCharArray()[0])});
175             res &= get.equals(exp);
176             if (!res) {
177                 log.println("FAILED for: language=" + languages[i]
178                     + " ; country=" + countries[i]);
179                 log.println("Expected: " + exp);
180                 log.println("Gained : " + get);
181             }
182         }
183         tRes.tested("toTitle()", res);
184     }
185 
186     /**
187     * At first we define <code>int[]</code> and <code>char[]</code> arrays of
188     * unicode symbol numbers, arranged as sequences, where symbols are sorted
189     * by type, so the character of <code>i<sup><small>th</small></sup></code>
190     * type is located on <code>i<sup><small>th</small></sup></code> position.<p>
191     * Has <b> OK </b> status if for all 30 types the method returns value, that
192     * is equal to an element number.<p>
193     * @see com.sun.star.i18n.CharType
194     */
195     public void _getType() {
196         boolean res = true;
197         char[] characters = new char[]{586,65,97,498,721,4588,772,8413,3404,
198             48,8544,179,32,8232,8233,144,8204,57344,56320,173,40,41,95,3852,247,
199             3647,901,3896,171,187};
200         int[] charsInt = new int[]{586,65,97,498,721,4588,772,8413,3404,48,
201             8544,179,32,8232,8233,144,8204,57344,56320,173,40,41,95,3852,247,
202             3647,901,3896,171,187};
203         String toCheck = new String(characters);
204 
205         for (int i=0;i<characters.length;i++) {
206             int get = oObj.getType(toCheck, i);
207             res &= (charstyles_office[get] == charstyles_office[i]);
208             if (!res) {
209                 log.println("Code :" + Integer.toHexString(charsInt[i]));
210                 log.println("Gained: " + charstyles_office[get]);
211                 log.println("Expected : " + charstyles_office[i]);
212             }
213         }
214         tRes.tested("getType()", res);
215     }
216 
217     /**
218     * After defining string to be checked and array of expected types, test
219     * calls the method for each character of a string and for all locales.<p>
220     * Has <b> OK </b> status if the method returns type, expected for a given
221     * character and locale.
222     */
223     public void _getCharacterType() {
224         boolean res = true;
225         String toCheck = "Ab0)";
226         int[] expected = new int[]{226,228,97,32};
227 
228         for (int i=0;i<toCheck.length();i++) {
229             for (int j=1;j<7;j++) {
230                 int get = oObj.getCharacterType(toCheck, i, getLocale(j));
231                 res &= (get == expected[i]);
232                 if (!res) {
233                     log.println("FAILED for: language=" + languages[j] +
234                         " ; country=" + countries[j]);
235                     log.println("Sysmbol :" + toCheck.toCharArray()[i]);
236                     log.println("Gained: " + get);
237                     log.println("Expected : " + expected[i]);
238                 }
239             }
240         }
241         tRes.tested("getCharacterType()", res);
242     }
243 
244     /**
245     * After defining array of strings to be checked and array of expected types,
246     * test calls the method for each string of an array and for all locales.<p>
247     * Has <b> OK </b> status if the method returns type, expected for a given
248     * string and locale.
249     */
250     public void _getStringType() {
251         boolean res = true;
252         String[] toCheck = new String[]{"01234","AAAAA","bbbbb","AA()bb"};
253         int[] exp = new int[]{97,226,228,230};
254 
255         for (int j=0;j<toCheck.length;j++) {
256             for (int i=0;i<7;i++) {
257                 int get = oObj.getStringType(toCheck[j], 0,
258                     toCheck[j].length(), getLocale(i));
259                 res &= (get == exp[j]);
260                 if (!res) {
261                     log.println("FAILED for: language=" + languages[i] +
262                         " ; country=" + countries[i]);
263                     log.println("Expected: " + exp[j]);
264                     log.println("Gained : " + get);
265                 }
266             }
267         }
268         tRes.tested("getStringType()", res);
269     }
270 
271     /**
272     * After string to be checked is initialized (all symbols are sorted
273     * by direction, so the character of <code>i<sup><small>th</small></sup></code>
274     * direction is located on <code>i<sup><small>th</small></sup></code>
275     * position), test calls the method for every character of that string. <p>
276     * Has <b> OK </b> status if the method returns direction, that's equal to
277     * a symbol position in the string.
278     */
279     public void _getCharacterDirection() {
280         boolean res = true;
281         String toCheck = new String(new char[]{65,1470,48,47,35,1632,44,10,
282                                 9,12,33,8234,8237,1563,8235,8238,8236,768,1});
283         for (short i=0;i<19;i++) {
284             short get = oObj.getCharacterDirection(toCheck, i);
285             res &= (get == i);
286             if (!res) {
287                 log.println("Code :" + toCheck.toCharArray()[i]);
288                 log.println("Gained: " + get);
289                 log.println("Expected: " + i);
290             }
291         }
292         tRes.tested("getCharacterDirection()", res);
293     }
294 
295     /**
296     * At first we define <code>int[]</code> and <code>char[]</code> arrays of
297     * unicode symbol numbers, arranged as sequences, where symbols are sorted
298     * by type, so the character of <code>i<sup><small>th</small></sup></code>
299     * type is located on <code>i<sup><small>th</small></sup></code> position.<p>
300     * Has <b> OK </b> status if for each character method returns value, that
301     * is equal to a number where element is located in array. Also method has
302     * <b> OK </b> status for symbol with code 55296, because it doesn't work
303     * since it hasn't the right neighborhood.<p>
304     * @see http://ppewww.ph.gla.ac.uk/~flavell/unicode/unidata.html
305     */
306     public void _getScript() {
307         boolean res = true;
308         char[] characters = new char[]{65,128,256,384,592,750,773,924,1030,1331,1448,
309             1569,1792,1936,2313,2465,2570,2707,2822,2972,3079,3240,3337,3464,3590,
310             3745,3906,4097,4274,4357,4621,5040,5200,5776,5806,6030,6155,7683,7943,
311             8202,8319,8352,8413,8452,8545,8616,8715,8965,9217,9281,9336,9474,9608,9719,
312             9734,9999,10247,11911,12034,12274,12294,12358,12456,12552,12605,12688,12727,
313             12806,13065,13312,19968,40964,42152,44032,55296,56192,56320,57344,63744,
314             64257,64370,65056,65073,65131,65146,65532,65288};
315         int[] charsInt = new int[]{65,128,256,384,592,750,773,924,1030,1331,1448,
316             1569,1792,1936,2313,2465,2570,2707,2822,2972,3079,3240,3337,3464,3590,
317             3745,3906,4097,4274,4357,4621,5040,5200,5776,5806,6030,6155,7683,7943,
318             8202,8319,8352,8413,8452,8545,8616,8715,8965,9217,9281,9336,9474,9608,9719,
319             9734,9999,10247,11911,12034,12274,12294,12358,12456,12552,12605,12688,12727,
320             12806,13065,13312,19968,40964,42152,44032,55296,56192,56320,57344,63744,
321             64257,64370,65056,65073,65131,65146,65532,65288};
322         String toCheck = new String(characters);
323 
324         for (int i=0;i<characters.length;i++) {
325             int get = oObj.getScript(toCheck, i);
326             res &= (get == i);
327             //The HIGH_SURROGATE 55296 doesn't work since it hasn't the right
328             //neighborhood
329             if (toCheck.substring(i, i + 1).hashCode() == 55296) res = true;
330             if (!res) {
331                 log.println("-- " + toCheck.substring(i, i + 1).hashCode());
332                 log.println("Code: " + Integer.toHexString(charsInt[i]));
333                 log.println("Gained: " + unicode_script[get]);
334                 log.println("Expected: " + unicode_script[i]);
335             }
336         }
337         tRes.tested("getScript()", res);
338     }
339 
340     /**
341     * After defining a string to be parsed and parse conditions (flags), test
342     * calls the method for different locales three times with different parameters,
343     * checking result after every call.  <p>
344     * Has <b> OK </b> status if the method returns right results all three
345     * times.
346     */
347     public void _parseAnyToken() {
348         int nStartFlags = KParseTokens.ANY_ALPHA | KParseTokens.ASC_UNDERSCORE;
349         int nContFlags = KParseTokens.ANY_ALNUM | KParseTokens.ASC_UNDERSCORE
350                         | KParseTokens.ASC_DOT;
351         String toCheck = " 18 i18n ^";
352         ParseResult pRes = null;
353         boolean res = true;
354 
355         for (int i=0;i<7;i++) {
356             pRes = oObj.parseAnyToken(toCheck, 1, getLocale(i),
357                 nStartFlags, "", nContFlags, "");
358             res = ( (pRes.CharLen==2)
359                  && (pRes.TokenType==32)
360                  && (pRes.Value==18.0) );
361             pRes = oObj.parseAnyToken(toCheck, 4, getLocale(i),
362                 nStartFlags, "", nContFlags, "");
363             res &= ( (pRes.CharLen==4)
364                   && (pRes.TokenType==4)
365                   && (pRes.Value==0.0) );
366             pRes = oObj.parseAnyToken(toCheck, 9, getLocale(i),
367                 nStartFlags, "", nContFlags, "");
368             res &= ( (pRes.CharLen==1)
369                   && (pRes.TokenType==1)
370                   && (pRes.Value==0.0) );
371         }
372         tRes.tested("parseAnyToken()", res);
373     }
374 
375     /**
376     * After defining a string to be parsed and parse conditions (flags), test
377     * calls the method for different locales two times with different parameters,
378     * checking result after every call. <p>
379     * Has <b> OK </b> status if the method returns right results.
380     */
381     public void _parsePredefinedToken() {
382         int nStartFlags = KParseTokens.ANY_ALPHA | KParseTokens.ASC_UNDERSCORE;
383         int nContFlags = nStartFlags;
384         String toCheck = " 18 int";
385         ParseResult pRes = null;
386         boolean res = true;
387 
388         for (int i=0;i<7;i++) {
389             pRes = oObj.parsePredefinedToken(KParseType.IDENTNAME, toCheck,
390                 1, getLocale(i), nStartFlags, "", nContFlags, "");
391             res = (pRes.CharLen==0);
392             pRes = oObj.parsePredefinedToken(KParseType.IDENTNAME, toCheck,
393                 4, getLocale(i), nStartFlags, "", nContFlags, "");
394             res &= ( (pRes.CharLen==3)
395                   && (pRes.TokenType==4)
396                   && (pRes.Value==0.0) );
397         }
398         tRes.tested("parsePredefinedToken()", res);
399     }
400 
401 
402     /**
403     * Method returns locale for a given language and country.
404     * @param localeIndex index of needed locale.
405     */
406     private Locale getLocale(int k) {
407         return new Locale(languages[k],countries[k],"");
408     }
409 
410 
411 } // end XCharacterClassification
412 
413