1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 package api.i18n; 25 26 import com.sun.star.i18n.KParseTokens; 27 import com.sun.star.i18n.KParseType; 28 import com.sun.star.i18n.ParseResult; 29 import com.sun.star.i18n.XCharacterClassification; 30 import com.sun.star.lang.Locale; 31 import com.sun.star.uno.UnoRuntime; 32 import com.sun.star.uno.XComponentContext; 33 import org.junit.After; 34 import org.junit.AfterClass; 35 import org.junit.Before; 36 import org.junit.BeforeClass; 37 import org.junit.Assert; 38 import org.junit.Ignore; 39 import org.junit.Test; 40 import org.openoffice.test.uno.UnoApp; 41 42 /** 43 * Testing <code>com.sun.star.i18n.XCharacterClassification</code> 44 * interface methods: 45 * <ul> 46 * <li><code> toUpper() </code></li> 47 * <li><code> toLower() </code></li> 48 * <li><code> toTitle() </code></li> 49 * <li><code> getType() </code></li> 50 * <li><code> getCharacterType() </code></li> 51 * <li><code> getStringType() </code></li> 52 * <li><code> getCharacterDirection() </code></li> 53 * <li><code> getScript() </code></li> 54 * <li><code> parseAnyToken() </code></li> 55 * <li><code> parsePredefinedToken() </code></li> 56 * </ul><p> 57 * Test is <b> NOT </b> multithread compliant. <p> 58 * @see com.sun.star.i18n.XCharacterClassification 59 */ 60 public class XCharacterClassificationTest { 61 private static final UnoApp app = new UnoApp(); 62 63 private XComponentContext xContext = null; 64 public XCharacterClassification oObj = null; 65 public String[] languages = new String[]{"de","en","es","fr","ja","ko","zh"}; 66 public String[] countries = new String[]{"DE","US","ES","FR","JP","KR","CN"}; 67 68 public String[] charstyles_java = new String[] {"UNASSIGNED","UPPERCASE_LETTER", 69 "LOWERCASE_LETTER","TITLECASE_LETTER","MODIFIER_LETTER","OTHER_LETTER", 70 "NON_SPACING_MARK","ENCLOSING_MARK","COMBINING_SPACING_MARK", 71 "DECIMAL_DIGIT_NUMBER","LETTER_NUMBER","OTHER_NUMBER","SPACE_SEPARATOR", 72 "LINE_SEPARATOR","PARAGRAPH_SEPARATOR","CONTROL","FORMAT","none17", 73 "PRIVATE_USE","none19","DASH_PUNCTUATION","START_PUNCTUATION","END_PUNCTUATION", 74 "CONNECTOR_PUNCTUATION","OTHER_PUNCTUATION","MATH_SYMBOL","CURRENCY_SYMBOL", 75 "MODIFIER_SYMBOL","OTHER_SYMBOL"}; 76 77 public String[] charstyles_office = new String[] {"UNASSIGNED","UPPERCASE_LETTER", 78 "LOWERCASE_LETTER","TITLECASE_LETTER","MODIFIER_LETTER","OTHER_LETTER", 79 "NON_SPACING_MARK","ENCLOSING_MARK","COMBINING_SPACING_MARK", 80 "DECIMAL_DIGIT_NUMBER","LETTER_NUMBER","OTHER_NUMBER","SPACE_SEPARATOR", 81 "LINE_SEPARATOR","PARAGRAPH_SEPARATOR","CONTROL","FORMAT","PRIVATE_USE", 82 "OTHER_PUNCTUATION","DASH_PUNCTUATION","START_PUNCTUATION","END_PUNCTUATION", 83 "CONNECTOR_PUNCTUATION", 84 "OTHER_PUNCTUATION","MATH_SYMBOL","CURRENCY_SYMBOL","MODIFIER_SYMBOL", 85 "OTHER_SYMBOL","INITIAL_PUNCTUATION","FINAL_PUNCTUATION","GENERAL_TYPES_COUNT"}; 86 87 public String[] unicode_script = new String[] {"U_BASIC_LATIN","U_LATIN_1_SUPPLEMENT", 88 "U_LATIN_EXTENDED_A","U_LATIN_EXTENDED_B","U_IPA_EXTENSIONS","U_SPACING_MODIFIER_LETTERS", 89 "U_COMBINING_DIACRITICAL_MARKS","U_GREEK","U_CYRILLIC","U_ARMENIAN","U_HEBREW", 90 "U_ARABIC","U_SYRIAC","U_THAANA","U_DEVANAGARI","U_BENGALI","U_GURMUKHI", 91 "U_GUJARATI","U_ORIYA","U_TAMIL","U_TELUGU","U_KANNADA","U_MALAYALAM", 92 "U_SINHALA","U_THAI","U_LAO","U_TIBETAN","U_MYANMAR","U_GEORGIAN", 93 "U_HANGUL_JAMO","U_ETHIOPIC","U_CHEROKEE","U_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 94 "U_OGHAM","U_RUNIC","U_KHMER","U_MONGOLIAN","U_LATIN_EXTENDED_ADDITIONAL", 95 "U_GREEK_EXTENDED","U_GENERAL_PUNCTUATION","U_SUPERSCRIPTS_AND_SUBSCRIPTS", 96 "U_CURRENCY_SYMBOLS","U_COMBINING_MARKS_FOR_SYMBOLS","U_LETTERLIKE_SYMBOLS", 97 "U_NUMBER_FORMS","U_ARROWS","U_MATHEMATICAL_OPERATORS","U_MISCELLANEOUS_TECHNICAL", 98 "U_CONTROL_PICTURES","U_OPTICAL_CHARACTER_RECOGNITION","U_ENCLOSED_ALPHANUMERICS", 99 "U_BOX_DRAWING","U_BLOCK_ELEMENTS","U_GEOMETRIC_SHAPES","U_MISCELLANEOUS_SYMBOLS", 100 "U_DINGBATS","U_BRAILLE_PATTERNS","U_CJK_RADICALS_SUPPLEMENT","U_KANGXI_RADICALS", 101 "U_IDEOGRAPHIC_DESCRIPTION_CHARACTERS","U_CJK_SYMBOLS_AND_PUNCTUATION", 102 "U_HIRAGANA","U_KATAKANA","U_BOPOMOFO","U_HANGUL_COMPATIBILITY_JAMO","U_KANBUN", 103 "U_BOPOMOFO_EXTENDED","U_ENCLOSED_CJK_LETTERS_AND_MONTHS","U_CJK_COMPATIBILITY", 104 "U_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A","U_CJK_UNIFIED_IDEOGRAPHS","U_YI_SYLLABLES", 105 "U_YI_RADICALS","U_HANGUL_SYLLABLES","U_HIGH_SURROGATES","U_HIGH_PRIVATE_USE_SURROGATES", 106 "U_LOW_SURROGATES","U_PRIVATE_USE_AREA","U_CJK_COMPATIBILITY_IDEOGRAPHS", 107 "U_ALPHABETIC_PRESENTATION_FORMS","U_ARABIC_PRESENTATION_FORMS_A","U_COMBINING_HALF_MARKS", 108 "U_CJK_COMPATIBILITY_FORMS","U_SMALL_FORM_VARIANTS","U_ARABIC_PRESENTATION_FORMS_B", 109 "U_SPECIALS","U_HALFWIDTH_AND_FULLWIDTH_FORMS","U_CHAR_SCRIPT_COUNT","U_NO_SCRIPT"}; 110 111 // setup and close connections 112 @BeforeClass 113 public static void setUpConnection() throws Exception 114 { 115 app.start(); 116 } 117 118 @AfterClass 119 public static void tearDownConnection() throws InterruptedException, com.sun.star.uno.Exception 120 { 121 app.close(); 122 } 123 124 @Before 125 public void before() throws Exception { 126 xContext = app.getComponentContext(); 127 oObj = UnoRuntime.queryInterface( 128 XCharacterClassification.class, 129 xContext.getServiceManager().createInstanceWithContext("com.sun.star.i18n.CharacterClassification", xContext) 130 ); 131 } 132 133 /** 134 * Test calls the method for different locales. Then each result is compared 135 * with a string, converted to a upper case using 136 * <code>java.lang.String</code> method <code>toUpperCase()</code>.<p> 137 * Has <b> OK </b> status if string, returned by the method is equal to 138 * a string that is returned by String.toUpperCase() for all locales. 139 */ 140 @Test 141 public void _toUpper() { 142 boolean res = true; 143 char[] characters = new char[]{586,65,97,498,721,4588,772,8413,3404}; 144 String toCheck = new String(characters); 145 String get = ""; 146 String exp = ""; 147 148 for (int i=0;i<7;i++) { 149 get = oObj.toUpper(toCheck, 0, toCheck.length(), getLocale(i)); 150 exp = toCheck.toUpperCase( 151 new java.util.Locale(languages[i], countries[i])); 152 res &= get.equals(exp); 153 if (!res) { 154 System.out.println("FAILED for: language=" + languages[i] + 155 " ; country=" + countries[i]); 156 System.out.println("Expected: " + exp); 157 System.out.println("Gained : " + get); 158 } 159 } 160 Assert.assertTrue("toUpper()", res); 161 } 162 163 /** 164 * Test calls the method for different locales. Then each result is compared 165 * with a string, converted to a lower case using 166 * <code>java.lang.String</code> method <code>toLowerCase()</code>.<p> 167 * Has <b> OK </b> status if string, returned by the method is equal to 168 * a string that is returned by String.toLowerCase() for all locales. 169 */ 170 @Test 171 @Ignore("Bug #87590 Incorrect casing for U+026A, U+0268, U+0197...") 172 public void _toLower() { 173 boolean res = true; 174 char[] characters = new char[]{586,65,97,498,721,4588,772,8413,3404}; 175 String toCheck = new String(characters); 176 String get = ""; 177 String exp = ""; 178 179 for (int i=0;i<7;i++) { 180 get = oObj.toLower(toCheck,0,toCheck.length(),getLocale(i)); 181 exp = toCheck.toLowerCase( 182 new java.util.Locale(languages[i],countries[i])); 183 res &= get.equals(exp); 184 if (!res) { 185 System.out.println("FAILED for: language=" + languages[i] 186 + " ; country=" + countries[i]); 187 System.out.println("Expected: " + exp); 188 System.out.println("Gained : " + get); 189 } 190 } 191 Assert.assertTrue("toLower()", res); 192 } 193 194 /** 195 * Test calls the method for different locales. Then each result is compared 196 * with a string, converted to a title case using 197 * <code>java.lang.Character</code> method <code>toTitleCase()</code>.<p> 198 * Has <b> OK </b> status if string, returned by the method is equal to 199 * a string that was converted using Character.toTitleCase() for all locales. 200 */ 201 @Test 202 public void _toTitle() { 203 boolean res = true; 204 String toCheck = new String(new char[]{8112}); 205 String get = ""; 206 String exp = ""; 207 208 for (int i=0;i<7;i++) { 209 get = oObj.toTitle(toCheck, 0, 1, getLocale(i)); 210 exp = new String( 211 new char[]{Character.toTitleCase(toCheck.toCharArray()[0])}); 212 res &= get.equals(exp); 213 if (!res) { 214 System.out.println("FAILED for: language=" + languages[i] 215 + " ; country=" + countries[i]); 216 System.out.println("Expected: " + exp); 217 System.out.println("Gained : " + get); 218 } 219 } 220 Assert.assertTrue("toTitle()", res); 221 } 222 223 /** 224 * At first we define <code>int[]</code> and <code>char[]</code> arrays of 225 * unicode symbol numbers, arranged as sequences, where symbols are sorted 226 * by type, so the character of <code>i<sup><small>th</small></sup></code> 227 * type is located on <code>i<sup><small>th</small></sup></code> position.<p> 228 * Has <b> OK </b> status if for all 30 types the method returns value, that 229 * is equal to an element number.<p> 230 * @see com.sun.star.i18n.CharType 231 */ 232 @Test 233 public void _getType() { 234 boolean res = true; 235 // If or when this list gets out of date again, 236 // find new characters of the required type with: 237 // for (int i = 30; i <= 0xffff; i++) { 238 // String s = new String("" + (char)i); 239 // int type = oObj.getType(s, 0); 240 // System.out.println("character " + i + " has type " + charstyles_office[type]); 241 // } 242 char[] characters = new char[]{888,65,97,498,721,4588,772,8413,3404, 243 48,8544,179,32,8232,8233,144,8204,57344,56320,45,40,41,95,3852,247, 244 3647,901,3896,171,187}; 245 String toCheck = new String(characters); 246 247 for (int i=0;i<characters.length;i++) { 248 int get = oObj.getType(toCheck, i); 249 res &= (charstyles_office[get] == charstyles_office[i]); 250 if (!res) { 251 System.out.println("Code: " + Integer.toHexString((int)characters[i])); 252 System.out.println("Gained: " + charstyles_office[get]); 253 System.out.println("Expected: " + charstyles_office[i]); 254 } 255 } 256 Assert.assertTrue("getType()", res); 257 } 258 259 /** 260 * After defining string to be checked and array of expected types, test 261 * calls the method for each character of a string and for all locales.<p> 262 * Has <b> OK </b> status if the method returns type, expected for a given 263 * character and locale. 264 */ 265 @Test 266 public void _getCharacterType() { 267 boolean res = true; 268 // If or when this list gets out of date again, 269 // find new characters of the required type with: 270 //for (int i = 32; i <= 0xffff; i++) { 271 // int charType = oObj.getCharacterType("" + (char)i, 0, getLocale(1)); 272 // if (charType == 32) { 273 // System.out.println("character " + i + " has type 32"); 274 // } 275 //} 276 String toCheck = "Ab0*"; 277 int[] expected = new int[]{226,228,97,32}; 278 279 for (int i=0;i<toCheck.length();i++) { 280 for (int j=1;j<7;j++) { 281 int get = oObj.getCharacterType(toCheck, i, getLocale(j)); 282 res &= (get == expected[i]); 283 if (!res) { 284 System.out.println("FAILED for: language=" + languages[j] + 285 " ; country=" + countries[j]); 286 System.out.println("Symbol: " + toCheck.toCharArray()[i]); 287 System.out.println("Gained: " + get); 288 System.out.println("Expected: " + expected[i]); 289 } 290 } 291 } 292 Assert.assertTrue("getCharacterType()", res); 293 } 294 295 /** 296 * After defining array of strings to be checked and array of expected types, 297 * test calls the method for each string of an array and for all locales.<p> 298 * Has <b> OK </b> status if the method returns type, expected for a given 299 * string and locale. 300 */ 301 @Test 302 public void _getStringType() { 303 boolean res = true; 304 String[] toCheck = new String[]{"01234","AAAAA","bbbbb","AA()bb"}; 305 int[] exp = new int[]{97,226,228,230}; 306 307 for (int j=0;j<toCheck.length;j++) { 308 for (int i=0;i<7;i++) { 309 int get = oObj.getStringType(toCheck[j], 0, 310 toCheck[j].length(), getLocale(i)); 311 res &= (get == exp[j]); 312 if (!res) { 313 System.out.println("FAILED for: language=" + languages[i] + 314 " ; country=" + countries[i]); 315 System.out.println("Expected: " + exp[j]); 316 System.out.println("Gained : " + get); 317 } 318 } 319 } 320 Assert.assertTrue("getStringType()", res); 321 } 322 323 /** 324 * After string to be checked is initialized (all symbols are sorted 325 * by direction, so the character of <code>i<sup><small>th</small></sup></code> 326 * direction is located on <code>i<sup><small>th</small></sup></code> 327 * position), test calls the method for every character of that string. <p> 328 * Has <b> OK </b> status if the method returns direction, that's equal to 329 * a symbol position in the string. 330 */ 331 @Test 332 public void _getCharacterDirection() { 333 boolean res = true; 334 String toCheck = new String(new char[]{65,1470,48,43,35,1632,44,10, 335 9,12,33,8234,8237,1563,8235,8238,8236,768,1}); 336 for (short i=0;i<19;i++) { 337 short get = oObj.getCharacterDirection(toCheck, i); 338 res &= (get == i); 339 if (!res) { 340 System.out.println("Code :" + toCheck.toCharArray()[i]); 341 System.out.println("Gained: " + get); 342 System.out.println("Expected: " + i); 343 } 344 } 345 Assert.assertTrue("getCharacterDirection()", res); 346 } 347 348 /** 349 * At first we define <code>int[]</code> and <code>char[]</code> arrays of 350 * unicode symbol numbers, arranged as sequences, where symbols are sorted 351 * by type, so the character of <code>i<sup><small>th</small></sup></code> 352 * type is located on <code>i<sup><small>th</small></sup></code> position.<p> 353 * Has <b> OK </b> status if for each character method returns value, that 354 * is equal to a number where element is located in array. Also method has 355 * <b> OK </b> status for symbol with code 55296, because it doesn't work 356 * since it hasn't the right neighborhood.<p> 357 * @see "http://ppewww.ph.gla.ac.uk/~flavell/unicode/unidata.html" 358 */ 359 @Test 360 public void _getScript() { 361 boolean res = true; 362 char[] characters = new char[]{65,128,256,384,592,750,773,924,1030,1331,1448, 363 1569,1792,1936,2313,2465,2570,2707,2822,2972,3079,3240,3337,3464,3590, 364 3745,3906,4097,4274,4357,4621,5040,5200,5776,5806,6030,6155,7683,7943, 365 8202,8319,8352,8413,8452,8545,8616,8715,8965,9217,9281,9336,9474,9608,9719, 366 9734,9999,10247,11911,12034,12274,12294,12358,12456,12552,12605,12688,12727, 367 12806,13065,13312,19968,40964,42152,44032,55296,56192,56320,57344,63744, 368 64257,64370,65056,65073,65131,65146,65532,65288}; 369 String toCheck = new String(characters); 370 371 for (int i=0;i<characters.length;i++) { 372 int get = oObj.getScript(toCheck, i); 373 //The HIGH_SURROGATE 55296 doesn't work since it hasn't the right 374 //neighborhood. Neither does 56192 - it combines with 55296 into another code point. 375 if (toCheck.charAt(i) != 55296 && toCheck.charAt(i) != 56192) { 376 res &= (get == i); 377 } 378 if (!res) { 379 System.out.println("-- " + toCheck.substring(i, i + 1).hashCode()); 380 System.out.println("Code: " + Integer.toHexString((int)characters[i])); 381 System.out.println("Gained: " + get + " (" + (0 <= get && get < unicode_script.length ? unicode_script[get] : "out of range") + ")"); 382 System.out.println("Expected: " + unicode_script[i]); 383 } 384 } 385 Assert.assertTrue("getScript()", res); 386 } 387 388 /** 389 * After defining a string to be parsed and parse conditions (flags), test 390 * calls the method for different locales three times with different parameters, 391 * checking result after every call. <p> 392 * Has <b> OK </b> status if the method returns right results all three 393 * times. 394 */ 395 @Test 396 public void _parseAnyToken() { 397 int nStartFlags = KParseTokens.ANY_ALPHA | KParseTokens.ASC_UNDERSCORE; 398 int nContFlags = KParseTokens.ANY_ALNUM | KParseTokens.ASC_UNDERSCORE 399 | KParseTokens.ASC_DOT; 400 String toCheck = " 18 i18n ^"; 401 ParseResult pRes = null; 402 boolean res = true; 403 404 for (int i=0;i<7;i++) { 405 pRes = oObj.parseAnyToken(toCheck, 1, getLocale(i), 406 nStartFlags, "", nContFlags, ""); 407 res = ( (pRes.CharLen==2) 408 && (pRes.TokenType==32) 409 && (pRes.Value==18.0) ); 410 pRes = oObj.parseAnyToken(toCheck, 4, getLocale(i), 411 nStartFlags, "", nContFlags, ""); 412 res &= ( (pRes.CharLen==4) 413 && (pRes.TokenType==4) 414 && (pRes.Value==0.0) ); 415 pRes = oObj.parseAnyToken(toCheck, 9, getLocale(i), 416 nStartFlags, "", nContFlags, ""); 417 res &= ( (pRes.CharLen==1) 418 && (pRes.TokenType==1) 419 && (pRes.Value==0.0) ); 420 } 421 Assert.assertTrue("parseAnyToken()", res); 422 } 423 424 /** 425 * After defining a string to be parsed and parse conditions (flags), test 426 * calls the method for different locales two times with different parameters, 427 * checking result after every call. <p> 428 * Has <b> OK </b> status if the method returns right results. 429 */ 430 @Test 431 public void _parsePredefinedToken() { 432 int nStartFlags = KParseTokens.ANY_ALPHA | KParseTokens.ASC_UNDERSCORE; 433 int nContFlags = nStartFlags; 434 String toCheck = " 18 int"; 435 ParseResult pRes = null; 436 boolean res = true; 437 438 for (int i=0;i<7;i++) { 439 pRes = oObj.parsePredefinedToken(KParseType.IDENTNAME, toCheck, 440 1, getLocale(i), nStartFlags, "", nContFlags, ""); 441 res = (pRes.CharLen==0); 442 pRes = oObj.parsePredefinedToken(KParseType.IDENTNAME, toCheck, 443 4, getLocale(i), nStartFlags, "", nContFlags, ""); 444 res &= ( (pRes.CharLen==3) 445 && (pRes.TokenType==4) 446 && (pRes.Value==0.0) ); 447 } 448 Assert.assertTrue("parsePredefinedToken()", res); 449 } 450 451 452 /** 453 * Method returns locale for a given language and country. 454 * @param k index of needed locale. 455 */ 456 private Locale getLocale(int k) { 457 return new Locale(languages[k],countries[k],""); 458 } 459 460 461 } // end XCharacterClassification 462