1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 #include "unichars.h" 23 #include "osl/diagnose.h" 24 #include "sal/types.h" 25 26 int ImplIsNoncharacter(sal_uInt32 nUtf32) 27 { 28 /* All code points that are noncharacters, as of Unicode 3.1.1. */ 29 return (nUtf32 >= 0xFDD0 && nUtf32 <= 0xFDEF) 30 || (nUtf32 & 0xFFFF) >= 0xFFFE 31 || nUtf32 > 0x10FFFF; 32 } 33 34 int ImplIsControlOrFormat(sal_uInt32 nUtf32) 35 { 36 /* All code points of <http://www.unicode.org/Public/UNIDATA/ 37 UnicodeData.txt>, Version 3.1.1, that have a General Category of Cc 38 (Other, Control) or Cf (Other, Format). 39 */ 40 return nUtf32 <= 0x001F 41 || (nUtf32 >= 0x007F && nUtf32 <= 0x009F) 42 || nUtf32 == 0x070F /* SYRIAC ABBREVIATION MARK */ 43 || nUtf32 == 0x180B /* MONGOLIAN FREE VARIATION SELECTOR ONE */ 44 || nUtf32 == 0x180C /* MONGOLIAN FREE VARIATION SELECTOR TWO */ 45 || nUtf32 == 0x180D /* MONGOLIAN FREE VARIATION SELECTOR THREE */ 46 || nUtf32 == 0x180E /* MONGOLIAN VOWEL SEPARATOR */ 47 || nUtf32 == 0x200C /* ZERO WIDTH NON-JOINER */ 48 || nUtf32 == 0x200D /* ZERO WIDTH JOINER */ 49 || nUtf32 == 0x200E /* LEFT-TO-RIGHT MARK */ 50 || nUtf32 == 0x200F /* RIGHT-TO-LEFT MARK */ 51 || nUtf32 == 0x202A /* LEFT-TO-RIGHT EMBEDDING */ 52 || nUtf32 == 0x202B /* RIGHT-TO-LEFT EMBEDDING */ 53 || nUtf32 == 0x202C /* POP DIRECTIONAL FORMATTING */ 54 || nUtf32 == 0x202D /* LEFT-TO-RIGHT OVERRIDE */ 55 || nUtf32 == 0x202E /* RIGHT-TO-LEFT OVERRIDE */ 56 || nUtf32 == 0x206A /* INHIBIT SYMMETRIC SWAPPING */ 57 || nUtf32 == 0x206B /* ACTIVATE SYMMETRIC SWAPPING */ 58 || nUtf32 == 0x206C /* INHIBIT ARABIC FORM SHAPING */ 59 || nUtf32 == 0x206D /* ACTIVATE ARABIC FORM SHAPING */ 60 || nUtf32 == 0x206E /* NATIONAL DIGIT SHAPES */ 61 || nUtf32 == 0x206F /* NOMINAL DIGIT SHAPES */ 62 || nUtf32 == 0xFEFF /* ZERO WIDTH NO-BREAK SPACE */ 63 || nUtf32 == 0xFFF9 /* INTERLINEAR ANNOTATION ANCHOR */ 64 || nUtf32 == 0xFFFA /* INTERLINEAR ANNOTATION SEPARATOR */ 65 || nUtf32 == 0xFFFB /* INTERLINEAR ANNOTATION TERMINATOR */ 66 || nUtf32 == 0x1D173 /* MUSICAL SYMBOL BEGIN BEAM */ 67 || nUtf32 == 0x1D174 /* MUSICAL SYMBOL END BEAM */ 68 || nUtf32 == 0x1D175 /* MUSICAL SYMBOL BEGIN TIE */ 69 || nUtf32 == 0x1D176 /* MUSICAL SYMBOL END TIE */ 70 || nUtf32 == 0x1D177 /* MUSICAL SYMBOL BEGIN SLUR */ 71 || nUtf32 == 0x1D178 /* MUSICAL SYMBOL END SLUR */ 72 || nUtf32 == 0x1D179 /* MUSICAL SYMBOL BEGIN PHRASE */ 73 || nUtf32 == 0x1D17A /* MUSICAL SYMBOL END PHRASE */ 74 || nUtf32 == 0xE0001 /* LANGUAGE TAG */ 75 || (nUtf32 >= 0xE0020 && nUtf32 <= 0xE007F); 76 } 77 78 int ImplIsHighSurrogate(sal_uInt32 nUtf32) 79 { 80 /* All code points that are high-surrogates, as of Unicode 3.1.1. */ 81 return nUtf32 >= 0xD800 && nUtf32 <= 0xDBFF; 82 } 83 84 int ImplIsLowSurrogate(sal_uInt32 nUtf32) 85 { 86 /* All code points that are low-surrogates, as of Unicode 3.1.1. */ 87 return nUtf32 >= 0xDC00 && nUtf32 <= 0xDFFF; 88 } 89 90 int ImplIsPrivateUse(sal_uInt32 nUtf32) 91 { 92 /* All code points of <http://www.unicode.org/Public/UNIDATA/ 93 UnicodeData.txt>, Version 3.1.1, that have a General Category of Co 94 (Other, Private Use). 95 */ 96 return (nUtf32 >= 0xE000 && nUtf32 <= 0xF8FF) 97 || (nUtf32 >= 0xF0000 && nUtf32 <= 0xFFFFD) 98 || (nUtf32 >= 0x100000 && nUtf32 <= 0x10FFFD); 99 } 100 101 int ImplIsZeroWidth(sal_uInt32 nUtf32) 102 { 103 /* All code points of <http://www.unicode.org/Public/UNIDATA/ 104 UnicodeData.txt>, Version 3.1.1, that have "ZERO WIDTH" in their 105 Character name. 106 */ 107 return nUtf32 == 0x200B /* ZERO WIDTH SPACE */ 108 || nUtf32 == 0x200C /* ZERO WIDTH NON-JOINER */ 109 || nUtf32 == 0x200D /* ZERO WIDTH JOINER */ 110 || nUtf32 == 0xFEFF; /* ZERO WIDTH NO-BREAK SPACE */ 111 } 112 113 sal_uInt32 ImplGetHighSurrogate(sal_uInt32 nUtf32) 114 { 115 OSL_ENSURE(nUtf32 >= 0x10000, "specification violation"); 116 return ((nUtf32 - 0x10000) >> 10) | 0xD800; 117 } 118 119 sal_uInt32 ImplGetLowSurrogate(sal_uInt32 nUtf32) 120 { 121 OSL_ENSURE(nUtf32 >= 0x10000, "specification violation"); 122 return ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00; 123 } 124 125 sal_uInt32 ImplCombineSurrogates(sal_uInt32 nHigh, sal_uInt32 nLow) 126 { 127 OSL_ENSURE(ImplIsHighSurrogate(nHigh) && ImplIsLowSurrogate(nLow), 128 "specification violation"); 129 return (((nHigh & 0x3FF) << 10) | (nLow & 0x3FF)) + 0x10000; 130 } 131