1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22 #include "unichars.h"
23 #include "osl/diagnose.h"
24 #include "sal/types.h"
25
/* Tests whether nUtf32 is a noncharacter (as of Unicode 3.1.1).
 *
 * Returns 1 if nUtf32 lies above U+10FFFF, has its low 16 bits equal to
 * 0xFFFE or 0xFFFF, or lies in the range U+FDD0..U+FDEF; returns 0
 * otherwise.
 */
int ImplIsNoncharacter(sal_uInt32 nUtf32)
{
    /* Anything past the last code point U+10FFFF. */
    if (nUtf32 > 0x10FFFF)
    {
        return 1;
    }

    /* Low 16 bits are 0xFFFE or 0xFFFF. */
    if ((nUtf32 & 0xFFFF) >= 0xFFFE)
    {
        return 1;
    }

    /* The contiguous range U+FDD0..U+FDEF. */
    if (nUtf32 < 0xFDD0 || nUtf32 > 0xFDEF)
    {
        return 0;
    }
    return 1;
}
33
/* Tests whether nUtf32 has a General Category of Cc (Other, Control) or
 * Cf (Other, Format) according to
 * <http://www.unicode.org/Public/UNIDATA/UnicodeData.txt>, Version 3.1.1.
 *
 * Returns 1 for such code points and 0 for all others.
 */
int ImplIsControlOrFormat(sal_uInt32 nUtf32)
{
    /* Contiguous ranges are checked first. */
    if (nUtf32 <= 0x001F)
    {
        return 1;
    }
    if (nUtf32 >= 0x007F && nUtf32 <= 0x009F)
    {
        return 1;
    }
    if (nUtf32 >= 0xE0020 && nUtf32 <= 0xE007F)
    {
        return 1;
    }

    /* Individually listed code points. */
    switch (nUtf32)
    {
    case 0x070F:  /* SYRIAC ABBREVIATION MARK */
    case 0x180B:  /* MONGOLIAN FREE VARIATION SELECTOR ONE */
    case 0x180C:  /* MONGOLIAN FREE VARIATION SELECTOR TWO */
    case 0x180D:  /* MONGOLIAN FREE VARIATION SELECTOR THREE */
    case 0x180E:  /* MONGOLIAN VOWEL SEPARATOR */
    case 0x200C:  /* ZERO WIDTH NON-JOINER */
    case 0x200D:  /* ZERO WIDTH JOINER */
    case 0x200E:  /* LEFT-TO-RIGHT MARK */
    case 0x200F:  /* RIGHT-TO-LEFT MARK */
    case 0x202A:  /* LEFT-TO-RIGHT EMBEDDING */
    case 0x202B:  /* RIGHT-TO-LEFT EMBEDDING */
    case 0x202C:  /* POP DIRECTIONAL FORMATTING */
    case 0x202D:  /* LEFT-TO-RIGHT OVERRIDE */
    case 0x202E:  /* RIGHT-TO-LEFT OVERRIDE */
    case 0x206A:  /* INHIBIT SYMMETRIC SWAPPING */
    case 0x206B:  /* ACTIVATE SYMMETRIC SWAPPING */
    case 0x206C:  /* INHIBIT ARABIC FORM SHAPING */
    case 0x206D:  /* ACTIVATE ARABIC FORM SHAPING */
    case 0x206E:  /* NATIONAL DIGIT SHAPES */
    case 0x206F:  /* NOMINAL DIGIT SHAPES */
    case 0xFEFF:  /* ZERO WIDTH NO-BREAK SPACE */
    case 0xFFF9:  /* INTERLINEAR ANNOTATION ANCHOR */
    case 0xFFFA:  /* INTERLINEAR ANNOTATION SEPARATOR */
    case 0xFFFB:  /* INTERLINEAR ANNOTATION TERMINATOR */
    case 0x1D173: /* MUSICAL SYMBOL BEGIN BEAM */
    case 0x1D174: /* MUSICAL SYMBOL END BEAM */
    case 0x1D175: /* MUSICAL SYMBOL BEGIN TIE */
    case 0x1D176: /* MUSICAL SYMBOL END TIE */
    case 0x1D177: /* MUSICAL SYMBOL BEGIN SLUR */
    case 0x1D178: /* MUSICAL SYMBOL END SLUR */
    case 0x1D179: /* MUSICAL SYMBOL BEGIN PHRASE */
    case 0x1D17A: /* MUSICAL SYMBOL END PHRASE */
    case 0xE0001: /* LANGUAGE TAG */
        return 1;
    default:
        break;
    }
    return 0;
}
77
/* Tests whether nUtf32 is a high-surrogate code point (as of Unicode
 * 3.1.1), i.e. lies in the range U+D800..U+DBFF.
 *
 * Returns 1 if so, 0 otherwise.
 */
int ImplIsHighSurrogate(sal_uInt32 nUtf32)
{
    if (nUtf32 < 0xD800 || nUtf32 > 0xDBFF)
    {
        return 0;
    }
    return 1;
}
83
/* Tests whether nUtf32 is a low-surrogate code point (as of Unicode
 * 3.1.1), i.e. lies in the range U+DC00..U+DFFF.
 *
 * Returns 1 if so, 0 otherwise.
 */
int ImplIsLowSurrogate(sal_uInt32 nUtf32)
{
    if (nUtf32 < 0xDC00 || nUtf32 > 0xDFFF)
    {
        return 0;
    }
    return 1;
}
89
/* Tests whether nUtf32 has a General Category of Co (Other, Private Use)
 * according to <http://www.unicode.org/Public/UNIDATA/UnicodeData.txt>,
 * Version 3.1.1.
 *
 * Returns 1 if nUtf32 falls into one of the three ranges checked below,
 * 0 otherwise.
 */
int ImplIsPrivateUse(sal_uInt32 nUtf32)
{
    /* U+E000..U+F8FF */
    if (nUtf32 >= 0xE000 && nUtf32 <= 0xF8FF)
    {
        return 1;
    }

    /* U+F0000..U+FFFFD */
    if (nUtf32 >= 0xF0000 && nUtf32 <= 0xFFFFD)
    {
        return 1;
    }

    /* U+100000..U+10FFFD */
    if (nUtf32 >= 0x100000 && nUtf32 <= 0x10FFFD)
    {
        return 1;
    }

    return 0;
}
100
/* Tests whether nUtf32 is one of the code points of
 * <http://www.unicode.org/Public/UNIDATA/UnicodeData.txt>, Version 3.1.1,
 * whose character name contains "ZERO WIDTH".
 *
 * Returns 1 for the four code points listed below, 0 otherwise.
 */
int ImplIsZeroWidth(sal_uInt32 nUtf32)
{
    switch (nUtf32)
    {
    case 0x200B: /* ZERO WIDTH SPACE */
    case 0x200C: /* ZERO WIDTH NON-JOINER */
    case 0x200D: /* ZERO WIDTH JOINER */
    case 0xFEFF: /* ZERO WIDTH NO-BREAK SPACE */
        return 1;
    default:
        break;
    }
    return 0;
}
112
/* Computes the high surrogate for the code point nUtf32.
 *
 * nUtf32 is expected to be at least 0x10000; this precondition is checked
 * with OSL_ENSURE. The result is 0xD800 combined with the bits of
 * (nUtf32 - 0x10000) above the low ten.
 */
sal_uInt32 ImplGetHighSurrogate(sal_uInt32 nUtf32)
{
    sal_uInt32 nOffset;

    OSL_ENSURE(nUtf32 >= 0x10000, "specification violation");

    /* Distance from the first code point above U+FFFF. */
    nOffset = nUtf32 - 0x10000;
    return 0xD800 | (nOffset >> 10);
}
118
/* Computes the low surrogate for the code point nUtf32.
 *
 * nUtf32 is expected to be at least 0x10000; this precondition is checked
 * with OSL_ENSURE. The result is 0xDC00 combined with the low ten bits of
 * (nUtf32 - 0x10000).
 */
sal_uInt32 ImplGetLowSurrogate(sal_uInt32 nUtf32)
{
    sal_uInt32 nOffset;

    OSL_ENSURE(nUtf32 >= 0x10000, "specification violation");

    /* Distance from the first code point above U+FFFF. */
    nOffset = nUtf32 - 0x10000;
    return 0xDC00 | (nOffset & 0x3FF);
}
124
/* Combines a surrogate pair into a single code point.
 *
 * nHigh is expected to satisfy ImplIsHighSurrogate and nLow to satisfy
 * ImplIsLowSurrogate; this precondition is checked with OSL_ENSURE.
 * The low ten bits of nHigh become bits 10..19 and the low ten bits of
 * nLow become bits 0..9 of an offset that is added to 0x10000.
 */
sal_uInt32 ImplCombineSurrogates(sal_uInt32 nHigh, sal_uInt32 nLow)
{
    sal_uInt32 nUpperBits;
    sal_uInt32 nLowerBits;

    OSL_ENSURE(ImplIsHighSurrogate(nHigh) && ImplIsLowSurrogate(nLow),
               "specification violation");

    nUpperBits = (nHigh & 0x3FF) << 10;
    nLowerBits = nLow & 0x3FF;
    return 0x10000 + (nUpperBits | nLowerBits);
}
131