1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 #include "tenchelp.h"
25 #include "unichars.h"
26 #include "rtl/textcvt.h"
27 #include "sal/types.h"
28
29 static sal_Bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags,
30 sal_Char * pBuf,
31 sal_Size nMaxLen);
32
33 static sal_Bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags,
34 sal_Char * pBuf,
35 sal_Size nMaxLen);
36
37 static int ImplIsUnicodeIgnoreChar(sal_Unicode c, sal_uInt32 nFlags);
38
ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags,sal_Char * pBuf,sal_Size nMaxLen)39 sal_Bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags,
40 sal_Char * pBuf,
41 sal_Size nMaxLen)
42 {
43 if (nMaxLen == 0)
44 return sal_False;
45 switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK)
46 {
47 case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0:
48 *pBuf = 0x00;
49 break;
50
51 case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK:
52 default: /* RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT */
53 *pBuf = 0x3F;
54 break;
55
56 case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_UNDERLINE:
57 *pBuf = 0x5F;
58 break;
59 }
60 return sal_True;
61 }
62
ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags,sal_Char * pBuf,sal_Size nMaxLen)63 sal_Bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags,
64 sal_Char * pBuf,
65 sal_Size nMaxLen)
66 {
67 if (nMaxLen == 0)
68 return sal_False;
69 switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK)
70 {
71 case RTL_UNICODETOTEXT_FLAGS_INVALID_0:
72 *pBuf = 0x00;
73 break;
74
75 case RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK:
76 default: /* RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT */
77 *pBuf = 0x3F;
78 break;
79
80 case RTL_UNICODETOTEXT_FLAGS_INVALID_UNDERLINE:
81 *pBuf = 0x5F;
82 break;
83 }
84 return sal_True;
85 }
86
/*
 * Tells whether c is to be skipped according to the ..._IGNORE bits of
 * nFlags. Each bit is paired with one character-class test:
 *   RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE -> ImplIsZeroWidth
 *   RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE    -> ImplIsControlOrFormat
 *   RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE    -> ImplIsPrivateUse
 *
 * Returns 1 if any enabled test accepts c, 0 otherwise.
 */
int ImplIsUnicodeIgnoreChar(sal_Unicode c, sal_uInt32 nFlags)
{
    if ((nFlags & RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE) != 0
        && ImplIsZeroWidth(c))
    {
        return 1;
    }

    if ((nFlags & RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE) != 0
        && ImplIsControlOrFormat(c))
    {
        return 1;
    }

    return (nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE) != 0
        && ImplIsPrivateUse(c);
}
97
98 /* ======================================================================= */
99
ImplGetUndefinedUnicodeChar(sal_uChar cChar,sal_uInt32 nFlags)100 sal_Unicode ImplGetUndefinedUnicodeChar(sal_uChar cChar, sal_uInt32 nFlags)
101 {
102 return ((nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK)
103 == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE) ?
104 RTL_TEXTCVT_BYTE_PRIVATE_START + cChar :
105 RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
106 }
107
108 /* ----------------------------------------------------------------------- */
109
110 sal_Bool
ImplHandleUndefinedUnicodeToTextChar(ImplTextConverterData const * pData,sal_Unicode const ** ppSrcBuf,sal_Unicode const * pEndSrcBuf,sal_Char ** ppDestBuf,sal_Char const * pEndDestBuf,sal_uInt32 nFlags,sal_uInt32 * pInfo)111 ImplHandleUndefinedUnicodeToTextChar(ImplTextConverterData const * pData,
112 sal_Unicode const ** ppSrcBuf,
113 sal_Unicode const * pEndSrcBuf,
114 sal_Char ** ppDestBuf,
115 sal_Char const * pEndDestBuf,
116 sal_uInt32 nFlags,
117 sal_uInt32 * pInfo)
118 {
119 sal_Unicode c = **ppSrcBuf;
120
121 (void) pData; /* unused */
122
123 /* Should the private character map to one byte */
124 if ( (c >= RTL_TEXTCVT_BYTE_PRIVATE_START) && (c <= RTL_TEXTCVT_BYTE_PRIVATE_END) )
125 {
126 if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 )
127 {
128 **ppDestBuf = (sal_Char)(sal_uChar)(c-RTL_TEXTCVT_BYTE_PRIVATE_START);
129 (*ppDestBuf)++;
130 (*ppSrcBuf)++;
131 return sal_True;
132 }
133 }
134
135 /* Should this character ignored (Private, Non Spacing, Control) */
136 if ( ImplIsUnicodeIgnoreChar( c, nFlags ) )
137 {
138 (*ppSrcBuf)++;
139 return sal_True;
140 }
141
142 /* Surrogates Characters should result in */
143 /* one replacement character */
144 if (ImplIsHighSurrogate(c))
145 {
146 if ( *ppSrcBuf == pEndSrcBuf )
147 {
148 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
149 return sal_False;
150 }
151
152 c = *((*ppSrcBuf)+1);
153 if (ImplIsLowSurrogate(c))
154 (*ppSrcBuf)++;
155 else
156 {
157 *pInfo |= RTL_UNICODETOTEXT_INFO_INVALID;
158 if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR )
159 {
160 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR;
161 return sal_False;
162 }
163 else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE )
164 {
165 (*ppSrcBuf)++;
166 return sal_True;
167 }
168 else if (ImplGetInvalidAsciiMultiByte(nFlags,
169 *ppDestBuf,
170 pEndDestBuf - *ppDestBuf))
171 {
172 ++*ppSrcBuf;
173 ++*ppDestBuf;
174 return sal_True;
175 }
176 else
177 {
178 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR
179 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
180 return sal_False;
181 }
182 }
183 }
184
185 *pInfo |= RTL_UNICODETOTEXT_INFO_UNDEFINED;
186 if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR )
187 {
188 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR;
189 return sal_False;
190 }
191 else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE )
192 (*ppSrcBuf)++;
193 else if (ImplGetUndefinedAsciiMultiByte(nFlags,
194 *ppDestBuf,
195 pEndDestBuf - *ppDestBuf))
196 {
197 ++*ppSrcBuf;
198 ++*ppDestBuf;
199 }
200 else
201 {
202 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR
203 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
204 return sal_False;
205 }
206
207 return sal_True;
208 }
209
210