xref: /trunk/main/sal/textenc/tenchelp.c (revision 647f063d)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 #include "tenchelp.h"
25 #include "unichars.h"
26 #include "rtl/textcvt.h"
27 #include "sal/types.h"
28 
29 static sal_Bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags,
30                                                sal_Char * pBuf,
31                                                sal_Size nMaxLen);
32 
33 static sal_Bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags,
34                                              sal_Char * pBuf,
35                                              sal_Size nMaxLen);
36 
37 static int ImplIsUnicodeIgnoreChar(sal_Unicode c, sal_uInt32 nFlags);
38 
ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags,sal_Char * pBuf,sal_Size nMaxLen)39 sal_Bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags,
40                                         sal_Char * pBuf,
41                                         sal_Size nMaxLen)
42 {
43     if (nMaxLen == 0)
44         return sal_False;
45     switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK)
46     {
47     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0:
48         *pBuf = 0x00;
49         break;
50 
51     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK:
52     default: /* RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT */
53         *pBuf = 0x3F;
54         break;
55 
56     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_UNDERLINE:
57         *pBuf = 0x5F;
58         break;
59     }
60     return sal_True;
61 }
62 
ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags,sal_Char * pBuf,sal_Size nMaxLen)63 sal_Bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags,
64                                       sal_Char * pBuf,
65                                       sal_Size nMaxLen)
66 {
67     if (nMaxLen == 0)
68         return sal_False;
69     switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK)
70     {
71     case RTL_UNICODETOTEXT_FLAGS_INVALID_0:
72         *pBuf = 0x00;
73         break;
74 
75     case RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK:
76     default: /* RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT */
77         *pBuf = 0x3F;
78         break;
79 
80     case RTL_UNICODETOTEXT_FLAGS_INVALID_UNDERLINE:
81         *pBuf = 0x5F;
82         break;
83     }
84     return sal_True;
85 }
86 
/*
 * Tells whether the character c is to be skipped during conversion.
 *
 * Returns 1 when one of the *_IGNORE bits is set in nFlags and c belongs to
 * the matching class (checked in this order: zero width, control or format,
 * private use); returns 0 otherwise.
 */
int ImplIsUnicodeIgnoreChar( sal_Unicode c, sal_uInt32 nFlags )
{
    if ((nFlags & RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE) != 0
        && ImplIsZeroWidth(c))
    {
        return 1;
    }

    if ((nFlags & RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE) != 0
        && ImplIsControlOrFormat(c))
    {
        return 1;
    }

    if ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE) != 0
        && ImplIsPrivateUse(c))
    {
        return 1;
    }

    return 0;
}
97 
98 /* ======================================================================= */
99 
ImplGetUndefinedUnicodeChar(sal_uChar cChar,sal_uInt32 nFlags)100 sal_Unicode ImplGetUndefinedUnicodeChar(sal_uChar cChar, sal_uInt32 nFlags)
101 {
102     return ((nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK)
103                    == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE) ?
104                RTL_TEXTCVT_BYTE_PRIVATE_START + cChar :
105                RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
106 }
107 
108 /* ----------------------------------------------------------------------- */
109 
110 sal_Bool
ImplHandleUndefinedUnicodeToTextChar(ImplTextConverterData const * pData,sal_Unicode const ** ppSrcBuf,sal_Unicode const * pEndSrcBuf,sal_Char ** ppDestBuf,sal_Char const * pEndDestBuf,sal_uInt32 nFlags,sal_uInt32 * pInfo)111 ImplHandleUndefinedUnicodeToTextChar(ImplTextConverterData const * pData,
112                                      sal_Unicode const ** ppSrcBuf,
113                                      sal_Unicode const * pEndSrcBuf,
114                                      sal_Char ** ppDestBuf,
115                                      sal_Char const * pEndDestBuf,
116                                      sal_uInt32 nFlags,
117                                      sal_uInt32 * pInfo)
118 {
119     sal_Unicode c = **ppSrcBuf;
120 
121     (void) pData; /* unused */
122 
123     /* Should the private character map to one byte */
124     if ( (c >= RTL_TEXTCVT_BYTE_PRIVATE_START) && (c <= RTL_TEXTCVT_BYTE_PRIVATE_END) )
125     {
126         if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 )
127         {
128             **ppDestBuf = (sal_Char)(sal_uChar)(c-RTL_TEXTCVT_BYTE_PRIVATE_START);
129             (*ppDestBuf)++;
130             (*ppSrcBuf)++;
131             return sal_True;
132         }
133     }
134 
135     /* Should this character ignored (Private, Non Spacing, Control) */
136     if ( ImplIsUnicodeIgnoreChar( c, nFlags ) )
137     {
138         (*ppSrcBuf)++;
139         return sal_True;
140     }
141 
142     /* Surrogates Characters should result in */
143     /* one replacement character */
144     if (ImplIsHighSurrogate(c))
145     {
146         if ( *ppSrcBuf == pEndSrcBuf )
147         {
148             *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
149             return sal_False;
150         }
151 
152         c = *((*ppSrcBuf)+1);
153         if (ImplIsLowSurrogate(c))
154             (*ppSrcBuf)++;
155         else
156         {
157             *pInfo |= RTL_UNICODETOTEXT_INFO_INVALID;
158             if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR )
159             {
160                 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR;
161                 return sal_False;
162             }
163             else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE )
164             {
165                 (*ppSrcBuf)++;
166                 return sal_True;
167             }
168             else if (ImplGetInvalidAsciiMultiByte(nFlags,
169                                                   *ppDestBuf,
170                                                   pEndDestBuf - *ppDestBuf))
171             {
172                 ++*ppSrcBuf;
173                 ++*ppDestBuf;
174                 return sal_True;
175             }
176             else
177             {
178                 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR
179                               | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
180                 return sal_False;
181             }
182         }
183     }
184 
185     *pInfo |= RTL_UNICODETOTEXT_INFO_UNDEFINED;
186     if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR )
187     {
188         *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR;
189         return sal_False;
190     }
191     else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE )
192         (*ppSrcBuf)++;
193     else if (ImplGetUndefinedAsciiMultiByte(nFlags,
194                                             *ppDestBuf,
195                                             pEndDestBuf - *ppDestBuf))
196     {
197         ++*ppSrcBuf;
198         ++*ppDestBuf;
199     }
200     else
201     {
202         *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR
203                       | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
204         return sal_False;
205     }
206 
207     return sal_True;
208 }
209 
210