xref: /aoo41x/main/sal/textenc/tenchelp.c (revision 647f063d)
1*647f063dSAndrew Rist /**************************************************************
2cdf0e10cSrcweir  *
3*647f063dSAndrew Rist  * Licensed to the Apache Software Foundation (ASF) under one
4*647f063dSAndrew Rist  * or more contributor license agreements.  See the NOTICE file
5*647f063dSAndrew Rist  * distributed with this work for additional information
6*647f063dSAndrew Rist  * regarding copyright ownership.  The ASF licenses this file
7*647f063dSAndrew Rist  * to you under the Apache License, Version 2.0 (the
8*647f063dSAndrew Rist  * "License"); you may not use this file except in compliance
9*647f063dSAndrew Rist  * with the License.  You may obtain a copy of the License at
10*647f063dSAndrew Rist  *
11*647f063dSAndrew Rist  *   http://www.apache.org/licenses/LICENSE-2.0
12*647f063dSAndrew Rist  *
13*647f063dSAndrew Rist  * Unless required by applicable law or agreed to in writing,
14*647f063dSAndrew Rist  * software distributed under the License is distributed on an
15*647f063dSAndrew Rist  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*647f063dSAndrew Rist  * KIND, either express or implied.  See the License for the
17*647f063dSAndrew Rist  * specific language governing permissions and limitations
18*647f063dSAndrew Rist  * under the License.
19*647f063dSAndrew Rist  *
20*647f063dSAndrew Rist  *************************************************************/
21*647f063dSAndrew Rist 
22*647f063dSAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir #include "tenchelp.h"
25cdf0e10cSrcweir #include "unichars.h"
26cdf0e10cSrcweir #include "rtl/textcvt.h"
27cdf0e10cSrcweir #include "sal/types.h"
28cdf0e10cSrcweir 
/* Stores a one-byte ASCII replacement for an undefined character in *pBuf.
   Returns sal_False (and writes nothing) when nMaxLen is 0. */
29cdf0e10cSrcweir static sal_Bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags,
30cdf0e10cSrcweir                                                sal_Char * pBuf,
31cdf0e10cSrcweir                                                sal_Size nMaxLen);
32cdf0e10cSrcweir 
/* Stores a one-byte ASCII replacement for an invalid character in *pBuf.
   Returns sal_False (and writes nothing) when nMaxLen is 0. */
33cdf0e10cSrcweir static sal_Bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags,
34cdf0e10cSrcweir                                              sal_Char * pBuf,
35cdf0e10cSrcweir                                              sal_Size nMaxLen);
36cdf0e10cSrcweir 
/* Nonzero when c is to be skipped according to the NONSPACING_IGNORE,
   CONTROL_IGNORE and PRIVATE_IGNORE bits of nFlags. */
37cdf0e10cSrcweir static int ImplIsUnicodeIgnoreChar(sal_Unicode c, sal_uInt32 nFlags);
38cdf0e10cSrcweir 
ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags,sal_Char * pBuf,sal_Size nMaxLen)39cdf0e10cSrcweir sal_Bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags,
40cdf0e10cSrcweir                                         sal_Char * pBuf,
41cdf0e10cSrcweir                                         sal_Size nMaxLen)
42cdf0e10cSrcweir {
43cdf0e10cSrcweir     if (nMaxLen == 0)
44cdf0e10cSrcweir         return sal_False;
45cdf0e10cSrcweir     switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK)
46cdf0e10cSrcweir     {
47cdf0e10cSrcweir     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0:
48cdf0e10cSrcweir         *pBuf = 0x00;
49cdf0e10cSrcweir         break;
50cdf0e10cSrcweir 
51cdf0e10cSrcweir     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK:
52cdf0e10cSrcweir     default: /* RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT */
53cdf0e10cSrcweir         *pBuf = 0x3F;
54cdf0e10cSrcweir         break;
55cdf0e10cSrcweir 
56cdf0e10cSrcweir     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_UNDERLINE:
57cdf0e10cSrcweir         *pBuf = 0x5F;
58cdf0e10cSrcweir         break;
59cdf0e10cSrcweir     }
60cdf0e10cSrcweir     return sal_True;
61cdf0e10cSrcweir }
62cdf0e10cSrcweir 
ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags,sal_Char * pBuf,sal_Size nMaxLen)63cdf0e10cSrcweir sal_Bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags,
64cdf0e10cSrcweir                                       sal_Char * pBuf,
65cdf0e10cSrcweir                                       sal_Size nMaxLen)
66cdf0e10cSrcweir {
67cdf0e10cSrcweir     if (nMaxLen == 0)
68cdf0e10cSrcweir         return sal_False;
69cdf0e10cSrcweir     switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK)
70cdf0e10cSrcweir     {
71cdf0e10cSrcweir     case RTL_UNICODETOTEXT_FLAGS_INVALID_0:
72cdf0e10cSrcweir         *pBuf = 0x00;
73cdf0e10cSrcweir         break;
74cdf0e10cSrcweir 
75cdf0e10cSrcweir     case RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK:
76cdf0e10cSrcweir     default: /* RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT */
77cdf0e10cSrcweir         *pBuf = 0x3F;
78cdf0e10cSrcweir         break;
79cdf0e10cSrcweir 
80cdf0e10cSrcweir     case RTL_UNICODETOTEXT_FLAGS_INVALID_UNDERLINE:
81cdf0e10cSrcweir         *pBuf = 0x5F;
82cdf0e10cSrcweir         break;
83cdf0e10cSrcweir     }
84cdf0e10cSrcweir     return sal_True;
85cdf0e10cSrcweir }
86cdf0e10cSrcweir 
/*
 * Tell whether the character c is to be dropped from the output.
 *
 * A character is dropped when one of the *_IGNORE bits is set in nFlags and
 * the matching classification test succeeds for c:
 *   NONSPACING_IGNORE with ImplIsZeroWidth,
 *   CONTROL_IGNORE    with ImplIsControlOrFormat,
 *   PRIVATE_IGNORE    with ImplIsPrivateUse.
 *
 * Returns 1 if c is to be ignored, 0 otherwise.
 */
int ImplIsUnicodeIgnoreChar( sal_Unicode c, sal_uInt32 nFlags )
{
    if ((nFlags & RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE) != 0
        && ImplIsZeroWidth(c))
    {
        return 1;
    }

    if ((nFlags & RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE) != 0
        && ImplIsControlOrFormat(c))
    {
        return 1;
    }

    return (nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE) != 0
           && ImplIsPrivateUse(c);
}
97cdf0e10cSrcweir 
98cdf0e10cSrcweir /* ======================================================================= */
99cdf0e10cSrcweir 
ImplGetUndefinedUnicodeChar(sal_uChar cChar,sal_uInt32 nFlags)100cdf0e10cSrcweir sal_Unicode ImplGetUndefinedUnicodeChar(sal_uChar cChar, sal_uInt32 nFlags)
101cdf0e10cSrcweir {
102cdf0e10cSrcweir     return ((nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK)
103cdf0e10cSrcweir                    == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE) ?
104cdf0e10cSrcweir                RTL_TEXTCVT_BYTE_PRIVATE_START + cChar :
105cdf0e10cSrcweir                RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
106cdf0e10cSrcweir }
107cdf0e10cSrcweir 
108cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
109cdf0e10cSrcweir 
110cdf0e10cSrcweir sal_Bool
ImplHandleUndefinedUnicodeToTextChar(ImplTextConverterData const * pData,sal_Unicode const ** ppSrcBuf,sal_Unicode const * pEndSrcBuf,sal_Char ** ppDestBuf,sal_Char const * pEndDestBuf,sal_uInt32 nFlags,sal_uInt32 * pInfo)111cdf0e10cSrcweir ImplHandleUndefinedUnicodeToTextChar(ImplTextConverterData const * pData,
112cdf0e10cSrcweir                                      sal_Unicode const ** ppSrcBuf,
113cdf0e10cSrcweir                                      sal_Unicode const * pEndSrcBuf,
114cdf0e10cSrcweir                                      sal_Char ** ppDestBuf,
115cdf0e10cSrcweir                                      sal_Char const * pEndDestBuf,
116cdf0e10cSrcweir                                      sal_uInt32 nFlags,
117cdf0e10cSrcweir                                      sal_uInt32 * pInfo)
118cdf0e10cSrcweir {
119cdf0e10cSrcweir     sal_Unicode c = **ppSrcBuf;
120cdf0e10cSrcweir 
121cdf0e10cSrcweir     (void) pData; /* unused */
122cdf0e10cSrcweir 
123cdf0e10cSrcweir     /* Should the private character map to one byte */
124cdf0e10cSrcweir     if ( (c >= RTL_TEXTCVT_BYTE_PRIVATE_START) && (c <= RTL_TEXTCVT_BYTE_PRIVATE_END) )
125cdf0e10cSrcweir     {
126cdf0e10cSrcweir         if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 )
127cdf0e10cSrcweir         {
128cdf0e10cSrcweir             **ppDestBuf = (sal_Char)(sal_uChar)(c-RTL_TEXTCVT_BYTE_PRIVATE_START);
129cdf0e10cSrcweir             (*ppDestBuf)++;
130cdf0e10cSrcweir             (*ppSrcBuf)++;
131cdf0e10cSrcweir             return sal_True;
132cdf0e10cSrcweir         }
133cdf0e10cSrcweir     }
134cdf0e10cSrcweir 
135cdf0e10cSrcweir     /* Should this character ignored (Private, Non Spacing, Control) */
136cdf0e10cSrcweir     if ( ImplIsUnicodeIgnoreChar( c, nFlags ) )
137cdf0e10cSrcweir     {
138cdf0e10cSrcweir         (*ppSrcBuf)++;
139cdf0e10cSrcweir         return sal_True;
140cdf0e10cSrcweir     }
141cdf0e10cSrcweir 
142cdf0e10cSrcweir     /* Surrogates Characters should result in */
143cdf0e10cSrcweir     /* one replacement character */
144cdf0e10cSrcweir     if (ImplIsHighSurrogate(c))
145cdf0e10cSrcweir     {
146cdf0e10cSrcweir         if ( *ppSrcBuf == pEndSrcBuf )
147cdf0e10cSrcweir         {
148cdf0e10cSrcweir             *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
149cdf0e10cSrcweir             return sal_False;
150cdf0e10cSrcweir         }
151cdf0e10cSrcweir 
152cdf0e10cSrcweir         c = *((*ppSrcBuf)+1);
153cdf0e10cSrcweir         if (ImplIsLowSurrogate(c))
154cdf0e10cSrcweir             (*ppSrcBuf)++;
155cdf0e10cSrcweir         else
156cdf0e10cSrcweir         {
157cdf0e10cSrcweir             *pInfo |= RTL_UNICODETOTEXT_INFO_INVALID;
158cdf0e10cSrcweir             if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR )
159cdf0e10cSrcweir             {
160cdf0e10cSrcweir                 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR;
161cdf0e10cSrcweir                 return sal_False;
162cdf0e10cSrcweir             }
163cdf0e10cSrcweir             else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE )
164cdf0e10cSrcweir             {
165cdf0e10cSrcweir                 (*ppSrcBuf)++;
166cdf0e10cSrcweir                 return sal_True;
167cdf0e10cSrcweir             }
168cdf0e10cSrcweir             else if (ImplGetInvalidAsciiMultiByte(nFlags,
169cdf0e10cSrcweir                                                   *ppDestBuf,
170cdf0e10cSrcweir                                                   pEndDestBuf - *ppDestBuf))
171cdf0e10cSrcweir             {
172cdf0e10cSrcweir                 ++*ppSrcBuf;
173cdf0e10cSrcweir                 ++*ppDestBuf;
174cdf0e10cSrcweir                 return sal_True;
175cdf0e10cSrcweir             }
176cdf0e10cSrcweir             else
177cdf0e10cSrcweir             {
178cdf0e10cSrcweir                 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR
179cdf0e10cSrcweir                               | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
180cdf0e10cSrcweir                 return sal_False;
181cdf0e10cSrcweir             }
182cdf0e10cSrcweir         }
183cdf0e10cSrcweir     }
184cdf0e10cSrcweir 
185cdf0e10cSrcweir     *pInfo |= RTL_UNICODETOTEXT_INFO_UNDEFINED;
186cdf0e10cSrcweir     if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR )
187cdf0e10cSrcweir     {
188cdf0e10cSrcweir         *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR;
189cdf0e10cSrcweir         return sal_False;
190cdf0e10cSrcweir     }
191cdf0e10cSrcweir     else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE )
192cdf0e10cSrcweir         (*ppSrcBuf)++;
193cdf0e10cSrcweir     else if (ImplGetUndefinedAsciiMultiByte(nFlags,
194cdf0e10cSrcweir                                             *ppDestBuf,
195cdf0e10cSrcweir                                             pEndDestBuf - *ppDestBuf))
196cdf0e10cSrcweir     {
197cdf0e10cSrcweir         ++*ppSrcBuf;
198cdf0e10cSrcweir         ++*ppDestBuf;
199cdf0e10cSrcweir     }
200cdf0e10cSrcweir     else
201cdf0e10cSrcweir     {
202cdf0e10cSrcweir         *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR
203cdf0e10cSrcweir                       | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
204cdf0e10cSrcweir         return sal_False;
205cdf0e10cSrcweir     }
206cdf0e10cSrcweir 
207cdf0e10cSrcweir     return sal_True;
208cdf0e10cSrcweir }
209cdf0e10cSrcweir 
210