xref: /trunk/main/sal/textenc/converter.c (revision 647f063d)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 #include "converter.h"
25 #include "tenchelp.h"
26 #include "unichars.h"
27 #include "rtl/textcvt.h"
28 #include "sal/types.h"
29 
ImplHandleBadInputTextToUnicodeConversion(sal_Bool bUndefined,sal_Bool bMultiByte,sal_Char cByte,sal_uInt32 nFlags,sal_Unicode ** pDestBufPtr,sal_Unicode * pDestBufEnd,sal_uInt32 * pInfo)30 ImplBadInputConversionAction ImplHandleBadInputTextToUnicodeConversion(
31     sal_Bool bUndefined, sal_Bool bMultiByte, sal_Char cByte, sal_uInt32 nFlags,
32     sal_Unicode ** pDestBufPtr, sal_Unicode * pDestBufEnd, sal_uInt32 * pInfo)
33 {
34     *pInfo |= bUndefined
35         ? (bMultiByte
36            ? RTL_TEXTTOUNICODE_INFO_MBUNDEFINED
37            : RTL_TEXTTOUNICODE_INFO_UNDEFINED)
38         : RTL_TEXTTOUNICODE_INFO_INVALID;
39     switch (nFlags
40             & (bUndefined
41                ? (bMultiByte
42                   ? RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK
43                   : RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK)
44                : RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK))
45     {
46     case RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR:
47     case RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR:
48     case RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR:
49         *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
50         return IMPL_BAD_INPUT_STOP;
51 
52     case RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE:
53     case RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE:
54     case RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE:
55         return IMPL_BAD_INPUT_CONTINUE;
56 
57     case RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE:
58         if (*pDestBufPtr != pDestBufEnd)
59         {
60             *(*pDestBufPtr)++ = RTL_TEXTCVT_BYTE_PRIVATE_START
61                 | ((sal_uChar) cByte);
62             return IMPL_BAD_INPUT_CONTINUE;
63         }
64         else
65             return IMPL_BAD_INPUT_NO_OUTPUT;
66 
67     default: /* RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT,
68                 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT,
69                 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT */
70         if (*pDestBufPtr != pDestBufEnd)
71         {
72             *(*pDestBufPtr)++ = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
73             return IMPL_BAD_INPUT_CONTINUE;
74         }
75         else
76             return IMPL_BAD_INPUT_NO_OUTPUT;
77     }
78 }
79 
80 ImplBadInputConversionAction
ImplHandleBadInputUnicodeToTextConversion(sal_Bool bUndefined,sal_uInt32 nUtf32,sal_uInt32 nFlags,sal_Char ** pDestBufPtr,sal_Char * pDestBufEnd,sal_uInt32 * pInfo,sal_Char const * pPrefix,sal_Size nPrefixLen,sal_Bool * pPrefixWritten)81 ImplHandleBadInputUnicodeToTextConversion(sal_Bool bUndefined,
82                                           sal_uInt32 nUtf32,
83                                           sal_uInt32 nFlags,
84                                           sal_Char ** pDestBufPtr,
85                                           sal_Char * pDestBufEnd,
86                                           sal_uInt32 * pInfo,
87                                           sal_Char const * pPrefix,
88                                           sal_Size nPrefixLen,
89                                           sal_Bool * pPrefixWritten)
90 {
91     /* TODO! RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
92              RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR */
93 
94     sal_Char cReplace;
95 
96     if (bUndefined)
97     {
98         if (ImplIsControlOrFormat(nUtf32))
99         {
100             if ((nFlags & RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE) != 0)
101                 nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE;
102         }
103         else if (ImplIsPrivateUse(nUtf32))
104         {
105             if ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE) != 0)
106                 nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE;
107             else if ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0) != 0)
108                 nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0;
109         }
110         else if (ImplIsZeroWidth(nUtf32))
111         {
112             if ((nFlags & RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE) != 0)
113                 nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE;
114         }
115     }
116     *pInfo |= bUndefined ? RTL_UNICODETOTEXT_INFO_UNDEFINED :
117                            RTL_UNICODETOTEXT_INFO_INVALID;
118     switch (nFlags & (bUndefined ? RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK :
119                                    RTL_UNICODETOTEXT_FLAGS_INVALID_MASK))
120     {
121     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR:
122     case RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR:
123         *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR;
124         return IMPL_BAD_INPUT_STOP;
125 
126     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE:
127     case RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE:
128         if (pPrefixWritten)
129             *pPrefixWritten = sal_False;
130         return IMPL_BAD_INPUT_CONTINUE;
131 
132     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0:
133     case RTL_UNICODETOTEXT_FLAGS_INVALID_0:
134         cReplace = 0;
135         break;
136 
137     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK:
138     case RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK:
139     default: /* RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT,
140                 RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT */
141         cReplace = '?';
142         break;
143 
144     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_UNDERLINE:
145     case RTL_UNICODETOTEXT_FLAGS_INVALID_UNDERLINE:
146         cReplace = '_';
147         break;
148     }
149     if ((sal_Size) (pDestBufEnd - *pDestBufPtr) > nPrefixLen)
150     {
151         while (nPrefixLen-- > 0)
152             *(*pDestBufPtr)++ = *pPrefix++;
153         *(*pDestBufPtr)++ = cReplace;
154         if (pPrefixWritten)
155             *pPrefixWritten = sal_True;
156         return IMPL_BAD_INPUT_CONTINUE;
157     }
158     else
159         return IMPL_BAD_INPUT_NO_OUTPUT;
160 }
161