xref: /aoo42x/main/sal/textenc/converter.c (revision 647f063d)
1*647f063dSAndrew Rist /**************************************************************
2cdf0e10cSrcweir  *
3*647f063dSAndrew Rist  * Licensed to the Apache Software Foundation (ASF) under one
4*647f063dSAndrew Rist  * or more contributor license agreements.  See the NOTICE file
5*647f063dSAndrew Rist  * distributed with this work for additional information
6*647f063dSAndrew Rist  * regarding copyright ownership.  The ASF licenses this file
7*647f063dSAndrew Rist  * to you under the Apache License, Version 2.0 (the
8*647f063dSAndrew Rist  * "License"); you may not use this file except in compliance
9*647f063dSAndrew Rist  * with the License.  You may obtain a copy of the License at
10*647f063dSAndrew Rist  *
11*647f063dSAndrew Rist  *   http://www.apache.org/licenses/LICENSE-2.0
12*647f063dSAndrew Rist  *
13*647f063dSAndrew Rist  * Unless required by applicable law or agreed to in writing,
14*647f063dSAndrew Rist  * software distributed under the License is distributed on an
15*647f063dSAndrew Rist  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*647f063dSAndrew Rist  * KIND, either express or implied.  See the License for the
17*647f063dSAndrew Rist  * specific language governing permissions and limitations
18*647f063dSAndrew Rist  * under the License.
19*647f063dSAndrew Rist  *
20*647f063dSAndrew Rist  *************************************************************/
21*647f063dSAndrew Rist 
22*647f063dSAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir #include "converter.h"
25cdf0e10cSrcweir #include "tenchelp.h"
26cdf0e10cSrcweir #include "unichars.h"
27cdf0e10cSrcweir #include "rtl/textcvt.h"
28cdf0e10cSrcweir #include "sal/types.h"
29cdf0e10cSrcweir 
ImplHandleBadInputTextToUnicodeConversion(sal_Bool bUndefined,sal_Bool bMultiByte,sal_Char cByte,sal_uInt32 nFlags,sal_Unicode ** pDestBufPtr,sal_Unicode * pDestBufEnd,sal_uInt32 * pInfo)30cdf0e10cSrcweir ImplBadInputConversionAction ImplHandleBadInputTextToUnicodeConversion(
31cdf0e10cSrcweir     sal_Bool bUndefined, sal_Bool bMultiByte, sal_Char cByte, sal_uInt32 nFlags,
32cdf0e10cSrcweir     sal_Unicode ** pDestBufPtr, sal_Unicode * pDestBufEnd, sal_uInt32 * pInfo)
33cdf0e10cSrcweir {
34cdf0e10cSrcweir     *pInfo |= bUndefined
35cdf0e10cSrcweir         ? (bMultiByte
36cdf0e10cSrcweir            ? RTL_TEXTTOUNICODE_INFO_MBUNDEFINED
37cdf0e10cSrcweir            : RTL_TEXTTOUNICODE_INFO_UNDEFINED)
38cdf0e10cSrcweir         : RTL_TEXTTOUNICODE_INFO_INVALID;
39cdf0e10cSrcweir     switch (nFlags
40cdf0e10cSrcweir             & (bUndefined
41cdf0e10cSrcweir                ? (bMultiByte
42cdf0e10cSrcweir                   ? RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK
43cdf0e10cSrcweir                   : RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK)
44cdf0e10cSrcweir                : RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK))
45cdf0e10cSrcweir     {
46cdf0e10cSrcweir     case RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR:
47cdf0e10cSrcweir     case RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR:
48cdf0e10cSrcweir     case RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR:
49cdf0e10cSrcweir         *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
50cdf0e10cSrcweir         return IMPL_BAD_INPUT_STOP;
51cdf0e10cSrcweir 
52cdf0e10cSrcweir     case RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE:
53cdf0e10cSrcweir     case RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE:
54cdf0e10cSrcweir     case RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE:
55cdf0e10cSrcweir         return IMPL_BAD_INPUT_CONTINUE;
56cdf0e10cSrcweir 
57cdf0e10cSrcweir     case RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE:
58cdf0e10cSrcweir         if (*pDestBufPtr != pDestBufEnd)
59cdf0e10cSrcweir         {
60cdf0e10cSrcweir             *(*pDestBufPtr)++ = RTL_TEXTCVT_BYTE_PRIVATE_START
61cdf0e10cSrcweir                 | ((sal_uChar) cByte);
62cdf0e10cSrcweir             return IMPL_BAD_INPUT_CONTINUE;
63cdf0e10cSrcweir         }
64cdf0e10cSrcweir         else
65cdf0e10cSrcweir             return IMPL_BAD_INPUT_NO_OUTPUT;
66cdf0e10cSrcweir 
67cdf0e10cSrcweir     default: /* RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT,
68cdf0e10cSrcweir                 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT,
69cdf0e10cSrcweir                 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT */
70cdf0e10cSrcweir         if (*pDestBufPtr != pDestBufEnd)
71cdf0e10cSrcweir         {
72cdf0e10cSrcweir             *(*pDestBufPtr)++ = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
73cdf0e10cSrcweir             return IMPL_BAD_INPUT_CONTINUE;
74cdf0e10cSrcweir         }
75cdf0e10cSrcweir         else
76cdf0e10cSrcweir             return IMPL_BAD_INPUT_NO_OUTPUT;
77cdf0e10cSrcweir     }
78cdf0e10cSrcweir }
79cdf0e10cSrcweir 
80cdf0e10cSrcweir ImplBadInputConversionAction
ImplHandleBadInputUnicodeToTextConversion(sal_Bool bUndefined,sal_uInt32 nUtf32,sal_uInt32 nFlags,sal_Char ** pDestBufPtr,sal_Char * pDestBufEnd,sal_uInt32 * pInfo,sal_Char const * pPrefix,sal_Size nPrefixLen,sal_Bool * pPrefixWritten)81cdf0e10cSrcweir ImplHandleBadInputUnicodeToTextConversion(sal_Bool bUndefined,
82cdf0e10cSrcweir                                           sal_uInt32 nUtf32,
83cdf0e10cSrcweir                                           sal_uInt32 nFlags,
84cdf0e10cSrcweir                                           sal_Char ** pDestBufPtr,
85cdf0e10cSrcweir                                           sal_Char * pDestBufEnd,
86cdf0e10cSrcweir                                           sal_uInt32 * pInfo,
87cdf0e10cSrcweir                                           sal_Char const * pPrefix,
88cdf0e10cSrcweir                                           sal_Size nPrefixLen,
89cdf0e10cSrcweir                                           sal_Bool * pPrefixWritten)
90cdf0e10cSrcweir {
91cdf0e10cSrcweir     /* TODO! RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
92cdf0e10cSrcweir              RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR */
93cdf0e10cSrcweir 
94cdf0e10cSrcweir     sal_Char cReplace;
95cdf0e10cSrcweir 
96cdf0e10cSrcweir     if (bUndefined)
97cdf0e10cSrcweir     {
98cdf0e10cSrcweir         if (ImplIsControlOrFormat(nUtf32))
99cdf0e10cSrcweir         {
100cdf0e10cSrcweir             if ((nFlags & RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE) != 0)
101cdf0e10cSrcweir                 nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE;
102cdf0e10cSrcweir         }
103cdf0e10cSrcweir         else if (ImplIsPrivateUse(nUtf32))
104cdf0e10cSrcweir         {
105cdf0e10cSrcweir             if ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE) != 0)
106cdf0e10cSrcweir                 nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE;
107cdf0e10cSrcweir             else if ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0) != 0)
108cdf0e10cSrcweir                 nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0;
109cdf0e10cSrcweir         }
110cdf0e10cSrcweir         else if (ImplIsZeroWidth(nUtf32))
111cdf0e10cSrcweir         {
112cdf0e10cSrcweir             if ((nFlags & RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE) != 0)
113cdf0e10cSrcweir                 nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE;
114cdf0e10cSrcweir         }
115cdf0e10cSrcweir     }
116cdf0e10cSrcweir     *pInfo |= bUndefined ? RTL_UNICODETOTEXT_INFO_UNDEFINED :
117cdf0e10cSrcweir                            RTL_UNICODETOTEXT_INFO_INVALID;
118cdf0e10cSrcweir     switch (nFlags & (bUndefined ? RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK :
119cdf0e10cSrcweir                                    RTL_UNICODETOTEXT_FLAGS_INVALID_MASK))
120cdf0e10cSrcweir     {
121cdf0e10cSrcweir     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR:
122cdf0e10cSrcweir     case RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR:
123cdf0e10cSrcweir         *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR;
124cdf0e10cSrcweir         return IMPL_BAD_INPUT_STOP;
125cdf0e10cSrcweir 
126cdf0e10cSrcweir     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE:
127cdf0e10cSrcweir     case RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE:
128cdf0e10cSrcweir         if (pPrefixWritten)
129cdf0e10cSrcweir             *pPrefixWritten = sal_False;
130cdf0e10cSrcweir         return IMPL_BAD_INPUT_CONTINUE;
131cdf0e10cSrcweir 
132cdf0e10cSrcweir     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0:
133cdf0e10cSrcweir     case RTL_UNICODETOTEXT_FLAGS_INVALID_0:
134cdf0e10cSrcweir         cReplace = 0;
135cdf0e10cSrcweir         break;
136cdf0e10cSrcweir 
137cdf0e10cSrcweir     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK:
138cdf0e10cSrcweir     case RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK:
139cdf0e10cSrcweir     default: /* RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT,
140cdf0e10cSrcweir                 RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT */
141cdf0e10cSrcweir         cReplace = '?';
142cdf0e10cSrcweir         break;
143cdf0e10cSrcweir 
144cdf0e10cSrcweir     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_UNDERLINE:
145cdf0e10cSrcweir     case RTL_UNICODETOTEXT_FLAGS_INVALID_UNDERLINE:
146cdf0e10cSrcweir         cReplace = '_';
147cdf0e10cSrcweir         break;
148cdf0e10cSrcweir     }
149cdf0e10cSrcweir     if ((sal_Size) (pDestBufEnd - *pDestBufPtr) > nPrefixLen)
150cdf0e10cSrcweir     {
151cdf0e10cSrcweir         while (nPrefixLen-- > 0)
152cdf0e10cSrcweir             *(*pDestBufPtr)++ = *pPrefix++;
153cdf0e10cSrcweir         *(*pDestBufPtr)++ = cReplace;
154cdf0e10cSrcweir         if (pPrefixWritten)
155cdf0e10cSrcweir             *pPrefixWritten = sal_True;
156cdf0e10cSrcweir         return IMPL_BAD_INPUT_CONTINUE;
157cdf0e10cSrcweir     }
158cdf0e10cSrcweir     else
159cdf0e10cSrcweir         return IMPL_BAD_INPUT_NO_OUTPUT;
160cdf0e10cSrcweir }
161