xref: /trunk/main/sal/textenc/converter.c (revision cdf0e10c)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 #include "converter.h"
29 #include "tenchelp.h"
30 #include "unichars.h"
31 #include "rtl/textcvt.h"
32 #include "sal/types.h"
33 
34 ImplBadInputConversionAction ImplHandleBadInputTextToUnicodeConversion(
35     sal_Bool bUndefined, sal_Bool bMultiByte, sal_Char cByte, sal_uInt32 nFlags,
36     sal_Unicode ** pDestBufPtr, sal_Unicode * pDestBufEnd, sal_uInt32 * pInfo)
37 {
38     *pInfo |= bUndefined
39         ? (bMultiByte
40            ? RTL_TEXTTOUNICODE_INFO_MBUNDEFINED
41            : RTL_TEXTTOUNICODE_INFO_UNDEFINED)
42         : RTL_TEXTTOUNICODE_INFO_INVALID;
43     switch (nFlags
44             & (bUndefined
45                ? (bMultiByte
46                   ? RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK
47                   : RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK)
48                : RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK))
49     {
50     case RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR:
51     case RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR:
52     case RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR:
53         *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
54         return IMPL_BAD_INPUT_STOP;
55 
56     case RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE:
57     case RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE:
58     case RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE:
59         return IMPL_BAD_INPUT_CONTINUE;
60 
61     case RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE:
62         if (*pDestBufPtr != pDestBufEnd)
63         {
64             *(*pDestBufPtr)++ = RTL_TEXTCVT_BYTE_PRIVATE_START
65                 | ((sal_uChar) cByte);
66             return IMPL_BAD_INPUT_CONTINUE;
67         }
68         else
69             return IMPL_BAD_INPUT_NO_OUTPUT;
70 
71     default: /* RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT,
72                 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT,
73                 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT */
74         if (*pDestBufPtr != pDestBufEnd)
75         {
76             *(*pDestBufPtr)++ = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
77             return IMPL_BAD_INPUT_CONTINUE;
78         }
79         else
80             return IMPL_BAD_INPUT_NO_OUTPUT;
81     }
82 }
83 
84 ImplBadInputConversionAction
85 ImplHandleBadInputUnicodeToTextConversion(sal_Bool bUndefined,
86                                           sal_uInt32 nUtf32,
87                                           sal_uInt32 nFlags,
88                                           sal_Char ** pDestBufPtr,
89                                           sal_Char * pDestBufEnd,
90                                           sal_uInt32 * pInfo,
91                                           sal_Char const * pPrefix,
92                                           sal_Size nPrefixLen,
93                                           sal_Bool * pPrefixWritten)
94 {
95     /* TODO! RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
96              RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR */
97 
98     sal_Char cReplace;
99 
100     if (bUndefined)
101     {
102         if (ImplIsControlOrFormat(nUtf32))
103         {
104             if ((nFlags & RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE) != 0)
105                 nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE;
106         }
107         else if (ImplIsPrivateUse(nUtf32))
108         {
109             if ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE) != 0)
110                 nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE;
111             else if ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0) != 0)
112                 nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0;
113         }
114         else if (ImplIsZeroWidth(nUtf32))
115         {
116             if ((nFlags & RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE) != 0)
117                 nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE;
118         }
119     }
120     *pInfo |= bUndefined ? RTL_UNICODETOTEXT_INFO_UNDEFINED :
121                            RTL_UNICODETOTEXT_INFO_INVALID;
122     switch (nFlags & (bUndefined ? RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK :
123                                    RTL_UNICODETOTEXT_FLAGS_INVALID_MASK))
124     {
125     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR:
126     case RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR:
127         *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR;
128         return IMPL_BAD_INPUT_STOP;
129 
130     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE:
131     case RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE:
132         if (pPrefixWritten)
133             *pPrefixWritten = sal_False;
134         return IMPL_BAD_INPUT_CONTINUE;
135 
136     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0:
137     case RTL_UNICODETOTEXT_FLAGS_INVALID_0:
138         cReplace = 0;
139         break;
140 
141     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK:
142     case RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK:
143     default: /* RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT,
144                 RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT */
145         cReplace = '?';
146         break;
147 
148     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_UNDERLINE:
149     case RTL_UNICODETOTEXT_FLAGS_INVALID_UNDERLINE:
150         cReplace = '_';
151         break;
152     }
153     if ((sal_Size) (pDestBufEnd - *pDestBufPtr) > nPrefixLen)
154     {
155         while (nPrefixLen-- > 0)
156             *(*pDestBufPtr)++ = *pPrefix++;
157         *(*pDestBufPtr)++ = cReplace;
158         if (pPrefixWritten)
159             *pPrefixWritten = sal_True;
160         return IMPL_BAD_INPUT_CONTINUE;
161     }
162     else
163         return IMPL_BAD_INPUT_NO_OUTPUT;
164 }
165