1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 #include "converter.h" 29 #include "tenchelp.h" 30 #include "unichars.h" 31 #include "rtl/textcvt.h" 32 #include "sal/types.h" 33 34 ImplBadInputConversionAction ImplHandleBadInputTextToUnicodeConversion( 35 sal_Bool bUndefined, sal_Bool bMultiByte, sal_Char cByte, sal_uInt32 nFlags, 36 sal_Unicode ** pDestBufPtr, sal_Unicode * pDestBufEnd, sal_uInt32 * pInfo) 37 { 38 *pInfo |= bUndefined 39 ? (bMultiByte 40 ? RTL_TEXTTOUNICODE_INFO_MBUNDEFINED 41 : RTL_TEXTTOUNICODE_INFO_UNDEFINED) 42 : RTL_TEXTTOUNICODE_INFO_INVALID; 43 switch (nFlags 44 & (bUndefined 45 ? (bMultiByte 46 ? RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK 47 : RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) 48 : RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK)) 49 { 50 case RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR: 51 case RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR: 52 case RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR: 53 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; 54 return IMPL_BAD_INPUT_STOP; 55 56 case RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE: 57 case RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE: 58 case RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE: 59 return IMPL_BAD_INPUT_CONTINUE; 60 61 case RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE: 62 if (*pDestBufPtr != pDestBufEnd) 63 { 64 *(*pDestBufPtr)++ = RTL_TEXTCVT_BYTE_PRIVATE_START 65 | ((sal_uChar) cByte); 66 return IMPL_BAD_INPUT_CONTINUE; 67 } 68 else 69 return IMPL_BAD_INPUT_NO_OUTPUT; 70 71 default: /* RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT, 72 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT, 73 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT */ 74 if (*pDestBufPtr != pDestBufEnd) 75 { 76 *(*pDestBufPtr)++ = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; 77 return IMPL_BAD_INPUT_CONTINUE; 78 } 79 else 80 return IMPL_BAD_INPUT_NO_OUTPUT; 81 } 82 } 83 84 ImplBadInputConversionAction 85 ImplHandleBadInputUnicodeToTextConversion(sal_Bool bUndefined, 86 sal_uInt32 nUtf32, 87 sal_uInt32 nFlags, 88 sal_Char ** pDestBufPtr, 89 sal_Char * pDestBufEnd, 90 sal_uInt32 * pInfo, 91 sal_Char const * pPrefix, 92 sal_Size nPrefixLen, 93 sal_Bool * pPrefixWritten) 94 { 95 /* TODO! RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE 96 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR */ 97 98 sal_Char cReplace; 99 100 if (bUndefined) 101 { 102 if (ImplIsControlOrFormat(nUtf32)) 103 { 104 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE) != 0) 105 nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE; 106 } 107 else if (ImplIsPrivateUse(nUtf32)) 108 { 109 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE) != 0) 110 nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE; 111 else if ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0) != 0) 112 nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0; 113 } 114 else if (ImplIsZeroWidth(nUtf32)) 115 { 116 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE) != 0) 117 nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE; 118 } 119 } 120 *pInfo |= bUndefined ? RTL_UNICODETOTEXT_INFO_UNDEFINED : 121 RTL_UNICODETOTEXT_INFO_INVALID; 122 switch (nFlags & (bUndefined ? RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK : 123 RTL_UNICODETOTEXT_FLAGS_INVALID_MASK)) 124 { 125 case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR: 126 case RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR: 127 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR; 128 return IMPL_BAD_INPUT_STOP; 129 130 case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE: 131 case RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE: 132 if (pPrefixWritten) 133 *pPrefixWritten = sal_False; 134 return IMPL_BAD_INPUT_CONTINUE; 135 136 case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0: 137 case RTL_UNICODETOTEXT_FLAGS_INVALID_0: 138 cReplace = 0; 139 break; 140 141 case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK: 142 case RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK: 143 default: /* RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT, 144 RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT */ 145 cReplace = '?'; 146 break; 147 148 case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_UNDERLINE: 149 case RTL_UNICODETOTEXT_FLAGS_INVALID_UNDERLINE: 150 cReplace = '_'; 151 break; 152 } 153 if ((sal_Size) (pDestBufEnd - *pDestBufPtr) > nPrefixLen) 154 { 155 while (nPrefixLen-- > 0) 156 *(*pDestBufPtr)++ = *pPrefix++; 157 *(*pDestBufPtr)++ = cReplace; 158 if (pPrefixWritten) 159 *pPrefixWritten = sal_True; 160 return IMPL_BAD_INPUT_CONTINUE; 161 } 162 else 163 return IMPL_BAD_INPUT_NO_OUTPUT; 164 } 165