1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 // MARKER(update_precomp.py): autogen include statement, do not remove 29 #include "precompiled_sal.hxx" 30 31 #include "context.h" 32 #include "converter.h" 33 #include "convertsinglebytetobmpunicode.hxx" 34 #include "unichars.h" 35 36 #include "osl/diagnose.h" 37 #include "rtl/textcvt.h" 38 #include "sal/types.h" 39 40 #include <cstddef> 41 42 sal_Size rtl_textenc_convertSingleByteToBmpUnicode( 43 ImplTextConverterData const * data, void *, sal_Char const * srcBuf, 44 sal_Size srcBytes, sal_Unicode * destBuf, sal_Size destChars, 45 sal_uInt32 flags, sal_uInt32 * info, sal_Size * srcCvtBytes) 46 { 47 sal_Unicode const * map = static_cast< 48 rtl::textenc::BmpUnicodeToSingleByteConverterData const * >( 49 data)->byteToUnicode; 50 sal_uInt32 infoFlags = 0; 51 sal_Size converted = 0; 52 sal_Unicode * destBufPtr = destBuf; 53 sal_Unicode * destBufEnd = destBuf + destChars; 54 for (; converted < srcBytes; ++converted) { 55 bool undefined = true; 56 sal_Char b = *srcBuf++; 57 sal_Unicode c = map[static_cast< sal_uInt8 >(b)]; 58 if (c == 0xFFFF) { 59 goto bad_input; 60 } 61 if (destBufEnd - destBufPtr < 1) { 62 goto no_output; 63 } 64 *destBufPtr++ = c; 65 continue; 66 bad_input: 67 switch (ImplHandleBadInputTextToUnicodeConversion( 68 undefined, false, b, flags, &destBufPtr, destBufEnd, 69 &infoFlags)) 70 { 71 case IMPL_BAD_INPUT_STOP: 72 break; 73 74 case IMPL_BAD_INPUT_CONTINUE: 75 continue; 76 77 case IMPL_BAD_INPUT_NO_OUTPUT: 78 goto no_output; 79 } 80 break; 81 no_output: 82 --srcBuf; 83 infoFlags |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 84 break; 85 } 86 if (info != 0) { 87 *info = infoFlags; 88 } 89 if (srcCvtBytes != 0) { 90 *srcCvtBytes = converted; 91 } 92 return destBufPtr - destBuf; 93 } 94 95 sal_Size rtl_textenc_convertBmpUnicodeToSingleByte( 96 ImplTextConverterData const * data, void * context, 97 sal_Unicode const * srcBuf, sal_Size srcChars, sal_Char * destBuf, 98 sal_Size destBytes, sal_uInt32 flags, sal_uInt32 * info, 99 sal_Size * srcCvtChars) 100 { 101 std::size_t entries = static_cast< 102 rtl::textenc::BmpUnicodeToSingleByteConverterData const * >( 103 data)->unicodeToByteEntries; 104 rtl::textenc::BmpUnicodeToSingleByteRange const * ranges = static_cast< 105 rtl::textenc::BmpUnicodeToSingleByteConverterData const * >( 106 data)->unicodeToByte; 107 sal_Unicode highSurrogate = 0; 108 sal_uInt32 infoFlags = 0; 109 sal_Size converted = 0; 110 sal_Char * destBufPtr = destBuf; 111 sal_Char * destBufEnd = destBuf + destBytes; 112 if (context != 0) { 113 highSurrogate = static_cast< ImplUnicodeToTextContext * >(context)-> 114 m_nHighSurrogate; 115 } 116 for (; converted < srcChars; ++converted) { 117 bool undefined = true; 118 sal_uInt32 c = *srcBuf++; 119 if (highSurrogate == 0) { 120 if (ImplIsHighSurrogate(c)) { 121 highSurrogate = static_cast< sal_Unicode >(c); 122 continue; 123 } 124 } else if (ImplIsLowSurrogate(c)) { 125 c = ImplCombineSurrogates(highSurrogate, c); 126 } else { 127 undefined = false; 128 goto bad_input; 129 } 130 if (ImplIsLowSurrogate(c) || ImplIsNoncharacter(c)) { 131 undefined = false; 132 goto bad_input; 133 } 134 // Linearly searching through the ranges if probably fastest, assuming 135 // that most converted characters belong to the ASCII subset: 136 for (std::size_t i = 0; i < entries; ++i) { 137 if (c < ranges[i].unicode) { 138 break; 139 } else if (c <= sal::static_int_cast< sal_uInt32 >( 140 ranges[i].unicode + ranges[i].range)) 141 { 142 if (destBufEnd - destBufPtr < 1) { 143 goto no_output; 144 } 145 *destBufPtr++ = static_cast< sal_Char >( 146 ranges[i].byte + (c - ranges[i].unicode)); 147 goto done; 148 } 149 } 150 goto bad_input; 151 done: 152 highSurrogate = 0; 153 continue; 154 bad_input: 155 switch (ImplHandleBadInputUnicodeToTextConversion( 156 undefined, c, flags, &destBufPtr, destBufEnd, &infoFlags, 0, 157 0, 0)) 158 { 159 case IMPL_BAD_INPUT_STOP: 160 highSurrogate = 0; 161 break; 162 163 case IMPL_BAD_INPUT_CONTINUE: 164 highSurrogate = 0; 165 continue; 166 167 case IMPL_BAD_INPUT_NO_OUTPUT: 168 goto no_output; 169 } 170 break; 171 no_output: 172 --srcBuf; 173 infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 174 break; 175 } 176 if (highSurrogate != 0 177 && ((infoFlags 178 & (RTL_UNICODETOTEXT_INFO_ERROR 179 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) 180 == 0)) 181 { 182 if ((flags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) { 183 infoFlags |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; 184 } else { 185 switch (ImplHandleBadInputUnicodeToTextConversion( 186 false, 0, flags, &destBufPtr, destBufEnd, &infoFlags, 0, 187 0, 0)) 188 { 189 case IMPL_BAD_INPUT_STOP: 190 case IMPL_BAD_INPUT_CONTINUE: 191 highSurrogate = 0; 192 break; 193 194 case IMPL_BAD_INPUT_NO_OUTPUT: 195 infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 196 break; 197 } 198 } 199 } 200 if (context != 0) { 201 static_cast< ImplUnicodeToTextContext * >(context)->m_nHighSurrogate 202 = highSurrogate; 203 } 204 if (info != 0) { 205 *info = infoFlags; 206 } 207 if (srcCvtChars != 0) { 208 *srcCvtChars = converted; 209 } 210 return destBufPtr - destBuf; 211 } 212