1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 #include "convertbig5hkscs.h" 29 #include "context.h" 30 #include "converter.h" 31 #include "tenchelp.h" 32 #include "unichars.h" 33 #include "osl/diagnose.h" 34 #include "rtl/alloc.h" 35 #include "rtl/textcvt.h" 36 #include "sal/types.h" 37 38 typedef struct 39 { 40 sal_Int32 m_nRow; /* 0--255; 0 means none */ 41 } ImplBig5HkscsToUnicodeContext; 42 43 void * ImplCreateBig5HkscsToUnicodeContext(void) 44 { 45 void * pContext 46 = rtl_allocateMemory(sizeof (ImplBig5HkscsToUnicodeContext)); 47 ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow = 0; 48 return pContext; 49 } 50 51 void ImplResetBig5HkscsToUnicodeContext(void * pContext) 52 { 53 if (pContext) 54 ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow = 0; 55 } 56 57 sal_Size ImplConvertBig5HkscsToUnicode(ImplTextConverterData const * pData, 58 void * pContext, 59 sal_Char const * pSrcBuf, 60 sal_Size nSrcBytes, 61 sal_Unicode * pDestBuf, 62 sal_Size nDestChars, 63 sal_uInt32 nFlags, 64 sal_uInt32 * pInfo, 65 sal_Size * pSrcCvtBytes) 66 { 67 sal_uInt16 const * pBig5Hkscs2001Data 68 = ((ImplBig5HkscsConverterData const *) pData)-> 69 m_pBig5Hkscs2001ToUnicodeData; 70 sal_Int32 const * pBig5Hkscs2001RowOffsets 71 = ((ImplBig5HkscsConverterData const *) pData)-> 72 m_pBig5Hkscs2001ToUnicodeRowOffsets; 73 ImplDBCSToUniLeadTab const * pBig5Data 74 = ((ImplBig5HkscsConverterData const *) pData)-> 75 m_pBig5ToUnicodeData; 76 sal_Int32 nRow = 0; 77 sal_uInt32 nInfo = 0; 78 sal_Size nConverted = 0; 79 sal_Unicode * pDestBufPtr = pDestBuf; 80 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars; 81 82 if (pContext) 83 nRow = ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow; 84 85 for (; nConverted < nSrcBytes; ++nConverted) 86 { 87 sal_Bool bUndefined = sal_True; 88 sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++; 89 if (nRow == 0) 90 if (nChar < 0x80) 91 if (pDestBufPtr != pDestBufEnd) 92 *pDestBufPtr++ = (sal_Unicode) nChar; 93 else 94 goto no_output; 95 else if (nChar >= 0x81 && nChar <= 0xFE) 96 nRow = nChar; 97 else 98 { 99 bUndefined = sal_False; 100 goto bad_input; 101 } 102 else 103 if ((nChar >= 0x40 && nChar <= 0x7E) 104 || (nChar >= 0xA1 && nChar <= 0xFE)) 105 { 106 sal_uInt32 nUnicode = 0xFFFF; 107 sal_Int32 nOffset = pBig5Hkscs2001RowOffsets[nRow]; 108 sal_uInt32 nFirst=0; 109 sal_uInt32 nLast=0; 110 if (nOffset != -1) 111 { 112 sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++]; 113 nFirst = nFirstLast & 0xFF; 114 nLast = nFirstLast >> 8; 115 if (nChar >= nFirst && nChar <= nLast) 116 nUnicode 117 = pBig5Hkscs2001Data[nOffset + (nChar - nFirst)]; 118 } 119 if (nUnicode == 0xFFFF) 120 { 121 sal_uInt32 nFirst = pBig5Data[nRow].mnTrailStart; 122 if (nChar >= nFirst 123 && nChar <= pBig5Data[nRow].mnTrailEnd) 124 { 125 nUnicode 126 = pBig5Data[nRow].mpToUniTrailTab[nChar - nFirst]; 127 if (nUnicode == 0) 128 nUnicode = 0xFFFF; 129 OSL_VERIFY(!ImplIsHighSurrogate(nUnicode)); 130 } 131 } 132 if (nUnicode == 0xFFFF) 133 { 134 ImplDBCSEUDCData const * p 135 = ((ImplBig5HkscsConverterData const *) pData)-> 136 m_pEudcData; 137 sal_uInt32 nCount 138 = ((ImplBig5HkscsConverterData const *) pData)-> 139 m_nEudcCount; 140 sal_uInt32 i; 141 for (i = 0; i < nCount; ++i) 142 { 143 if (nRow >= p->mnLeadStart && nRow <= p->mnLeadEnd) 144 { 145 if (nChar < p->mnTrail1Start) 146 break; 147 if (nChar <= p->mnTrail1End) 148 { 149 nUnicode 150 = p->mnUniStart 151 + (nRow - p->mnLeadStart) 152 * p->mnTrailRangeCount 153 + (nChar - p->mnTrail1Start); 154 break; 155 } 156 if (p->mnTrailCount < 2 157 || nChar < p->mnTrail2Start) 158 break; 159 if (nChar <= p->mnTrail2End) 160 { 161 nUnicode 162 = p->mnUniStart 163 + (nRow - p->mnLeadStart) 164 * p->mnTrailRangeCount 165 + (nChar - p->mnTrail2Start) 166 + (p->mnTrail1End - p->mnTrail1Start 167 + 1); 168 break; 169 } 170 if (p->mnTrailCount < 3 171 || nChar < p->mnTrail3Start) 172 break; 173 if (nChar <= p->mnTrail3End) 174 { 175 nUnicode 176 = p->mnUniStart 177 + (nRow - p->mnLeadStart) 178 * p->mnTrailRangeCount 179 + (nChar - p->mnTrail3Start) 180 + (p->mnTrail1End - p->mnTrail1Start 181 + 1) 182 + (p->mnTrail2End - p->mnTrail2Start 183 + 1); 184 break; 185 } 186 break; 187 } 188 ++p; 189 } 190 OSL_VERIFY(!ImplIsHighSurrogate(nUnicode)); 191 } 192 if (nUnicode == 0xFFFF) 193 goto bad_input; 194 if (ImplIsHighSurrogate(nUnicode)) 195 if (pDestBufEnd - pDestBufPtr >= 2) 196 { 197 nOffset += nLast - nFirst + 1; 198 nFirst = pBig5Hkscs2001Data[nOffset++]; 199 *pDestBufPtr++ = (sal_Unicode) nUnicode; 200 *pDestBufPtr++ 201 = (sal_Unicode) pBig5Hkscs2001Data[ 202 nOffset + (nChar - nFirst)]; 203 } 204 else 205 goto no_output; 206 else 207 if (pDestBufPtr != pDestBufEnd) 208 *pDestBufPtr++ = (sal_Unicode) nUnicode; 209 else 210 goto no_output; 211 nRow = 0; 212 } 213 else 214 { 215 bUndefined = sal_False; 216 goto bad_input; 217 } 218 continue; 219 220 bad_input: 221 switch (ImplHandleBadInputTextToUnicodeConversion( 222 bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd, 223 &nInfo)) 224 { 225 case IMPL_BAD_INPUT_STOP: 226 nRow = 0; 227 break; 228 229 case IMPL_BAD_INPUT_CONTINUE: 230 nRow = 0; 231 continue; 232 233 case IMPL_BAD_INPUT_NO_OUTPUT: 234 goto no_output; 235 } 236 break; 237 238 no_output: 239 --pSrcBuf; 240 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 241 break; 242 } 243 244 if (nRow != 0 245 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR 246 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)) 247 == 0) 248 { 249 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) 250 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; 251 else 252 switch (ImplHandleBadInputTextToUnicodeConversion( 253 sal_False, sal_True, 0, nFlags, &pDestBufPtr, 254 pDestBufEnd, &nInfo)) 255 { 256 case IMPL_BAD_INPUT_STOP: 257 case IMPL_BAD_INPUT_CONTINUE: 258 nRow = 0; 259 break; 260 261 case IMPL_BAD_INPUT_NO_OUTPUT: 262 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 263 break; 264 } 265 } 266 267 if (pContext) 268 ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow = nRow; 269 if (pInfo) 270 *pInfo = nInfo; 271 if (pSrcCvtBytes) 272 *pSrcCvtBytes = nConverted; 273 274 return pDestBufPtr - pDestBuf; 275 } 276 277 sal_Size ImplConvertUnicodeToBig5Hkscs(ImplTextConverterData const * pData, 278 void * pContext, 279 sal_Unicode const * pSrcBuf, 280 sal_Size nSrcChars, 281 sal_Char * pDestBuf, 282 sal_Size nDestBytes, 283 sal_uInt32 nFlags, 284 sal_uInt32 * pInfo, 285 sal_Size * pSrcCvtChars) 286 { 287 sal_uInt16 const * pBig5Hkscs2001Data 288 = ((ImplBig5HkscsConverterData const *) pData)-> 289 m_pUnicodeToBig5Hkscs2001Data; 290 sal_Int32 const * pBig5Hkscs2001PageOffsets 291 = ((ImplBig5HkscsConverterData const *) pData)-> 292 m_pUnicodeToBig5Hkscs2001PageOffsets; 293 sal_Int32 const * pBig5Hkscs2001PlaneOffsets 294 = ((ImplBig5HkscsConverterData const *) pData)-> 295 m_pUnicodeToBig5Hkscs2001PlaneOffsets; 296 ImplUniToDBCSHighTab const * pBig5Data 297 = ((ImplBig5HkscsConverterData const *) pData)-> 298 m_pUnicodeToBig5Data; 299 sal_Unicode nHighSurrogate = 0; 300 sal_uInt32 nInfo = 0; 301 sal_Size nConverted = 0; 302 sal_Char * pDestBufPtr = pDestBuf; 303 sal_Char * pDestBufEnd = pDestBuf + nDestBytes; 304 305 if (pContext) 306 nHighSurrogate 307 = ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate; 308 309 for (; nConverted < nSrcChars; ++nConverted) 310 { 311 sal_Bool bUndefined = sal_True; 312 sal_uInt32 nChar = *pSrcBuf++; 313 if (nHighSurrogate == 0) 314 { 315 if (ImplIsHighSurrogate(nChar)) 316 { 317 nHighSurrogate = (sal_Unicode) nChar; 318 continue; 319 } 320 } 321 else if (ImplIsLowSurrogate(nChar)) 322 nChar = ImplCombineSurrogates(nHighSurrogate, nChar); 323 else 324 { 325 bUndefined = sal_False; 326 goto bad_input; 327 } 328 329 if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar)) 330 { 331 bUndefined = sal_False; 332 goto bad_input; 333 } 334 335 if (nChar < 0x80) 336 if (pDestBufPtr != pDestBufEnd) 337 *pDestBufPtr++ = (sal_Char) nChar; 338 else 339 goto no_output; 340 else 341 { 342 sal_uInt32 nBytes = 0; 343 sal_Int32 nOffset = pBig5Hkscs2001PlaneOffsets[nChar >> 16]; 344 if (nOffset != -1) 345 { 346 nOffset 347 = pBig5Hkscs2001PageOffsets[nOffset + ((nChar & 0xFF00) 348 >> 8)]; 349 if (nOffset != -1) 350 { 351 sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++]; 352 sal_uInt32 nFirst = nFirstLast & 0xFF; 353 sal_uInt32 nLast = nFirstLast >> 8; 354 sal_uInt32 nIndex = nChar & 0xFF; 355 if (nIndex >= nFirst && nIndex <= nLast) 356 { 357 nBytes 358 = pBig5Hkscs2001Data[nOffset + (nIndex - nFirst)]; 359 } 360 } 361 } 362 if (nBytes == 0) 363 { 364 sal_uInt32 nIndex1 = nChar >> 8; 365 if (nIndex1 < 0x100) 366 { 367 sal_uInt32 nIndex2 = nChar & 0xFF; 368 sal_uInt32 nFirst = pBig5Data[nIndex1].mnLowStart; 369 if (nIndex2 >= nFirst 370 && nIndex2 <= pBig5Data[nIndex1].mnLowEnd) 371 nBytes = pBig5Data[nIndex1]. 372 mpToUniTrailTab[nIndex2 - nFirst]; 373 } 374 } 375 if (nBytes == 0) 376 { 377 ImplDBCSEUDCData const * p 378 = ((ImplBig5HkscsConverterData const *) pData)-> 379 m_pEudcData; 380 sal_uInt32 nCount 381 = ((ImplBig5HkscsConverterData const *) pData)-> 382 m_nEudcCount; 383 sal_uInt32 i; 384 for (i = 0; i < nCount; ++i) { 385 if (nChar >= p->mnUniStart && nChar <= p->mnUniEnd) 386 { 387 sal_uInt32 nIndex = nChar - p->mnUniStart; 388 sal_uInt32 nLeadOff = nIndex / p->mnTrailRangeCount; 389 sal_uInt32 nTrailOff = nIndex % p->mnTrailRangeCount; 390 sal_uInt32 nSize; 391 nBytes = (p->mnLeadStart + nLeadOff) << 8; 392 nSize = p->mnTrail1End - p->mnTrail1Start + 1; 393 if (nTrailOff < nSize) 394 { 395 nBytes |= p->mnTrail1Start + nTrailOff; 396 break; 397 } 398 nTrailOff -= nSize; 399 nSize = p->mnTrail2End - p->mnTrail2Start + 1; 400 if (nTrailOff < nSize) 401 { 402 nBytes |= p->mnTrail2Start + nTrailOff; 403 break; 404 } 405 nTrailOff -= nSize; 406 nBytes |= p->mnTrail3Start + nTrailOff; 407 break; 408 } 409 ++p; 410 } 411 } 412 if (nBytes == 0) 413 goto bad_input; 414 if (pDestBufEnd - pDestBufPtr >= 2) 415 { 416 *pDestBufPtr++ = (sal_Char) (nBytes >> 8); 417 *pDestBufPtr++ = (sal_Char) (nBytes & 0xFF); 418 } 419 else 420 goto no_output; 421 } 422 nHighSurrogate = 0; 423 continue; 424 425 bad_input: 426 switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined, 427 nChar, 428 nFlags, 429 &pDestBufPtr, 430 pDestBufEnd, 431 &nInfo, 432 NULL, 433 0, 434 NULL)) 435 { 436 case IMPL_BAD_INPUT_STOP: 437 nHighSurrogate = 0; 438 break; 439 440 case IMPL_BAD_INPUT_CONTINUE: 441 nHighSurrogate = 0; 442 continue; 443 444 case IMPL_BAD_INPUT_NO_OUTPUT: 445 goto no_output; 446 } 447 break; 448 449 no_output: 450 --pSrcBuf; 451 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 452 break; 453 } 454 455 if (nHighSurrogate != 0 456 && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR 457 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) 458 == 0) 459 { 460 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) 461 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; 462 else 463 switch (ImplHandleBadInputUnicodeToTextConversion(sal_False, 464 0, 465 nFlags, 466 &pDestBufPtr, 467 pDestBufEnd, 468 &nInfo, 469 NULL, 470 0, 471 NULL)) 472 { 473 case IMPL_BAD_INPUT_STOP: 474 case IMPL_BAD_INPUT_CONTINUE: 475 nHighSurrogate = 0; 476 break; 477 478 case IMPL_BAD_INPUT_NO_OUTPUT: 479 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 480 break; 481 } 482 } 483 484 if (pContext) 485 ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate 486 = nHighSurrogate; 487 if (pInfo) 488 *pInfo = nInfo; 489 if (pSrcCvtChars) 490 *pSrcCvtChars = nConverted; 491 492 return pDestBufPtr - pDestBuf; 493 } 494