xref: /aoo41x/main/sal/textenc/convertbig5hkscs.c (revision cdf0e10c)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 #include "convertbig5hkscs.h"
29 #include "context.h"
30 #include "converter.h"
31 #include "tenchelp.h"
32 #include "unichars.h"
33 #include "osl/diagnose.h"
34 #include "rtl/alloc.h"
35 #include "rtl/textcvt.h"
36 #include "sal/types.h"
37 
38 typedef struct
39 {
40     sal_Int32 m_nRow; /* 0--255; 0 means none */
41 } ImplBig5HkscsToUnicodeContext;
42 
43 void * ImplCreateBig5HkscsToUnicodeContext(void)
44 {
45     void * pContext
46         = rtl_allocateMemory(sizeof (ImplBig5HkscsToUnicodeContext));
47     ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow = 0;
48     return pContext;
49 }
50 
51 void ImplResetBig5HkscsToUnicodeContext(void * pContext)
52 {
53     if (pContext)
54         ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow = 0;
55 }
56 
57 sal_Size ImplConvertBig5HkscsToUnicode(ImplTextConverterData const * pData,
58                                        void * pContext,
59                                        sal_Char const * pSrcBuf,
60                                        sal_Size nSrcBytes,
61                                        sal_Unicode * pDestBuf,
62                                        sal_Size nDestChars,
63                                        sal_uInt32 nFlags,
64                                        sal_uInt32 * pInfo,
65                                        sal_Size * pSrcCvtBytes)
66 {
67     sal_uInt16 const * pBig5Hkscs2001Data
68         = ((ImplBig5HkscsConverterData const *) pData)->
69               m_pBig5Hkscs2001ToUnicodeData;
70     sal_Int32 const * pBig5Hkscs2001RowOffsets
71         = ((ImplBig5HkscsConverterData const *) pData)->
72               m_pBig5Hkscs2001ToUnicodeRowOffsets;
73     ImplDBCSToUniLeadTab const * pBig5Data
74         = ((ImplBig5HkscsConverterData const *) pData)->
75               m_pBig5ToUnicodeData;
76     sal_Int32 nRow = 0;
77     sal_uInt32 nInfo = 0;
78     sal_Size nConverted = 0;
79     sal_Unicode * pDestBufPtr = pDestBuf;
80     sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
81 
82     if (pContext)
83         nRow = ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow;
84 
85     for (; nConverted < nSrcBytes; ++nConverted)
86     {
87         sal_Bool bUndefined = sal_True;
88         sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++;
89         if (nRow == 0)
90             if (nChar < 0x80)
91                 if (pDestBufPtr != pDestBufEnd)
92                     *pDestBufPtr++ = (sal_Unicode) nChar;
93                 else
94                     goto no_output;
95             else if (nChar >= 0x81 && nChar <= 0xFE)
96                 nRow = nChar;
97             else
98             {
99                 bUndefined = sal_False;
100                 goto bad_input;
101             }
102         else
103             if ((nChar >= 0x40 && nChar <= 0x7E)
104                 || (nChar >= 0xA1 && nChar <= 0xFE))
105             {
106                 sal_uInt32 nUnicode = 0xFFFF;
107                 sal_Int32 nOffset = pBig5Hkscs2001RowOffsets[nRow];
108                 sal_uInt32 nFirst=0;
109                 sal_uInt32 nLast=0;
110                 if (nOffset != -1)
111                 {
112                     sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++];
113                     nFirst = nFirstLast & 0xFF;
114                     nLast = nFirstLast >> 8;
115                     if (nChar >= nFirst && nChar <= nLast)
116                         nUnicode
117                             = pBig5Hkscs2001Data[nOffset + (nChar - nFirst)];
118                 }
119                 if (nUnicode == 0xFFFF)
120                 {
121                     sal_uInt32 nFirst = pBig5Data[nRow].mnTrailStart;
122                     if (nChar >= nFirst
123                         && nChar <= pBig5Data[nRow].mnTrailEnd)
124                     {
125                         nUnicode
126                             = pBig5Data[nRow].mpToUniTrailTab[nChar - nFirst];
127                         if (nUnicode == 0)
128                             nUnicode = 0xFFFF;
129                         OSL_VERIFY(!ImplIsHighSurrogate(nUnicode));
130                     }
131                 }
132                 if (nUnicode == 0xFFFF)
133                 {
134                     ImplDBCSEUDCData const * p
135                         = ((ImplBig5HkscsConverterData const *) pData)->
136                               m_pEudcData;
137                     sal_uInt32 nCount
138                         = ((ImplBig5HkscsConverterData const *) pData)->
139                               m_nEudcCount;
140                     sal_uInt32 i;
141                     for (i = 0; i < nCount; ++i)
142                     {
143                         if (nRow >= p->mnLeadStart && nRow <= p->mnLeadEnd)
144                         {
145                             if (nChar < p->mnTrail1Start)
146                                 break;
147                             if (nChar <= p->mnTrail1End)
148                             {
149                                 nUnicode
150                                     = p->mnUniStart
151                                           + (nRow - p->mnLeadStart)
152                                                 * p->mnTrailRangeCount
153                                           + (nChar - p->mnTrail1Start);
154                                 break;
155                             }
156                             if (p->mnTrailCount < 2
157                                 || nChar < p->mnTrail2Start)
158                                 break;
159                             if (nChar <= p->mnTrail2End)
160                             {
161                                 nUnicode
162                                     = p->mnUniStart
163                                           + (nRow - p->mnLeadStart)
164                                                 * p->mnTrailRangeCount
165                                           + (nChar - p->mnTrail2Start)
166                                           + (p->mnTrail1End - p->mnTrail1Start
167                                                  + 1);
168                                 break;
169                             }
170                             if (p->mnTrailCount < 3
171                                 || nChar < p->mnTrail3Start)
172                                 break;
173                             if (nChar <= p->mnTrail3End)
174                             {
175                                 nUnicode
176                                     = p->mnUniStart
177                                           + (nRow - p->mnLeadStart)
178                                                 * p->mnTrailRangeCount
179                                           + (nChar - p->mnTrail3Start)
180                                           + (p->mnTrail1End - p->mnTrail1Start
181                                                  + 1)
182                                           + (p->mnTrail2End - p->mnTrail2Start
183                                                  + 1);
184                                 break;
185                             }
186                             break;
187                         }
188                         ++p;
189                     }
190                     OSL_VERIFY(!ImplIsHighSurrogate(nUnicode));
191                 }
192                 if (nUnicode == 0xFFFF)
193                     goto bad_input;
194                 if (ImplIsHighSurrogate(nUnicode))
195                     if (pDestBufEnd - pDestBufPtr >= 2)
196                     {
197                         nOffset += nLast - nFirst + 1;
198                         nFirst = pBig5Hkscs2001Data[nOffset++];
199                         *pDestBufPtr++ = (sal_Unicode) nUnicode;
200                         *pDestBufPtr++
201                             = (sal_Unicode) pBig5Hkscs2001Data[
202                                                 nOffset + (nChar - nFirst)];
203                     }
204                     else
205                         goto no_output;
206                 else
207                     if (pDestBufPtr != pDestBufEnd)
208                         *pDestBufPtr++ = (sal_Unicode) nUnicode;
209                     else
210                         goto no_output;
211                 nRow = 0;
212             }
213             else
214             {
215                 bUndefined = sal_False;
216                 goto bad_input;
217             }
218         continue;
219 
220     bad_input:
221         switch (ImplHandleBadInputTextToUnicodeConversion(
222                     bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd,
223                     &nInfo))
224         {
225         case IMPL_BAD_INPUT_STOP:
226             nRow = 0;
227             break;
228 
229         case IMPL_BAD_INPUT_CONTINUE:
230             nRow = 0;
231             continue;
232 
233         case IMPL_BAD_INPUT_NO_OUTPUT:
234             goto no_output;
235         }
236         break;
237 
238     no_output:
239         --pSrcBuf;
240         nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
241         break;
242     }
243 
244     if (nRow != 0
245         && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
246                          | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
247                == 0)
248     {
249         if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
250             nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
251         else
252             switch (ImplHandleBadInputTextToUnicodeConversion(
253                         sal_False, sal_True, 0, nFlags, &pDestBufPtr,
254                         pDestBufEnd, &nInfo))
255             {
256             case IMPL_BAD_INPUT_STOP:
257             case IMPL_BAD_INPUT_CONTINUE:
258                 nRow = 0;
259                 break;
260 
261             case IMPL_BAD_INPUT_NO_OUTPUT:
262                 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
263                 break;
264             }
265     }
266 
267     if (pContext)
268         ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow = nRow;
269     if (pInfo)
270         *pInfo = nInfo;
271     if (pSrcCvtBytes)
272         *pSrcCvtBytes = nConverted;
273 
274     return pDestBufPtr - pDestBuf;
275 }
276 
277 sal_Size ImplConvertUnicodeToBig5Hkscs(ImplTextConverterData const * pData,
278                                        void * pContext,
279                                        sal_Unicode const * pSrcBuf,
280                                        sal_Size nSrcChars,
281                                        sal_Char * pDestBuf,
282                                        sal_Size nDestBytes,
283                                        sal_uInt32 nFlags,
284                                        sal_uInt32 * pInfo,
285                                        sal_Size * pSrcCvtChars)
286 {
287     sal_uInt16 const * pBig5Hkscs2001Data
288         = ((ImplBig5HkscsConverterData const *) pData)->
289               m_pUnicodeToBig5Hkscs2001Data;
290     sal_Int32 const * pBig5Hkscs2001PageOffsets
291         = ((ImplBig5HkscsConverterData const *) pData)->
292               m_pUnicodeToBig5Hkscs2001PageOffsets;
293     sal_Int32 const * pBig5Hkscs2001PlaneOffsets
294         = ((ImplBig5HkscsConverterData const *) pData)->
295               m_pUnicodeToBig5Hkscs2001PlaneOffsets;
296     ImplUniToDBCSHighTab const * pBig5Data
297         = ((ImplBig5HkscsConverterData const *) pData)->
298               m_pUnicodeToBig5Data;
299     sal_Unicode nHighSurrogate = 0;
300     sal_uInt32 nInfo = 0;
301     sal_Size nConverted = 0;
302     sal_Char * pDestBufPtr = pDestBuf;
303     sal_Char * pDestBufEnd = pDestBuf + nDestBytes;
304 
305     if (pContext)
306         nHighSurrogate
307             = ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate;
308 
309     for (; nConverted < nSrcChars; ++nConverted)
310     {
311         sal_Bool bUndefined = sal_True;
312         sal_uInt32 nChar = *pSrcBuf++;
313         if (nHighSurrogate == 0)
314         {
315             if (ImplIsHighSurrogate(nChar))
316             {
317                 nHighSurrogate = (sal_Unicode) nChar;
318                 continue;
319             }
320         }
321         else if (ImplIsLowSurrogate(nChar))
322             nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
323         else
324         {
325             bUndefined = sal_False;
326             goto bad_input;
327         }
328 
329         if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar))
330         {
331             bUndefined = sal_False;
332             goto bad_input;
333         }
334 
335         if (nChar < 0x80)
336             if (pDestBufPtr != pDestBufEnd)
337                 *pDestBufPtr++ = (sal_Char) nChar;
338             else
339                 goto no_output;
340         else
341         {
342             sal_uInt32 nBytes = 0;
343             sal_Int32 nOffset = pBig5Hkscs2001PlaneOffsets[nChar >> 16];
344             if (nOffset != -1)
345             {
346                 nOffset
347                     = pBig5Hkscs2001PageOffsets[nOffset + ((nChar & 0xFF00)
348                                                                >> 8)];
349                 if (nOffset != -1)
350                 {
351                     sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++];
352                     sal_uInt32 nFirst = nFirstLast & 0xFF;
353                     sal_uInt32 nLast = nFirstLast >> 8;
354                     sal_uInt32 nIndex = nChar & 0xFF;
355                     if (nIndex >= nFirst && nIndex <= nLast)
356                     {
357                         nBytes
358                             = pBig5Hkscs2001Data[nOffset + (nIndex - nFirst)];
359                     }
360                 }
361             }
362             if (nBytes == 0)
363             {
364                 sal_uInt32 nIndex1 = nChar >> 8;
365                 if (nIndex1 < 0x100)
366                 {
367                     sal_uInt32 nIndex2 = nChar & 0xFF;
368                     sal_uInt32 nFirst = pBig5Data[nIndex1].mnLowStart;
369                     if (nIndex2 >= nFirst
370                         && nIndex2 <= pBig5Data[nIndex1].mnLowEnd)
371                         nBytes = pBig5Data[nIndex1].
372                                      mpToUniTrailTab[nIndex2 - nFirst];
373                 }
374             }
375             if (nBytes == 0)
376             {
377                 ImplDBCSEUDCData const * p
378                     = ((ImplBig5HkscsConverterData const *) pData)->
379                           m_pEudcData;
380                 sal_uInt32 nCount
381                     = ((ImplBig5HkscsConverterData const *) pData)->
382                           m_nEudcCount;
383                 sal_uInt32 i;
384                 for (i = 0; i < nCount; ++i) {
385                     if (nChar >= p->mnUniStart && nChar <= p->mnUniEnd)
386                     {
387                         sal_uInt32 nIndex = nChar - p->mnUniStart;
388                         sal_uInt32 nLeadOff = nIndex / p->mnTrailRangeCount;
389                         sal_uInt32 nTrailOff = nIndex % p->mnTrailRangeCount;
390                         sal_uInt32 nSize;
391                         nBytes = (p->mnLeadStart + nLeadOff) << 8;
392                         nSize = p->mnTrail1End - p->mnTrail1Start + 1;
393                         if (nTrailOff < nSize)
394                         {
395                             nBytes |= p->mnTrail1Start + nTrailOff;
396                             break;
397                         }
398                         nTrailOff -= nSize;
399                         nSize = p->mnTrail2End - p->mnTrail2Start + 1;
400                         if (nTrailOff < nSize)
401                         {
402                             nBytes |= p->mnTrail2Start + nTrailOff;
403                             break;
404                         }
405                         nTrailOff -= nSize;
406                         nBytes |= p->mnTrail3Start + nTrailOff;
407                         break;
408                     }
409                     ++p;
410                 }
411             }
412             if (nBytes == 0)
413                 goto bad_input;
414             if (pDestBufEnd - pDestBufPtr >= 2)
415             {
416                 *pDestBufPtr++ = (sal_Char) (nBytes >> 8);
417                 *pDestBufPtr++ = (sal_Char) (nBytes & 0xFF);
418             }
419             else
420                 goto no_output;
421         }
422         nHighSurrogate = 0;
423         continue;
424 
425     bad_input:
426         switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined,
427                                                           nChar,
428                                                           nFlags,
429                                                           &pDestBufPtr,
430                                                           pDestBufEnd,
431                                                           &nInfo,
432                                                           NULL,
433                                                           0,
434                                                           NULL))
435         {
436         case IMPL_BAD_INPUT_STOP:
437             nHighSurrogate = 0;
438             break;
439 
440         case IMPL_BAD_INPUT_CONTINUE:
441             nHighSurrogate = 0;
442             continue;
443 
444         case IMPL_BAD_INPUT_NO_OUTPUT:
445             goto no_output;
446         }
447         break;
448 
449     no_output:
450         --pSrcBuf;
451         nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
452         break;
453     }
454 
455     if (nHighSurrogate != 0
456         && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
457                          | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
458                == 0)
459     {
460         if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
461             nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
462         else
463             switch (ImplHandleBadInputUnicodeToTextConversion(sal_False,
464                                                               0,
465                                                               nFlags,
466                                                               &pDestBufPtr,
467                                                               pDestBufEnd,
468                                                               &nInfo,
469                                                               NULL,
470                                                               0,
471                                                               NULL))
472             {
473             case IMPL_BAD_INPUT_STOP:
474             case IMPL_BAD_INPUT_CONTINUE:
475                 nHighSurrogate = 0;
476                 break;
477 
478             case IMPL_BAD_INPUT_NO_OUTPUT:
479                 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
480                 break;
481             }
482     }
483 
484     if (pContext)
485         ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate
486             = nHighSurrogate;
487     if (pInfo)
488         *pInfo = nInfo;
489     if (pSrcCvtChars)
490         *pSrcCvtChars = nConverted;
491 
492     return pDestBufPtr - pDestBuf;
493 }
494