xref: /trunk/main/sal/textenc/convertbig5hkscs.c (revision 647f063d)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 #include "convertbig5hkscs.h"
25 #include "context.h"
26 #include "converter.h"
27 #include "tenchelp.h"
28 #include "unichars.h"
29 #include "osl/diagnose.h"
30 #include "rtl/alloc.h"
31 #include "rtl/textcvt.h"
32 #include "sal/types.h"
33 
34 typedef struct
35 {
36     sal_Int32 m_nRow; /* 0--255; 0 means none */
37 } ImplBig5HkscsToUnicodeContext;
38 
ImplCreateBig5HkscsToUnicodeContext(void)39 void * ImplCreateBig5HkscsToUnicodeContext(void)
40 {
41     void * pContext
42         = rtl_allocateMemory(sizeof (ImplBig5HkscsToUnicodeContext));
43     ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow = 0;
44     return pContext;
45 }
46 
ImplResetBig5HkscsToUnicodeContext(void * pContext)47 void ImplResetBig5HkscsToUnicodeContext(void * pContext)
48 {
49     if (pContext)
50         ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow = 0;
51 }
52 
ImplConvertBig5HkscsToUnicode(ImplTextConverterData const * pData,void * pContext,sal_Char const * pSrcBuf,sal_Size nSrcBytes,sal_Unicode * pDestBuf,sal_Size nDestChars,sal_uInt32 nFlags,sal_uInt32 * pInfo,sal_Size * pSrcCvtBytes)53 sal_Size ImplConvertBig5HkscsToUnicode(ImplTextConverterData const * pData,
54                                        void * pContext,
55                                        sal_Char const * pSrcBuf,
56                                        sal_Size nSrcBytes,
57                                        sal_Unicode * pDestBuf,
58                                        sal_Size nDestChars,
59                                        sal_uInt32 nFlags,
60                                        sal_uInt32 * pInfo,
61                                        sal_Size * pSrcCvtBytes)
62 {
63     sal_uInt16 const * pBig5Hkscs2001Data
64         = ((ImplBig5HkscsConverterData const *) pData)->
65               m_pBig5Hkscs2001ToUnicodeData;
66     sal_Int32 const * pBig5Hkscs2001RowOffsets
67         = ((ImplBig5HkscsConverterData const *) pData)->
68               m_pBig5Hkscs2001ToUnicodeRowOffsets;
69     ImplDBCSToUniLeadTab const * pBig5Data
70         = ((ImplBig5HkscsConverterData const *) pData)->
71               m_pBig5ToUnicodeData;
72     sal_Int32 nRow = 0;
73     sal_uInt32 nInfo = 0;
74     sal_Size nConverted = 0;
75     sal_Unicode * pDestBufPtr = pDestBuf;
76     sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
77 
78     if (pContext)
79         nRow = ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow;
80 
81     for (; nConverted < nSrcBytes; ++nConverted)
82     {
83         sal_Bool bUndefined = sal_True;
84         sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++;
85         if (nRow == 0)
86             if (nChar < 0x80)
87                 if (pDestBufPtr != pDestBufEnd)
88                     *pDestBufPtr++ = (sal_Unicode) nChar;
89                 else
90                     goto no_output;
91             else if (nChar >= 0x81 && nChar <= 0xFE)
92                 nRow = nChar;
93             else
94             {
95                 bUndefined = sal_False;
96                 goto bad_input;
97             }
98         else
99             if ((nChar >= 0x40 && nChar <= 0x7E)
100                 || (nChar >= 0xA1 && nChar <= 0xFE))
101             {
102                 sal_uInt32 nUnicode = 0xFFFF;
103                 sal_Int32 nOffset = pBig5Hkscs2001RowOffsets[nRow];
104                 sal_uInt32 nFirst=0;
105                 sal_uInt32 nLast=0;
106                 if (nOffset != -1)
107                 {
108                     sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++];
109                     nFirst = nFirstLast & 0xFF;
110                     nLast = nFirstLast >> 8;
111                     if (nChar >= nFirst && nChar <= nLast)
112                         nUnicode
113                             = pBig5Hkscs2001Data[nOffset + (nChar - nFirst)];
114                 }
115                 if (nUnicode == 0xFFFF)
116                 {
117                     sal_uInt32 nFirst = pBig5Data[nRow].mnTrailStart;
118                     if (nChar >= nFirst
119                         && nChar <= pBig5Data[nRow].mnTrailEnd)
120                     {
121                         nUnicode
122                             = pBig5Data[nRow].mpToUniTrailTab[nChar - nFirst];
123                         if (nUnicode == 0)
124                             nUnicode = 0xFFFF;
125                         OSL_VERIFY(!ImplIsHighSurrogate(nUnicode));
126                     }
127                 }
128                 if (nUnicode == 0xFFFF)
129                 {
130                     ImplDBCSEUDCData const * p
131                         = ((ImplBig5HkscsConverterData const *) pData)->
132                               m_pEudcData;
133                     sal_uInt32 nCount
134                         = ((ImplBig5HkscsConverterData const *) pData)->
135                               m_nEudcCount;
136                     sal_uInt32 i;
137                     for (i = 0; i < nCount; ++i)
138                     {
139                         if (nRow >= p->mnLeadStart && nRow <= p->mnLeadEnd)
140                         {
141                             if (nChar < p->mnTrail1Start)
142                                 break;
143                             if (nChar <= p->mnTrail1End)
144                             {
145                                 nUnicode
146                                     = p->mnUniStart
147                                           + (nRow - p->mnLeadStart)
148                                                 * p->mnTrailRangeCount
149                                           + (nChar - p->mnTrail1Start);
150                                 break;
151                             }
152                             if (p->mnTrailCount < 2
153                                 || nChar < p->mnTrail2Start)
154                                 break;
155                             if (nChar <= p->mnTrail2End)
156                             {
157                                 nUnicode
158                                     = p->mnUniStart
159                                           + (nRow - p->mnLeadStart)
160                                                 * p->mnTrailRangeCount
161                                           + (nChar - p->mnTrail2Start)
162                                           + (p->mnTrail1End - p->mnTrail1Start
163                                                  + 1);
164                                 break;
165                             }
166                             if (p->mnTrailCount < 3
167                                 || nChar < p->mnTrail3Start)
168                                 break;
169                             if (nChar <= p->mnTrail3End)
170                             {
171                                 nUnicode
172                                     = p->mnUniStart
173                                           + (nRow - p->mnLeadStart)
174                                                 * p->mnTrailRangeCount
175                                           + (nChar - p->mnTrail3Start)
176                                           + (p->mnTrail1End - p->mnTrail1Start
177                                                  + 1)
178                                           + (p->mnTrail2End - p->mnTrail2Start
179                                                  + 1);
180                                 break;
181                             }
182                             break;
183                         }
184                         ++p;
185                     }
186                     OSL_VERIFY(!ImplIsHighSurrogate(nUnicode));
187                 }
188                 if (nUnicode == 0xFFFF)
189                     goto bad_input;
190                 if (ImplIsHighSurrogate(nUnicode))
191                     if (pDestBufEnd - pDestBufPtr >= 2)
192                     {
193                         nOffset += nLast - nFirst + 1;
194                         nFirst = pBig5Hkscs2001Data[nOffset++];
195                         *pDestBufPtr++ = (sal_Unicode) nUnicode;
196                         *pDestBufPtr++
197                             = (sal_Unicode) pBig5Hkscs2001Data[
198                                                 nOffset + (nChar - nFirst)];
199                     }
200                     else
201                         goto no_output;
202                 else
203                     if (pDestBufPtr != pDestBufEnd)
204                         *pDestBufPtr++ = (sal_Unicode) nUnicode;
205                     else
206                         goto no_output;
207                 nRow = 0;
208             }
209             else
210             {
211                 bUndefined = sal_False;
212                 goto bad_input;
213             }
214         continue;
215 
216     bad_input:
217         switch (ImplHandleBadInputTextToUnicodeConversion(
218                     bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd,
219                     &nInfo))
220         {
221         case IMPL_BAD_INPUT_STOP:
222             nRow = 0;
223             break;
224 
225         case IMPL_BAD_INPUT_CONTINUE:
226             nRow = 0;
227             continue;
228 
229         case IMPL_BAD_INPUT_NO_OUTPUT:
230             goto no_output;
231         }
232         break;
233 
234     no_output:
235         --pSrcBuf;
236         nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
237         break;
238     }
239 
240     if (nRow != 0
241         && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
242                          | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
243                == 0)
244     {
245         if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
246             nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
247         else
248             switch (ImplHandleBadInputTextToUnicodeConversion(
249                         sal_False, sal_True, 0, nFlags, &pDestBufPtr,
250                         pDestBufEnd, &nInfo))
251             {
252             case IMPL_BAD_INPUT_STOP:
253             case IMPL_BAD_INPUT_CONTINUE:
254                 nRow = 0;
255                 break;
256 
257             case IMPL_BAD_INPUT_NO_OUTPUT:
258                 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
259                 break;
260             }
261     }
262 
263     if (pContext)
264         ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow = nRow;
265     if (pInfo)
266         *pInfo = nInfo;
267     if (pSrcCvtBytes)
268         *pSrcCvtBytes = nConverted;
269 
270     return pDestBufPtr - pDestBuf;
271 }
272 
ImplConvertUnicodeToBig5Hkscs(ImplTextConverterData const * pData,void * pContext,sal_Unicode const * pSrcBuf,sal_Size nSrcChars,sal_Char * pDestBuf,sal_Size nDestBytes,sal_uInt32 nFlags,sal_uInt32 * pInfo,sal_Size * pSrcCvtChars)273 sal_Size ImplConvertUnicodeToBig5Hkscs(ImplTextConverterData const * pData,
274                                        void * pContext,
275                                        sal_Unicode const * pSrcBuf,
276                                        sal_Size nSrcChars,
277                                        sal_Char * pDestBuf,
278                                        sal_Size nDestBytes,
279                                        sal_uInt32 nFlags,
280                                        sal_uInt32 * pInfo,
281                                        sal_Size * pSrcCvtChars)
282 {
283     sal_uInt16 const * pBig5Hkscs2001Data
284         = ((ImplBig5HkscsConverterData const *) pData)->
285               m_pUnicodeToBig5Hkscs2001Data;
286     sal_Int32 const * pBig5Hkscs2001PageOffsets
287         = ((ImplBig5HkscsConverterData const *) pData)->
288               m_pUnicodeToBig5Hkscs2001PageOffsets;
289     sal_Int32 const * pBig5Hkscs2001PlaneOffsets
290         = ((ImplBig5HkscsConverterData const *) pData)->
291               m_pUnicodeToBig5Hkscs2001PlaneOffsets;
292     ImplUniToDBCSHighTab const * pBig5Data
293         = ((ImplBig5HkscsConverterData const *) pData)->
294               m_pUnicodeToBig5Data;
295     sal_Unicode nHighSurrogate = 0;
296     sal_uInt32 nInfo = 0;
297     sal_Size nConverted = 0;
298     sal_Char * pDestBufPtr = pDestBuf;
299     sal_Char * pDestBufEnd = pDestBuf + nDestBytes;
300 
301     if (pContext)
302         nHighSurrogate
303             = ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate;
304 
305     for (; nConverted < nSrcChars; ++nConverted)
306     {
307         sal_Bool bUndefined = sal_True;
308         sal_uInt32 nChar = *pSrcBuf++;
309         if (nHighSurrogate == 0)
310         {
311             if (ImplIsHighSurrogate(nChar))
312             {
313                 nHighSurrogate = (sal_Unicode) nChar;
314                 continue;
315             }
316         }
317         else if (ImplIsLowSurrogate(nChar))
318             nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
319         else
320         {
321             bUndefined = sal_False;
322             goto bad_input;
323         }
324 
325         if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar))
326         {
327             bUndefined = sal_False;
328             goto bad_input;
329         }
330 
331         if (nChar < 0x80)
332             if (pDestBufPtr != pDestBufEnd)
333                 *pDestBufPtr++ = (sal_Char) nChar;
334             else
335                 goto no_output;
336         else
337         {
338             sal_uInt32 nBytes = 0;
339             sal_Int32 nOffset = pBig5Hkscs2001PlaneOffsets[nChar >> 16];
340             if (nOffset != -1)
341             {
342                 nOffset
343                     = pBig5Hkscs2001PageOffsets[nOffset + ((nChar & 0xFF00)
344                                                                >> 8)];
345                 if (nOffset != -1)
346                 {
347                     sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++];
348                     sal_uInt32 nFirst = nFirstLast & 0xFF;
349                     sal_uInt32 nLast = nFirstLast >> 8;
350                     sal_uInt32 nIndex = nChar & 0xFF;
351                     if (nIndex >= nFirst && nIndex <= nLast)
352                     {
353                         nBytes
354                             = pBig5Hkscs2001Data[nOffset + (nIndex - nFirst)];
355                     }
356                 }
357             }
358             if (nBytes == 0)
359             {
360                 sal_uInt32 nIndex1 = nChar >> 8;
361                 if (nIndex1 < 0x100)
362                 {
363                     sal_uInt32 nIndex2 = nChar & 0xFF;
364                     sal_uInt32 nFirst = pBig5Data[nIndex1].mnLowStart;
365                     if (nIndex2 >= nFirst
366                         && nIndex2 <= pBig5Data[nIndex1].mnLowEnd)
367                         nBytes = pBig5Data[nIndex1].
368                                      mpToUniTrailTab[nIndex2 - nFirst];
369                 }
370             }
371             if (nBytes == 0)
372             {
373                 ImplDBCSEUDCData const * p
374                     = ((ImplBig5HkscsConverterData const *) pData)->
375                           m_pEudcData;
376                 sal_uInt32 nCount
377                     = ((ImplBig5HkscsConverterData const *) pData)->
378                           m_nEudcCount;
379                 sal_uInt32 i;
380                 for (i = 0; i < nCount; ++i) {
381                     if (nChar >= p->mnUniStart && nChar <= p->mnUniEnd)
382                     {
383                         sal_uInt32 nIndex = nChar - p->mnUniStart;
384                         sal_uInt32 nLeadOff = nIndex / p->mnTrailRangeCount;
385                         sal_uInt32 nTrailOff = nIndex % p->mnTrailRangeCount;
386                         sal_uInt32 nSize;
387                         nBytes = (p->mnLeadStart + nLeadOff) << 8;
388                         nSize = p->mnTrail1End - p->mnTrail1Start + 1;
389                         if (nTrailOff < nSize)
390                         {
391                             nBytes |= p->mnTrail1Start + nTrailOff;
392                             break;
393                         }
394                         nTrailOff -= nSize;
395                         nSize = p->mnTrail2End - p->mnTrail2Start + 1;
396                         if (nTrailOff < nSize)
397                         {
398                             nBytes |= p->mnTrail2Start + nTrailOff;
399                             break;
400                         }
401                         nTrailOff -= nSize;
402                         nBytes |= p->mnTrail3Start + nTrailOff;
403                         break;
404                     }
405                     ++p;
406                 }
407             }
408             if (nBytes == 0)
409                 goto bad_input;
410             if (pDestBufEnd - pDestBufPtr >= 2)
411             {
412                 *pDestBufPtr++ = (sal_Char) (nBytes >> 8);
413                 *pDestBufPtr++ = (sal_Char) (nBytes & 0xFF);
414             }
415             else
416                 goto no_output;
417         }
418         nHighSurrogate = 0;
419         continue;
420 
421     bad_input:
422         switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined,
423                                                           nChar,
424                                                           nFlags,
425                                                           &pDestBufPtr,
426                                                           pDestBufEnd,
427                                                           &nInfo,
428                                                           NULL,
429                                                           0,
430                                                           NULL))
431         {
432         case IMPL_BAD_INPUT_STOP:
433             nHighSurrogate = 0;
434             break;
435 
436         case IMPL_BAD_INPUT_CONTINUE:
437             nHighSurrogate = 0;
438             continue;
439 
440         case IMPL_BAD_INPUT_NO_OUTPUT:
441             goto no_output;
442         }
443         break;
444 
445     no_output:
446         --pSrcBuf;
447         nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
448         break;
449     }
450 
451     if (nHighSurrogate != 0
452         && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
453                          | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
454                == 0)
455     {
456         if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
457             nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
458         else
459             switch (ImplHandleBadInputUnicodeToTextConversion(sal_False,
460                                                               0,
461                                                               nFlags,
462                                                               &pDestBufPtr,
463                                                               pDestBufEnd,
464                                                               &nInfo,
465                                                               NULL,
466                                                               0,
467                                                               NULL))
468             {
469             case IMPL_BAD_INPUT_STOP:
470             case IMPL_BAD_INPUT_CONTINUE:
471                 nHighSurrogate = 0;
472                 break;
473 
474             case IMPL_BAD_INPUT_NO_OUTPUT:
475                 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
476                 break;
477             }
478     }
479 
480     if (pContext)
481         ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate
482             = nHighSurrogate;
483     if (pInfo)
484         *pInfo = nInfo;
485     if (pSrcCvtChars)
486         *pSrcCvtChars = nConverted;
487 
488     return pDestBufPtr - pDestBuf;
489 }
490