xref: /trunk/main/sal/textenc/converteuctw.c (revision 647f063d)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 #include "converteuctw.h"
25 #include "context.h"
26 #include "converter.h"
27 #include "tenchelp.h"
28 #include "unichars.h"
29 #include "rtl/alloc.h"
30 #include "rtl/textcvt.h"
31 #include "sal/types.h"
32 
/* Decoder states of the EUC-TW to Unicode converter.

   The three STATE_2_x enumerators must remain consecutive and in this
   order: they describe successive bytes of a sequence introduced by 0x8E.
 */
typedef enum
{
    /* Not inside a multi-byte sequence. */
    IMPL_EUC_TW_TO_UNICODE_STATE_0 = 0,
    /* A first byte in 0xA1--0xFE has been read (two-byte sequence). */
    IMPL_EUC_TW_TO_UNICODE_STATE_1 = 1,
    /* The introducer byte 0x8E has been read. */
    IMPL_EUC_TW_TO_UNICODE_STATE_2_1 = 2,
    /* 0x8E and the plane byte have been read. */
    IMPL_EUC_TW_TO_UNICODE_STATE_2_2 = 3,
    /* 0x8E, the plane byte and the row byte have been read. */
    IMPL_EUC_TW_TO_UNICODE_STATE_2_3 = 4
} ImplEucTwToUnicodeState;
41 
42 typedef struct
43 {
44     ImplEucTwToUnicodeState m_eState;
45     sal_Int32 m_nPlane; /* 0--15 */
46     sal_Int32 m_nRow; /* 0--93 */
47 } ImplEucTwToUnicodeContext;
48 
ImplCreateEucTwToUnicodeContext(void)49 void * ImplCreateEucTwToUnicodeContext(void)
50 {
51     void * pContext = rtl_allocateMemory(sizeof (ImplEucTwToUnicodeContext));
52     ((ImplEucTwToUnicodeContext *) pContext)->m_eState
53         = IMPL_EUC_TW_TO_UNICODE_STATE_0;
54     return pContext;
55 }
56 
ImplResetEucTwToUnicodeContext(void * pContext)57 void ImplResetEucTwToUnicodeContext(void * pContext)
58 {
59     if (pContext)
60         ((ImplEucTwToUnicodeContext *) pContext)->m_eState
61             = IMPL_EUC_TW_TO_UNICODE_STATE_0;
62 }
63 
/** Convert EUC-TW encoded bytes to UTF-16 code units.

    Byte sequences recognised:
    - a byte below 0x80 is copied unchanged;
    - two bytes, each in 0xA1--0xFE, are looked up in plane index 0;
    - 0x8E, a plane byte in 0xA1--0xB0 and two bytes in 0xA1--0xFE are
      looked up in plane index (plane byte - 0xA1).
    Anything else is passed to ImplHandleBadInputTextToUnicodeConversion.

    @param pData
    converter data; accessed as an ImplEucTwConverterData.
    @param pContext
    optional ImplEucTwToUnicodeContext; when non-null, the decoder state is
    loaded from it on entry and stored back on exit.
    @param pSrcBuf  source bytes.
    @param nSrcBytes  number of source bytes.
    @param pDestBuf  destination buffer.
    @param nDestChars  capacity of the destination buffer in sal_Unicode.
    @param nFlags
    RTL_TEXTTOUNICODE_FLAGS_* bits; forwarded to the bad-input handler, and
    RTL_TEXTTOUNICODE_FLAGS_FLUSH selects how an incomplete trailing sequence
    is treated.
    @param pInfo  optional; receives the accumulated RTL_TEXTTOUNICODE_INFO_*
    bits.
    @param pSrcCvtBytes  optional; receives the number of source bytes counted
    as converted.
    @return  the number of sal_Unicode values written to pDestBuf.
 */
sal_Size ImplConvertEucTwToUnicode(ImplTextConverterData const * pData,
                                   void * pContext,
                                   sal_Char const * pSrcBuf,
                                   sal_Size nSrcBytes,
                                   sal_Unicode * pDestBuf,
                                   sal_Size nDestChars,
                                   sal_uInt32 nFlags,
                                   sal_uInt32 * pInfo,
                                   sal_Size * pSrcCvtBytes)
{
    ImplEucTwConverterData const * pTables
        = (ImplEucTwConverterData const *) pData;
    sal_uInt16 const * pToUnicode = pTables->m_pCns116431992ToUnicodeData;
    sal_Int32 const * pRowOffsets
        = pTables->m_pCns116431992ToUnicodeRowOffsets;
    sal_Int32 const * pPlaneOffsets
        = pTables->m_pCns116431992ToUnicodePlaneOffsets;
    ImplEucTwToUnicodeContext * pState
        = (ImplEucTwToUnicodeContext *) pContext;
    ImplEucTwToUnicodeState eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
    sal_Int32 nPlane = 0;
    sal_Int32 nRow = 0;
    sal_uInt32 nResultInfo = 0;
    sal_Size nConsumed = 0;
    sal_Unicode * pOut = pDestBuf;
    sal_Unicode * pOutEnd = pDestBuf + nDestChars;

    /* Resume a sequence that was cut off at the end of the previous call. */
    if (pState)
    {
        eState = pState->m_eState;
        nPlane = pState->m_nPlane;
        nRow = pState->m_nRow;
    }

    for (; nConsumed < nSrcBytes; ++nConsumed)
    {
        sal_Bool bUndefined = sal_True;
        sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++;

        switch (eState)
        {
        case IMPL_EUC_TW_TO_UNICODE_STATE_0:
            if (nChar < 0x80)
            {
                /* Single byte, copied through unchanged. */
                if (pOut == pOutEnd)
                    goto no_output;
                *pOut++ = (sal_Unicode) nChar;
            }
            else if (nChar == 0x8E)
            {
                eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_1;
            }
            else if (nChar >= 0xA1 && nChar <= 0xFE)
            {
                nRow = nChar - 0xA1;
                eState = IMPL_EUC_TW_TO_UNICODE_STATE_1;
            }
            else
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            break;

        case IMPL_EUC_TW_TO_UNICODE_STATE_1:
            if (nChar < 0xA1 || nChar > 0xFE)
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            /* Two-byte sequences always use plane index 0. */
            nPlane = 0;
            goto transform;

        case IMPL_EUC_TW_TO_UNICODE_STATE_2_1:
            if (nChar < 0xA1 || nChar > 0xB0)
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            nPlane = nChar - 0xA1;
            eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_2;
            break;

        case IMPL_EUC_TW_TO_UNICODE_STATE_2_2:
            if (nChar < 0xA1 || nChar > 0xFE)
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            nRow = nChar - 0xA1;
            eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_3;
            break;

        case IMPL_EUC_TW_TO_UNICODE_STATE_2_3:
            if (nChar < 0xA1 || nChar > 0xFE)
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            goto transform;
        }
        continue;

    transform:
        {
            /* nChar is the last byte of a complete sequence; nPlane and nRow
               were collected from the preceding bytes.  An offset of -1
               marks a plane or row without any mapping. */
            sal_Int32 nPlaneBase = pPlaneOffsets[nPlane];
            sal_Int32 nIndex;
            sal_uInt32 nRange;
            sal_uInt32 nLow;
            sal_uInt32 nHigh;
            sal_uInt32 nUnicode;

            if (nPlaneBase == -1)
                goto bad_input;

            nIndex = pRowOffsets[nPlaneBase + nRow];
            if (nIndex == -1)
                goto bad_input;

            /* The first entry of a row packs the lowest covered column in
               its low byte and the highest covered column in its high
               byte. */
            nRange = pToUnicode[nIndex++];
            nLow = nRange & 0xFF;
            nHigh = nRange >> 8;
            nChar -= 0xA0;
            if (nChar < nLow || nChar > nHigh)
                goto bad_input;

            nUnicode = pToUnicode[nIndex + (nChar - nLow)];
            if (nUnicode == 0xFFFF)
                goto bad_input;

            if (ImplIsHighSurrogate(nUnicode))
            {
                /* The matching second code unit comes from a further table
                   that follows the current one and starts with its own
                   first-column entry. */
                if (pOutEnd - pOut < 2)
                    goto no_output;
                nIndex += nHigh - nLow + 1;
                nLow = pToUnicode[nIndex++];
                *pOut++ = (sal_Unicode) nUnicode;
                *pOut++ = (sal_Unicode) pToUnicode[nIndex + (nChar - nLow)];
            }
            else
            {
                if (pOut == pOutEnd)
                    goto no_output;
                *pOut++ = (sal_Unicode) nUnicode;
            }

            eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
            continue;
        }

    bad_input:
        switch (ImplHandleBadInputTextToUnicodeConversion(
                    bUndefined, sal_True, 0, nFlags, &pOut, pOutEnd,
                    &nResultInfo))
        {
        case IMPL_BAD_INPUT_CONTINUE:
            eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
            continue;

        case IMPL_BAD_INPUT_STOP:
            eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
            break;

        case IMPL_BAD_INPUT_NO_OUTPUT:
            goto no_output;
        }
        break;

    no_output:
        /* Step back over the byte just read; it is not counted as
           converted. */
        --pSrcBuf;
        nResultInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
        break;
    }

    /* The input ended inside a multi-byte sequence and nothing else has
       been reported yet. */
    if (eState != IMPL_EUC_TW_TO_UNICODE_STATE_0
        && (nResultInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
                               | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
               == 0)
    {
        if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) != 0)
        {
            /* Flushing: the incomplete sequence is handled as bad input. */
            switch (ImplHandleBadInputTextToUnicodeConversion(
                        sal_False, sal_True, 0, nFlags, &pOut, pOutEnd,
                        &nResultInfo))
            {
            case IMPL_BAD_INPUT_STOP:
            case IMPL_BAD_INPUT_CONTINUE:
                eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
                break;

            case IMPL_BAD_INPUT_NO_OUTPUT:
                nResultInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
                break;
            }
        }
        else
        {
            /* Not flushing: keep the state and report the short source. */
            nResultInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
        }
    }

    if (pState)
    {
        pState->m_eState = eState;
        pState->m_nPlane = nPlane;
        pState->m_nRow = nRow;
    }
    if (pInfo)
        *pInfo = nResultInfo;
    if (pSrcCvtBytes)
        *pSrcCvtBytes = nConsumed;

    return pOut - pDestBuf;
}
285 
/** Convert UTF-16 code units to EUC-TW encoded bytes.

    Output produced per character:
    - a character below 0x80 is written as one byte;
    - a character mapped to plane 1 is written as two bytes, each 0xA0 plus
      a table value;
    - a character mapped to any other plane is written as 0x8E,
      0xA0 + plane, followed by the same two bytes.
    Surrogate pairs are combined before the lookup.  Unpaired surrogates,
    noncharacters and characters without a mapping are passed to
    ImplHandleBadInputUnicodeToTextConversion together with the complete
    code point.

    @param pData
    converter data; accessed as an ImplEucTwConverterData.
    @param pContext
    optional ImplUnicodeToTextContext; when non-null, a pending high
    surrogate is loaded from it on entry and stored back on exit.
    @param pSrcBuf  source UTF-16 code units.
    @param nSrcChars  number of source code units.
    @param pDestBuf  destination buffer.
    @param nDestBytes  capacity of the destination buffer in bytes.
    @param nFlags
    RTL_UNICODETOTEXT_FLAGS_* bits; forwarded to the bad-input handler, and
    RTL_UNICODETOTEXT_FLAGS_FLUSH selects how a trailing high surrogate is
    treated.
    @param pInfo  optional; receives the accumulated RTL_UNICODETOTEXT_INFO_*
    bits.
    @param pSrcCvtChars  optional; receives the number of source code units
    counted as converted.
    @return  the number of bytes written to pDestBuf.
 */
sal_Size ImplConvertUnicodeToEucTw(ImplTextConverterData const * pData,
                                   void * pContext,
                                   sal_Unicode const * pSrcBuf,
                                   sal_Size nSrcChars,
                                   sal_Char * pDestBuf,
                                   sal_Size nDestBytes,
                                   sal_uInt32 nFlags,
                                   sal_uInt32 * pInfo,
                                   sal_Size * pSrcCvtChars)
{
    sal_uInt8 const * pCns116431992Data
        = ((ImplEucTwConverterData const *) pData)->
              m_pUnicodeToCns116431992Data;
    sal_Int32 const * pCns116431992PageOffsets
        = ((ImplEucTwConverterData const *) pData)->
              m_pUnicodeToCns116431992PageOffsets;
    sal_Int32 const * pCns116431992PlaneOffsets
        = ((ImplEucTwConverterData const *) pData)->
              m_pUnicodeToCns116431992PlaneOffsets;
    sal_Unicode nHighSurrogate = 0;
    sal_uInt32 nInfo = 0;
    sal_Size nConverted = 0;
    sal_Char * pDestBufPtr = pDestBuf;
    sal_Char * pDestBufEnd = pDestBuf + nDestBytes;

    /* Pick up a high surrogate left over from the previous call. */
    if (pContext)
        nHighSurrogate
            = ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate;

    for (; nConverted < nSrcChars; ++nConverted)
    {
        sal_Bool bUndefined = sal_True;
        sal_uInt32 nChar = *pSrcBuf++;

        if (nHighSurrogate != 0)
        {
            /* A pending high surrogate must be completed by a low one. */
            if (!ImplIsLowSurrogate(nChar))
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
        }
        else if (ImplIsHighSurrogate(nChar))
        {
            /* Remember it and wait for the second half of the pair. */
            nHighSurrogate = (sal_Unicode) nChar;
            continue;
        }

        if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar))
        {
            bUndefined = sal_False;
            goto bad_input;
        }

        if (nChar < 0x80)
        {
            if (pDestBufPtr == pDestBufEnd)
                goto no_output;
            *pDestBufPtr++ = (sal_Char) nChar;
        }
        else
        {
            /* Three-level lookup: Unicode plane (bits 16 and up), page
               (bits 8--15), then the low byte within the page.  An offset
               of -1 marks a plane or page without any mapping. */
            sal_Int32 nOffset = pCns116431992PlaneOffsets[nChar >> 16];
            /* The low byte is kept in its own variable so that nChar still
               holds the complete code point whenever control reaches
               bad_input below. */
            sal_uInt32 nLowByte = nChar & 0xFF;
            sal_uInt32 nFirst;
            sal_uInt32 nLast;
            sal_uInt32 nPlane;
            if (nOffset == -1)
                goto bad_input;
            nOffset
                = pCns116431992PageOffsets[nOffset + ((nChar & 0xFF00) >> 8)];
            if (nOffset == -1)
                goto bad_input;
            /* A page starts with the first and last low byte it covers,
               followed by three bytes per covered character. */
            nFirst = pCns116431992Data[nOffset++];
            nLast = pCns116431992Data[nOffset++];
            if (nLowByte < nFirst || nLowByte > nLast)
                goto bad_input;
            nOffset += 3 * (nLowByte - nFirst);
            /* A plane value of 0 marks a character without a mapping. */
            nPlane = pCns116431992Data[nOffset++];
            if (nPlane == 0)
                goto bad_input;
            if (pDestBufEnd - pDestBufPtr < (nPlane == 1 ? 2 : 4))
                goto no_output;
            if (nPlane != 1)
            {
                *pDestBufPtr++ = (sal_Char) (unsigned char) 0x8E;
                *pDestBufPtr++ = (sal_Char) (0xA0 + nPlane);
            }
            *pDestBufPtr++ = (sal_Char) (0xA0 + pCns116431992Data[nOffset++]);
            *pDestBufPtr++ = (sal_Char) (0xA0 + pCns116431992Data[nOffset]);
        }
        nHighSurrogate = 0;
        continue;

    bad_input:
        switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined,
                                                          nChar,
                                                          nFlags,
                                                          &pDestBufPtr,
                                                          pDestBufEnd,
                                                          &nInfo,
                                                          NULL,
                                                          0,
                                                          NULL))
        {
        case IMPL_BAD_INPUT_STOP:
            nHighSurrogate = 0;
            break;

        case IMPL_BAD_INPUT_CONTINUE:
            nHighSurrogate = 0;
            continue;

        case IMPL_BAD_INPUT_NO_OUTPUT:
            goto no_output;
        }
        break;

    no_output:
        /* Step back over the code unit just read; it is not counted as
           converted. */
        --pSrcBuf;
        nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
        break;
    }

    /* The input ended after a high surrogate and nothing else has been
       reported yet. */
    if (nHighSurrogate != 0
        && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
                         | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
               == 0)
    {
        /* NOTE(review): this test has the opposite sense of the matching
           one in ImplConvertEucTwToUnicode, which reports SRCBUFFERTOSMALL
           when FLUSH is NOT set.  Left unchanged here; confirm the intended
           contract against rtl/textcvt.h before altering it. */
        if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
            nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
        else
            switch (ImplHandleBadInputUnicodeToTextConversion(sal_False,
                                                              0,
                                                              nFlags,
                                                              &pDestBufPtr,
                                                              pDestBufEnd,
                                                              &nInfo,
                                                              NULL,
                                                              0,
                                                              NULL))
            {
            case IMPL_BAD_INPUT_STOP:
            case IMPL_BAD_INPUT_CONTINUE:
                nHighSurrogate = 0;
                break;

            case IMPL_BAD_INPUT_NO_OUTPUT:
                nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                break;
            }
    }

    if (pContext)
        ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate
            = nHighSurrogate;
    if (pInfo)
        *pInfo = nInfo;
    if (pSrcCvtChars)
        *pSrcCvtChars = nConverted;

    return pDestBufPtr - pDestBuf;
}
449