1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 #include "converteuctw.h"
25 #include "context.h"
26 #include "converter.h"
27 #include "tenchelp.h"
28 #include "unichars.h"
29 #include "rtl/alloc.h"
30 #include "rtl/textcvt.h"
31 #include "sal/types.h"
32
/*
 * Decoder states for the EUC-TW to Unicode direction.  Each state records
 * how much of the current multi-byte character has already been consumed.
 */
typedef enum
{
    /* No character in progress. */
    IMPL_EUC_TW_TO_UNICODE_STATE_0 = 0,
    /* Two-byte form: the first byte (0xA1--0xFE, the row) has been read. */
    IMPL_EUC_TW_TO_UNICODE_STATE_1 = 1,
    /* Four-byte form: the 0x8E introducer has been read. */
    IMPL_EUC_TW_TO_UNICODE_STATE_2_1 = 2,
    /* Four-byte form: 0x8E and the plane byte have been read. */
    IMPL_EUC_TW_TO_UNICODE_STATE_2_2 = 3,
    /* Four-byte form: 0x8E, the plane byte and the row byte have been read. */
    IMPL_EUC_TW_TO_UNICODE_STATE_2_3 = 4
} ImplEucTwToUnicodeState;
41
42 typedef struct
43 {
44 ImplEucTwToUnicodeState m_eState;
45 sal_Int32 m_nPlane; /* 0--15 */
46 sal_Int32 m_nRow; /* 0--93 */
47 } ImplEucTwToUnicodeContext;
48
ImplCreateEucTwToUnicodeContext(void)49 void * ImplCreateEucTwToUnicodeContext(void)
50 {
51 void * pContext = rtl_allocateMemory(sizeof (ImplEucTwToUnicodeContext));
52 ((ImplEucTwToUnicodeContext *) pContext)->m_eState
53 = IMPL_EUC_TW_TO_UNICODE_STATE_0;
54 return pContext;
55 }
56
ImplResetEucTwToUnicodeContext(void * pContext)57 void ImplResetEucTwToUnicodeContext(void * pContext)
58 {
59 if (pContext)
60 ((ImplEucTwToUnicodeContext *) pContext)->m_eState
61 = IMPL_EUC_TW_TO_UNICODE_STATE_0;
62 }
63
/*
 * Convert EUC-TW bytes to UTF-16 code units.
 *
 * Recognised input forms:
 *   - a byte below 0x80 is copied through unchanged;
 *   - two bytes, each 0xA1--0xFE, select row and column in plane index 0;
 *   - 0x8E, a plane byte 0xA1--0xB0, then a row and a column byte (each
 *     0xA1--0xFE) select a character in plane index 0--15.
 * Any other byte is reported to the bad-input handler as malformed
 * (bUndefined false); a well-formed sequence without a table entry is
 * reported as undefined (bUndefined true).
 *
 * pData        converter tables (an ImplEucTwConverterData)
 * pContext     optional ImplEucTwToUnicodeContext; when non-null the
 *              decoder state is loaded from it on entry and stored back on
 *              exit
 * pSrcBuf      source bytes
 * nSrcBytes    number of source bytes
 * pDestBuf     destination buffer
 * nDestChars   capacity of pDestBuf in sal_Unicode units
 * nFlags       RTL_TEXTTOUNICODE_FLAGS_* bits; passed on to the bad-input
 *              handler, and RTL_TEXTTOUNICODE_FLAGS_FLUSH decides how a
 *              character left incomplete at the end of the input is treated
 * pInfo        optional; receives the accumulated RTL_TEXTTOUNICODE_INFO_*
 *              bits
 * pSrcCvtBytes optional; receives the number of source bytes consumed
 *
 * Returns the number of sal_Unicode units written to pDestBuf.
 */
sal_Size ImplConvertEucTwToUnicode(ImplTextConverterData const * pData,
                                   void * pContext,
                                   sal_Char const * pSrcBuf,
                                   sal_Size nSrcBytes,
                                   sal_Unicode * pDestBuf,
                                   sal_Size nDestChars,
                                   sal_uInt32 nFlags,
                                   sal_uInt32 * pInfo,
                                   sal_Size * pSrcCvtBytes)
{
    ImplEucTwConverterData const * pTables
        = (ImplEucTwConverterData const *) pData;
    sal_uInt16 const * pCodeData = pTables->m_pCns116431992ToUnicodeData;
    sal_Int32 const * pRowOffsets
        = pTables->m_pCns116431992ToUnicodeRowOffsets;
    sal_Int32 const * pPlaneOffsets
        = pTables->m_pCns116431992ToUnicodePlaneOffsets;
    ImplEucTwToUnicodeContext * pSaved
        = (ImplEucTwToUnicodeContext *) pContext;
    sal_uChar const * pBytes = (sal_uChar const *) pSrcBuf;
    ImplEucTwToUnicodeState eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
    sal_Int32 nPlane = 0;
    sal_Int32 nRow = 0;
    sal_uInt32 nInfo = 0;
    sal_Size nConverted = 0;
    sal_Unicode * pOut = pDestBuf;
    sal_Unicode * pOutEnd = pDestBuf + nDestChars;

    /* Resume a character that a previous call left unfinished. */
    if (pSaved)
    {
        eState = pSaved->m_eState;
        nPlane = pSaved->m_nPlane;
        nRow = pSaved->m_nRow;
    }

    for (; nConverted < nSrcBytes; ++nConverted)
    {
        sal_Bool bUndefined = sal_True;
        sal_uInt32 nChar = pBytes[nConverted];

        switch (eState)
        {
        case IMPL_EUC_TW_TO_UNICODE_STATE_0:
            if (nChar < 0x80)
            {
                if (pOut == pOutEnd)
                {
                    goto no_output;
                }
                *pOut++ = (sal_Unicode) nChar;
            }
            else if (nChar == 0x8E)
            {
                /* Introducer of the four-byte form. */
                eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_1;
            }
            else if (nChar >= 0xA1 && nChar <= 0xFE)
            {
                /* First byte of the two-byte form. */
                nRow = nChar - 0xA1;
                eState = IMPL_EUC_TW_TO_UNICODE_STATE_1;
            }
            else
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            break;

        case IMPL_EUC_TW_TO_UNICODE_STATE_1:
            if (nChar < 0xA1 || nChar > 0xFE)
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            /* The two-byte form always addresses plane index 0. */
            nPlane = 0;
            goto transform;

        case IMPL_EUC_TW_TO_UNICODE_STATE_2_1:
            if (nChar < 0xA1 || nChar > 0xB0)
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            nPlane = nChar - 0xA1;
            eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_2;
            break;

        case IMPL_EUC_TW_TO_UNICODE_STATE_2_2:
            if (nChar < 0xA1 || nChar > 0xFE)
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            nRow = nChar - 0xA1;
            eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_3;
            break;

        case IMPL_EUC_TW_TO_UNICODE_STATE_2_3:
            if (nChar < 0xA1 || nChar > 0xFE)
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            goto transform;
        }
        continue;

    transform:
        {
            /*
             * nChar is the final (column) byte.  The lookup goes plane ->
             * row -> column; an offset of -1 at either of the first two
             * levels means there is no data for that plane or row.
             */
            sal_Int32 nPlaneBase = pPlaneOffsets[nPlane];
            sal_Int32 nIndex;
            sal_uInt32 nRange;
            sal_uInt32 nLow;
            sal_uInt32 nHigh;
            sal_uInt32 nCode;

            if (nPlaneBase == -1)
            {
                goto bad_input;
            }
            nIndex = pRowOffsets[nPlaneBase + nRow];
            if (nIndex == -1)
            {
                goto bad_input;
            }

            /*
             * The row data starts with one word holding the first covered
             * column in its low byte and the last covered column in its
             * high byte; the per-column entries follow.
             */
            nRange = pCodeData[nIndex++];
            nLow = nRange & 0xFF;
            nHigh = nRange >> 8;
            nChar -= 0xA0;
            if (nChar < nLow || nChar > nHigh)
            {
                goto bad_input;
            }

            nCode = pCodeData[nIndex + (nChar - nLow)];
            if (nCode == 0xFFFF)
            {
                /* 0xFFFF marks a column without a mapping. */
                goto bad_input;
            }

            if (ImplIsHighSurrogate(nCode))
            {
                if (pOutEnd - pOut < 2)
                {
                    goto no_output;
                }
                /*
                 * The matching low surrogates live in a second table right
                 * behind the first one; it begins with its own first-column
                 * value.
                 */
                nIndex += nHigh - nLow + 1;
                nLow = pCodeData[nIndex++];
                *pOut++ = (sal_Unicode) nCode;
                *pOut++ = (sal_Unicode) pCodeData[nIndex + (nChar - nLow)];
            }
            else
            {
                if (pOut == pOutEnd)
                {
                    goto no_output;
                }
                *pOut++ = (sal_Unicode) nCode;
            }
            eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
            continue;
        }

    bad_input:
        switch (ImplHandleBadInputTextToUnicodeConversion(
                    bUndefined, sal_True, 0, nFlags, &pOut, pOutEnd, &nInfo))
        {
        case IMPL_BAD_INPUT_STOP:
            /* Leave the loop; the offending byte is not counted. */
            eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
            break;

        case IMPL_BAD_INPUT_CONTINUE:
            /* Count the offending byte as consumed and carry on. */
            eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
            continue;

        case IMPL_BAD_INPUT_NO_OUTPUT:
            goto no_output;
        }
        break;

    no_output:
        /*
         * The current byte is not counted as consumed and eState is kept,
         * so a later call can retry from this byte.
         */
        nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
        break;
    }

    /*
     * The input ended in the middle of a character and nothing else went
     * wrong: with FLUSH the partial character is bad input, otherwise the
     * caller is told that more source bytes are needed.
     */
    if (eState != IMPL_EUC_TW_TO_UNICODE_STATE_0
        && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
                         | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
               == 0)
    {
        if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) != 0)
        {
            switch (ImplHandleBadInputTextToUnicodeConversion(
                        sal_False, sal_True, 0, nFlags, &pOut, pOutEnd,
                        &nInfo))
            {
            case IMPL_BAD_INPUT_STOP:
            case IMPL_BAD_INPUT_CONTINUE:
                eState = IMPL_EUC_TW_TO_UNICODE_STATE_0;
                break;

            case IMPL_BAD_INPUT_NO_OUTPUT:
                nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
                break;
            }
        }
        else
        {
            nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
        }
    }

    if (pSaved)
    {
        pSaved->m_eState = eState;
        pSaved->m_nPlane = nPlane;
        pSaved->m_nRow = nRow;
    }
    if (pInfo)
    {
        *pInfo = nInfo;
    }
    if (pSrcCvtBytes)
    {
        *pSrcCvtBytes = nConverted;
    }

    return pOut - pDestBuf;
}
285
/*
 * Convert UTF-16 code units to EUC-TW bytes.
 *
 * Characters below 0x80 are written as a single byte.  Every other
 * character is looked up by plane (bits 16 and up), page (bits 8--15) and
 * low byte.  The table record for a character holds three bytes: a plane
 * number followed by two bytes that are each emitted with 0xA0 added.
 * Plane number 0 means "no mapping", plane number 1 yields the two-byte
 * form, and any other plane number yields the four-byte form
 * 0x8E, 0xA0 + plane, followed by the same two bytes.
 *
 * pData        converter tables (an ImplEucTwConverterData)
 * pContext     optional ImplUnicodeToTextContext; a pending high surrogate
 *              is loaded from it on entry and stored back on exit
 * pSrcBuf      source UTF-16 code units
 * nSrcChars    number of source code units
 * pDestBuf     destination buffer
 * nDestBytes   capacity of pDestBuf in bytes
 * nFlags       RTL_UNICODETOTEXT_FLAGS_* bits; passed on to the bad-input
 *              handler, and RTL_UNICODETOTEXT_FLAGS_FLUSH is consulted when
 *              the input ends with a lone high surrogate
 * pInfo        optional; receives the accumulated RTL_UNICODETOTEXT_INFO_*
 *              bits
 * pSrcCvtChars optional; receives the number of source code units consumed
 *
 * Returns the number of bytes written to pDestBuf.
 */
sal_Size ImplConvertUnicodeToEucTw(ImplTextConverterData const * pData,
                                   void * pContext,
                                   sal_Unicode const * pSrcBuf,
                                   sal_Size nSrcChars,
                                   sal_Char * pDestBuf,
                                   sal_Size nDestBytes,
                                   sal_uInt32 nFlags,
                                   sal_uInt32 * pInfo,
                                   sal_Size * pSrcCvtChars)
{
    sal_uInt8 const * pCns116431992Data
        = ((ImplEucTwConverterData const *) pData)->
              m_pUnicodeToCns116431992Data;
    sal_Int32 const * pCns116431992PageOffsets
        = ((ImplEucTwConverterData const *) pData)->
              m_pUnicodeToCns116431992PageOffsets;
    sal_Int32 const * pCns116431992PlaneOffsets
        = ((ImplEucTwConverterData const *) pData)->
              m_pUnicodeToCns116431992PlaneOffsets;
    sal_Unicode nHighSurrogate = 0;
    sal_uInt32 nInfo = 0;
    sal_Size nConverted = 0;
    sal_Char * pDestBufPtr = pDestBuf;
    sal_Char * pDestBufEnd = pDestBuf + nDestBytes;

    /* Pick up a high surrogate left pending by a previous call. */
    if (pContext)
    {
        nHighSurrogate
            = ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate;
    }

    for (; nConverted < nSrcChars; ++nConverted)
    {
        sal_Bool bUndefined = sal_True;
        sal_uInt32 nChar = *pSrcBuf++;

        if (nHighSurrogate != 0)
        {
            /* A pending high surrogate must be completed right away. */
            if (!ImplIsLowSurrogate(nChar))
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
        }
        else if (ImplIsHighSurrogate(nChar))
        {
            /* Remember it and wait for the low surrogate. */
            nHighSurrogate = (sal_Unicode) nChar;
            continue;
        }

        if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar))
        {
            bUndefined = sal_False;
            goto bad_input;
        }

        if (nChar < 0x80)
        {
            if (pDestBufPtr == pDestBufEnd)
            {
                goto no_output;
            }
            *pDestBufPtr++ = (sal_Char) nChar;
        }
        else
        {
            sal_Int32 nOffset = pCns116431992PlaneOffsets[nChar >> 16];
            sal_uInt32 nFirst;
            sal_uInt32 nLast;
            sal_uInt32 nLowByte;
            sal_uInt32 nPlane;

            /* An offset of -1 means the plane or page has no mappings. */
            if (nOffset == -1)
            {
                goto bad_input;
            }
            nOffset
                = pCns116431992PageOffsets[nOffset + ((nChar & 0xFF00) >> 8)];
            if (nOffset == -1)
            {
                goto bad_input;
            }

            /* Page data: first covered low byte, last covered low byte,
               then one three-byte record per covered low byte. */
            nFirst = pCns116431992Data[nOffset++];
            nLast = pCns116431992Data[nOffset++];

            /*
             * Keep the low byte in its own variable.  nChar must still hold
             * the complete code point if one of the checks below jumps to
             * bad_input, because it is handed to the bad-input handler.
             */
            nLowByte = nChar & 0xFF;
            if (nLowByte < nFirst || nLowByte > nLast)
            {
                goto bad_input;
            }
            nOffset += 3 * (nLowByte - nFirst);
            nPlane = pCns116431992Data[nOffset++];
            if (nPlane == 0)
            {
                goto bad_input;
            }
            if (pDestBufEnd - pDestBufPtr < (nPlane == 1 ? 2 : 4))
            {
                goto no_output;
            }
            if (nPlane != 1)
            {
                *pDestBufPtr++ = (sal_Char) (unsigned char) 0x8E;
                *pDestBufPtr++ = (sal_Char) (0xA0 + nPlane);
            }
            *pDestBufPtr++ = (sal_Char) (0xA0 + pCns116431992Data[nOffset++]);
            *pDestBufPtr++ = (sal_Char) (0xA0 + pCns116431992Data[nOffset]);
        }
        nHighSurrogate = 0;
        continue;

    bad_input:
        switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined,
                                                          nChar,
                                                          nFlags,
                                                          &pDestBufPtr,
                                                          pDestBufEnd,
                                                          &nInfo,
                                                          NULL,
                                                          0,
                                                          NULL))
        {
        case IMPL_BAD_INPUT_STOP:
            /* Leave the loop; the offending unit is not counted. */
            nHighSurrogate = 0;
            break;

        case IMPL_BAD_INPUT_CONTINUE:
            /* Count the offending unit as consumed and carry on. */
            nHighSurrogate = 0;
            continue;

        case IMPL_BAD_INPUT_NO_OUTPUT:
            goto no_output;
        }
        break;

    no_output:
        /*
         * The current unit is not counted as consumed; a pending high
         * surrogate is kept so a later call can retry from this unit.
         */
        --pSrcBuf;
        nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
        break;
    }

    /*
     * The input ended with a lone high surrogate and nothing else went
     * wrong.
     * NOTE(review): the FLUSH test here is the opposite of the one in
     * ImplConvertEucTwToUnicode (FLUSH set reports SRCBUFFERTOSMALL, FLUSH
     * clear goes to the bad-input handler).  Behaviour is kept as is;
     * confirm against callers before changing it.
     */
    if (nHighSurrogate != 0
        && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
                         | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
               == 0)
    {
        if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
        {
            nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
        }
        else
        {
            switch (ImplHandleBadInputUnicodeToTextConversion(sal_False,
                                                              0,
                                                              nFlags,
                                                              &pDestBufPtr,
                                                              pDestBufEnd,
                                                              &nInfo,
                                                              NULL,
                                                              0,
                                                              NULL))
            {
            case IMPL_BAD_INPUT_STOP:
            case IMPL_BAD_INPUT_CONTINUE:
                nHighSurrogate = 0;
                break;

            case IMPL_BAD_INPUT_NO_OUTPUT:
                nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                break;
            }
        }
    }

    if (pContext)
    {
        ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate
            = nHighSurrogate;
    }
    if (pInfo)
    {
        *pInfo = nInfo;
    }
    if (pSrcCvtChars)
    {
        *pSrcCvtChars = nConverted;
    }

    return pDestBufPtr - pDestBuf;
}
449