1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 #include "convertbig5hkscs.h"
25 #include "context.h"
26 #include "converter.h"
27 #include "tenchelp.h"
28 #include "unichars.h"
29 #include "osl/diagnose.h"
30 #include "rtl/alloc.h"
31 #include "rtl/textcvt.h"
32 #include "sal/types.h"
33
34 typedef struct
35 {
36 sal_Int32 m_nRow; /* 0--255; 0 means none */
37 } ImplBig5HkscsToUnicodeContext;
38
ImplCreateBig5HkscsToUnicodeContext(void)39 void * ImplCreateBig5HkscsToUnicodeContext(void)
40 {
41 void * pContext
42 = rtl_allocateMemory(sizeof (ImplBig5HkscsToUnicodeContext));
43 ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow = 0;
44 return pContext;
45 }
46
ImplResetBig5HkscsToUnicodeContext(void * pContext)47 void ImplResetBig5HkscsToUnicodeContext(void * pContext)
48 {
49 if (pContext)
50 ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow = 0;
51 }
52
/*
 * Converts Big5-HKSCS encoded bytes to sal_Unicode code units.
 *
 * Bytes below 0x80 are copied through unchanged.  A byte in 0x81--0xFE starts
 * a double-byte character; its trail byte must lie in 0x40--0x7E or
 * 0xA1--0xFE.  A double-byte character is looked up in the HKSCS-2001 table
 * first, then in the plain Big5 table, then in the EUDC ranges.  When the
 * HKSCS-2001 table yields a high surrogate, the second code unit is read from
 * the table section that directly follows the row's first section.
 *
 * pData         converter tables, accessed as ImplBig5HkscsConverterData
 * pContext      optional ImplBig5HkscsToUnicodeContext that carries a pending
 *               lead byte from one call to the next; may be null
 * pSrcBuf       source bytes
 * nSrcBytes     number of bytes available at pSrcBuf
 * pDestBuf      destination buffer
 * nDestChars    capacity of pDestBuf in sal_Unicode units
 * nFlags        RTL_TEXTTOUNICODE_FLAGS_* bits; handed to the bad-input
 *               handler, and the FLUSH bit is examined after the loop
 * pInfo         if non-null, receives the accumulated info bits
 * pSrcCvtBytes  if non-null, receives the number of source bytes consumed
 *
 * Returns the number of sal_Unicode units written to pDestBuf.
 */
sal_Size ImplConvertBig5HkscsToUnicode(ImplTextConverterData const * pData,
                                       void * pContext,
                                       sal_Char const * pSrcBuf,
                                       sal_Size nSrcBytes,
                                       sal_Unicode * pDestBuf,
                                       sal_Size nDestChars,
                                       sal_uInt32 nFlags,
                                       sal_uInt32 * pInfo,
                                       sal_Size * pSrcCvtBytes)
{
    ImplBig5HkscsConverterData const * pTables
        = (ImplBig5HkscsConverterData const *) pData;
    sal_uInt16 const * pHkscsTable = pTables->m_pBig5Hkscs2001ToUnicodeData;
    sal_Int32 const * pHkscsRowOffsets
        = pTables->m_pBig5Hkscs2001ToUnicodeRowOffsets;
    ImplDBCSToUniLeadTab const * pBig5Rows = pTables->m_pBig5ToUnicodeData;
    sal_Unicode * pOut = pDestBuf;
    sal_Unicode * pOutEnd = pDestBuf + nDestChars;
    sal_Size nConsumed = 0;
    sal_uInt32 nStatus = 0;
    sal_Int32 nLead = 0;

    /* Resume with the lead byte left over from the previous call, if any. */
    if (pContext)
        nLead = ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow;

    for (; nConsumed < nSrcBytes; ++nConsumed)
    {
        sal_Bool bUndefined = sal_True;
        sal_uInt32 nByte = *(sal_uChar const *) pSrcBuf++;

        if (nLead == 0)
        {
            /* No lead byte pending: single byte, lead byte, or invalid. */
            if (nByte < 0x80)
            {
                if (pOut == pOutEnd)
                    goto no_output;
                *pOut++ = (sal_Unicode) nByte;
            }
            else if (nByte >= 0x81 && nByte <= 0xFE)
            {
                nLead = nByte;
            }
            else
            {
                bUndefined = sal_False;
                goto bad_input;
            }
        }
        else if ((nByte >= 0x40 && nByte <= 0x7E)
                 || (nByte >= 0xA1 && nByte <= 0xFE))
        {
            sal_uInt32 nCode = 0xFFFF; /* 0xFFFF marks "not found yet" */
            sal_Int32 nHkscsPos = pHkscsRowOffsets[nLead];
            sal_uInt32 nHkscsFirst = 0;
            sal_uInt32 nHkscsLast = 0;

            /* 1. HKSCS-2001 table; a row offset of -1 means the row is
                  absent.  The first entry of a row packs the first trail
                  byte (low 8 bits) and the last trail byte (high bits). */
            if (nHkscsPos != -1)
            {
                sal_uInt32 nRange = pHkscsTable[nHkscsPos++];
                nHkscsFirst = nRange & 0xFF;
                nHkscsLast = nRange >> 8;
                if (nByte >= nHkscsFirst && nByte <= nHkscsLast)
                {
                    nCode = pHkscsTable[nHkscsPos + (nByte - nHkscsFirst)];
                }
            }

            /* 2. Plain Big5 table; a stored 0 means "unmapped". */
            if (nCode == 0xFFFF)
            {
                sal_uInt32 nTrailFirst = pBig5Rows[nLead].mnTrailStart;
                if (nByte >= nTrailFirst
                    && nByte <= pBig5Rows[nLead].mnTrailEnd)
                {
                    nCode = pBig5Rows[nLead].
                                mpToUniTrailTab[nByte - nTrailFirst];
                    if (nCode == 0)
                    {
                        nCode = 0xFFFF;
                    }
                    OSL_VERIFY(!ImplIsHighSurrogate(nCode));
                }
            }

            /* 3. EUDC ranges: the first entry whose lead-byte range contains
                  the lead byte decides; up to three trail-byte ranges are
                  mapped onto consecutive code points. */
            if (nCode == 0xFFFF)
            {
                ImplDBCSEUDCData const * pEudc = pTables->m_pEudcData;
                sal_uInt32 nEudcCount = pTables->m_nEudcCount;
                sal_uInt32 nEntry;
                for (nEntry = 0; nEntry < nEudcCount; ++nEntry, ++pEudc)
                {
                    if (nLead < pEudc->mnLeadStart
                        || nLead > pEudc->mnLeadEnd)
                    {
                        continue;
                    }
                    if (nByte >= pEudc->mnTrail1Start)
                    {
                        if (nByte <= pEudc->mnTrail1End)
                        {
                            nCode
                                = pEudc->mnUniStart
                                  + (nLead - pEudc->mnLeadStart)
                                        * pEudc->mnTrailRangeCount
                                  + (nByte - pEudc->mnTrail1Start);
                        }
                        else if (pEudc->mnTrailCount >= 2
                                 && nByte >= pEudc->mnTrail2Start)
                        {
                            if (nByte <= pEudc->mnTrail2End)
                            {
                                nCode
                                    = pEudc->mnUniStart
                                      + (nLead - pEudc->mnLeadStart)
                                            * pEudc->mnTrailRangeCount
                                      + (nByte - pEudc->mnTrail2Start)
                                      + (pEudc->mnTrail1End
                                         - pEudc->mnTrail1Start + 1);
                            }
                            else if (pEudc->mnTrailCount >= 3
                                     && nByte >= pEudc->mnTrail3Start
                                     && nByte <= pEudc->mnTrail3End)
                            {
                                nCode
                                    = pEudc->mnUniStart
                                      + (nLead - pEudc->mnLeadStart)
                                            * pEudc->mnTrailRangeCount
                                      + (nByte - pEudc->mnTrail3Start)
                                      + (pEudc->mnTrail1End
                                         - pEudc->mnTrail1Start + 1)
                                      + (pEudc->mnTrail2End
                                         - pEudc->mnTrail2Start + 1);
                            }
                        }
                    }
                    break;
                }
                OSL_VERIFY(!ImplIsHighSurrogate(nCode));
            }

            if (nCode == 0xFFFF)
                goto bad_input;

            if (ImplIsHighSurrogate(nCode))
            {
                if (pOutEnd - pOut < 2)
                    goto no_output;
                /* Skip the remainder of the first table section; the next
                   section starts with its own first-trail-byte entry and
                   supplies the second code unit. */
                nHkscsPos += nHkscsLast - nHkscsFirst + 1;
                nHkscsFirst = pHkscsTable[nHkscsPos++];
                *pOut++ = (sal_Unicode) nCode;
                *pOut++ = (sal_Unicode) pHkscsTable[
                              nHkscsPos + (nByte - nHkscsFirst)];
            }
            else
            {
                if (pOut == pOutEnd)
                    goto no_output;
                *pOut++ = (sal_Unicode) nCode;
            }
            nLead = 0;
        }
        else
        {
            /* Lead byte followed by a byte outside the trail-byte ranges. */
            bUndefined = sal_False;
            goto bad_input;
        }
        continue;

    bad_input:
        switch (ImplHandleBadInputTextToUnicodeConversion(
                    bUndefined, sal_True, 0, nFlags, &pOut, pOutEnd,
                    &nStatus))
        {
        case IMPL_BAD_INPUT_STOP:
            nLead = 0;
            break;

        case IMPL_BAD_INPUT_CONTINUE:
            nLead = 0;
            continue;

        case IMPL_BAD_INPUT_NO_OUTPUT:
            goto no_output;
        }
        break;

    no_output:
        /* Leaving the loop via break skips ++nConsumed, so the current byte
           is not counted as converted. */
        --pSrcBuf;
        nStatus |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
        break;
    }

    /* Input ended after a lead byte, with no error or overflow so far. */
    if (nLead != 0
        && (nStatus & (RTL_TEXTTOUNICODE_INFO_ERROR
                           | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
               == 0)
    {
        if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
        {
            /* Not flushing: keep the lead byte and report it. */
            nStatus |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
        }
        else
        {
            /* Flushing: hand the dangling lead byte to the bad-input
               handler. */
            switch (ImplHandleBadInputTextToUnicodeConversion(
                        sal_False, sal_True, 0, nFlags, &pOut, pOutEnd,
                        &nStatus))
            {
            case IMPL_BAD_INPUT_STOP:
            case IMPL_BAD_INPUT_CONTINUE:
                nLead = 0;
                break;

            case IMPL_BAD_INPUT_NO_OUTPUT:
                nStatus |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
                break;
            }
        }
    }

    if (pContext)
        ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow = nLead;
    if (pInfo)
        *pInfo = nStatus;
    if (pSrcCvtBytes)
        *pSrcCvtBytes = nConsumed;

    return pOut - pDestBuf;
}
272
/*
 * Converts sal_Unicode code units to Big5-HKSCS encoded bytes.
 *
 * Code points below 0x80 are written as a single byte.  Every other code
 * point is looked up in the HKSCS-2001 table (indexed by plane, page and low
 * byte), then in the plain Big5 table (code points below 0x10000 only), then
 * in the EUDC ranges, and is written as two bytes.  Surrogate pairs are
 * combined before the lookup; a high surrogate at the end of the input is
 * remembered in the context.
 *
 * pData         converter tables, accessed as ImplBig5HkscsConverterData
 * pContext      optional ImplUnicodeToTextContext that carries a pending
 *               high surrogate from one call to the next; may be null
 * pSrcBuf       source code units
 * nSrcChars     number of code units available at pSrcBuf
 * pDestBuf      destination buffer
 * nDestBytes    capacity of pDestBuf in bytes
 * nFlags        RTL_UNICODETOTEXT_FLAGS_* bits; handed to the bad-input
 *               handler, and the FLUSH bit is examined after the loop
 * pInfo         if non-null, receives the accumulated info bits
 * pSrcCvtChars  if non-null, receives the number of source units consumed
 *
 * Returns the number of bytes written to pDestBuf.
 */
sal_Size ImplConvertUnicodeToBig5Hkscs(ImplTextConverterData const * pData,
                                       void * pContext,
                                       sal_Unicode const * pSrcBuf,
                                       sal_Size nSrcChars,
                                       sal_Char * pDestBuf,
                                       sal_Size nDestBytes,
                                       sal_uInt32 nFlags,
                                       sal_uInt32 * pInfo,
                                       sal_Size * pSrcCvtChars)
{
    ImplBig5HkscsConverterData const * pTables
        = (ImplBig5HkscsConverterData const *) pData;
    sal_uInt16 const * pHkscsTable = pTables->m_pUnicodeToBig5Hkscs2001Data;
    sal_Int32 const * pHkscsPageOffsets
        = pTables->m_pUnicodeToBig5Hkscs2001PageOffsets;
    sal_Int32 const * pHkscsPlaneOffsets
        = pTables->m_pUnicodeToBig5Hkscs2001PlaneOffsets;
    ImplUniToDBCSHighTab const * pBig5Pages = pTables->m_pUnicodeToBig5Data;
    sal_Char * pOut = pDestBuf;
    sal_Char * pOutEnd = pDestBuf + nDestBytes;
    sal_Size nConsumed = 0;
    sal_uInt32 nStatus = 0;
    sal_Unicode nPendingHigh = 0;

    /* Resume with the high surrogate left over from the previous call. */
    if (pContext)
        nPendingHigh
            = ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate;

    for (; nConsumed < nSrcChars; ++nConsumed)
    {
        sal_Bool bUndefined = sal_True;
        sal_uInt32 nCode = *pSrcBuf++;

        if (nPendingHigh == 0)
        {
            if (ImplIsHighSurrogate(nCode))
            {
                /* Hold the high surrogate until its partner arrives. */
                nPendingHigh = (sal_Unicode) nCode;
                continue;
            }
        }
        else if (ImplIsLowSurrogate(nCode))
        {
            nCode = ImplCombineSurrogates(nPendingHigh, nCode);
        }
        else
        {
            /* High surrogate not followed by a low surrogate. */
            bUndefined = sal_False;
            goto bad_input;
        }

        if (ImplIsLowSurrogate(nCode) || ImplIsNoncharacter(nCode))
        {
            bUndefined = sal_False;
            goto bad_input;
        }

        if (nCode < 0x80)
        {
            if (pOut == pOutEnd)
                goto no_output;
            *pOut++ = (sal_Char) nCode;
        }
        else
        {
            sal_uInt32 nDbcs = 0; /* 0 means "no mapping found yet" */
            sal_Int32 nHkscsPos = pHkscsPlaneOffsets[nCode >> 16];

            /* 1. HKSCS-2001 table: plane -> page -> low byte; an offset of
                  -1 means the plane or page is absent. */
            if (nHkscsPos != -1)
            {
                nHkscsPos = pHkscsPageOffsets[
                                nHkscsPos + ((nCode & 0xFF00) >> 8)];
                if (nHkscsPos != -1)
                {
                    sal_uInt32 nRange = pHkscsTable[nHkscsPos++];
                    sal_uInt32 nLowFirst = nRange & 0xFF;
                    sal_uInt32 nLowLast = nRange >> 8;
                    sal_uInt32 nLow = nCode & 0xFF;
                    if (nLow >= nLowFirst && nLow <= nLowLast)
                    {
                        nDbcs = pHkscsTable[nHkscsPos + (nLow - nLowFirst)];
                    }
                }
            }

            /* 2. Plain Big5 table, which has 0x100 high-byte entries. */
            if (nDbcs == 0)
            {
                sal_uInt32 nHigh = nCode >> 8;
                if (nHigh < 0x100)
                {
                    sal_uInt32 nLow = nCode & 0xFF;
                    sal_uInt32 nLowFirst = pBig5Pages[nHigh].mnLowStart;
                    if (nLow >= nLowFirst
                        && nLow <= pBig5Pages[nHigh].mnLowEnd)
                    {
                        nDbcs = pBig5Pages[nHigh].
                                    mpToUniTrailTab[nLow - nLowFirst];
                    }
                }
            }

            /* 3. EUDC ranges: split the offset inside the Unicode range into
                  a lead-byte offset and a trail offset, then place the trail
                  offset in the first trail range that holds it. */
            if (nDbcs == 0)
            {
                ImplDBCSEUDCData const * pEudc = pTables->m_pEudcData;
                sal_uInt32 nEudcCount = pTables->m_nEudcCount;
                sal_uInt32 nEntry;
                for (nEntry = 0; nEntry < nEudcCount; ++nEntry, ++pEudc)
                {
                    sal_uInt32 nIndex;
                    sal_uInt32 nLeadOff;
                    sal_uInt32 nTrailOff;
                    sal_uInt32 nRangeSize;

                    if (nCode < pEudc->mnUniStart
                        || nCode > pEudc->mnUniEnd)
                    {
                        continue;
                    }

                    nIndex = nCode - pEudc->mnUniStart;
                    nLeadOff = nIndex / pEudc->mnTrailRangeCount;
                    nTrailOff = nIndex % pEudc->mnTrailRangeCount;
                    nDbcs = (pEudc->mnLeadStart + nLeadOff) << 8;

                    nRangeSize
                        = pEudc->mnTrail1End - pEudc->mnTrail1Start + 1;
                    if (nTrailOff < nRangeSize)
                    {
                        nDbcs |= pEudc->mnTrail1Start + nTrailOff;
                    }
                    else
                    {
                        nTrailOff -= nRangeSize;
                        nRangeSize
                            = pEudc->mnTrail2End - pEudc->mnTrail2Start + 1;
                        if (nTrailOff < nRangeSize)
                        {
                            nDbcs |= pEudc->mnTrail2Start + nTrailOff;
                        }
                        else
                        {
                            nTrailOff -= nRangeSize;
                            nDbcs |= pEudc->mnTrail3Start + nTrailOff;
                        }
                    }
                    break;
                }
            }

            if (nDbcs == 0)
                goto bad_input;
            if (pOutEnd - pOut < 2)
                goto no_output;
            *pOut++ = (sal_Char) (nDbcs >> 8);
            *pOut++ = (sal_Char) (nDbcs & 0xFF);
        }
        nPendingHigh = 0;
        continue;

    bad_input:
        switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined,
                                                          nCode,
                                                          nFlags,
                                                          &pOut,
                                                          pOutEnd,
                                                          &nStatus,
                                                          NULL,
                                                          0,
                                                          NULL))
        {
        case IMPL_BAD_INPUT_STOP:
            nPendingHigh = 0;
            break;

        case IMPL_BAD_INPUT_CONTINUE:
            nPendingHigh = 0;
            continue;

        case IMPL_BAD_INPUT_NO_OUTPUT:
            goto no_output;
        }
        break;

    no_output:
        /* Leaving the loop via break skips ++nConsumed, so the current code
           unit is not counted as converted. */
        --pSrcBuf;
        nStatus |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
        break;
    }

    /* Input ended after a high surrogate, with no error or overflow so far. */
    if (nPendingHigh != 0
        && (nStatus & (RTL_UNICODETOTEXT_INFO_ERROR
                           | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
               == 0)
    {
        /* NOTE(review): SRCBUFFERTOSMALL is reported here when FLUSH is SET,
           which is the opposite polarity of the FLUSH test in
           ImplConvertBig5HkscsToUnicode -- confirm this is intended before
           changing it. */
        if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
        {
            nStatus |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
        }
        else
        {
            switch (ImplHandleBadInputUnicodeToTextConversion(sal_False,
                                                              0,
                                                              nFlags,
                                                              &pOut,
                                                              pOutEnd,
                                                              &nStatus,
                                                              NULL,
                                                              0,
                                                              NULL))
            {
            case IMPL_BAD_INPUT_STOP:
            case IMPL_BAD_INPUT_CONTINUE:
                nPendingHigh = 0;
                break;

            case IMPL_BAD_INPUT_NO_OUTPUT:
                nStatus |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                break;
            }
        }
    }

    if (pContext)
        ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate
            = nPendingHigh;
    if (pInfo)
        *pInfo = nStatus;
    if (pSrcCvtChars)
        *pSrcCvtChars = nConsumed;

    return pOut - pDestBuf;
}
490