xref: /trunk/main/sal/textenc/tcvtmb.c (revision 86e1cf34)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 #include "tenchelp.h"
25 #include "unichars.h"
26 #include "rtl/textcvt.h"
27 
28 /* ======================================================================= */
29 
/* The DBCS to Unicode conversion routines use a lead table indexed by the */
/* first byte; its entry yields either the trail table or, for single byte */
/* characters, the Unicode value. There is a separate table for every lead */
/* byte, because many of these tables can then be shared between different */
/* charset encodings. */
34 
35 /* ======================================================================= */
36 
/*
 * Convert text in a double byte character set (DBCS) to Unicode.
 *
 *  pData         conversion tables, accessed as ImplDBCSConvertData
 *  pContext      unused
 *  pSrcBuf       source bytes (nSrcBytes of them)
 *  pDestBuf      destination buffer with room for nDestChars characters
 *  nFlags        RTL_TEXTTOUNICODE_FLAGS_*; the UNDEFINED, INVALID and
 *                MBUNDEFINED masks select whether such input stops the
 *                conversion, is skipped, or is substituted
 *  pInfo         reset to 0 and then OR-ed with RTL_TEXTTOUNICODE_INFO_*
 *                bits describing what was encountered
 *  pSrcCvtBytes  receives the number of source bytes consumed
 *
 * Returns the number of characters written to pDestBuf.
 *
 * If the destination buffer is full, the character that did not fit is not
 * counted in *pSrcCvtBytes, so a caller can resume at that offset.  If the
 * conversion stops on an INVALID/MBUNDEFINED error, the lead byte of the
 * offending pair has already been consumed and is included in the count.
 */
sal_Size ImplDBCSToUnicode( const ImplTextConverterData* pData, void* pContext,
                            const sal_Char* pSrcBuf, sal_Size nSrcBytes,
                            sal_Unicode* pDestBuf, sal_Size nDestChars,
                            sal_uInt32 nFlags, sal_uInt32* pInfo,
                            sal_Size* pSrcCvtBytes )
{
    sal_uChar                   cLead;
    sal_uChar                   cTrail;
    sal_Unicode                 cConv;
    const ImplDBCSToUniLeadTab* pLeadEntry;
    const ImplDBCSConvertData*  pConvertData = (const ImplDBCSConvertData*)pData;
    const ImplDBCSToUniLeadTab* pLeadTab = pConvertData->mpToUniLeadTab;
    sal_Unicode*                pEndDestBuf;
    const sal_Char*             pEndSrcBuf;
    const sal_Char*             pCharStart;

    (void) pContext; /* unused */

    *pInfo = 0;
    pEndDestBuf = pDestBuf+nDestChars;
    pEndSrcBuf  = pSrcBuf+nSrcBytes;
    while ( pSrcBuf < pEndSrcBuf )
    {
        /* remember the first byte of this character; needed to hand the */
        /* whole character back if the destination buffer is full */
        pCharStart = pSrcBuf;

        cLead = (sal_uChar)*pSrcBuf;

        /* get entry for the lead byte */
        pLeadEntry = pLeadTab+cLead;

        /* SingleByte char? */
        if (pLeadEntry->mpToUniTrailTab == NULL
            || cLead < pConvertData->mnLeadStart
            || cLead > pConvertData->mnLeadEnd)
        {
            cConv = pLeadEntry->mnUniChar;

            /* a 0 entry means "undefined", except for the NUL byte itself */
            if ( !cConv && (cLead != 0) )
            {
                *pInfo |= RTL_TEXTTOUNICODE_INFO_UNDEFINED;
                if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR )
                {
                    *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
                    break;
                }
                else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE )
                {
                    pSrcBuf++;
                    continue;
                }
                else
                    cConv = ImplGetUndefinedUnicodeChar(cLead, nFlags);
            }
        }
        else
        {
            /* Source buffer too small: the trail byte is missing */
            if ( pSrcBuf +1 == pEndSrcBuf )
            {
                *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
                break;
            }

            pSrcBuf++;
            cTrail = (sal_uChar)*pSrcBuf;
            if ( (cTrail >= pLeadEntry->mnTrailStart) && (cTrail <= pLeadEntry->mnTrailEnd) )
                cConv = pLeadEntry->mpToUniTrailTab[cTrail-pLeadEntry->mnTrailStart];
            else
                cConv = 0;

            if ( !cConv )
            {
                /* EUDC Ranges */
                sal_uInt16              i;
                const ImplDBCSEUDCData* pEUDCTab = pConvertData->mpEUDCTab;
                for ( i = 0; i < pConvertData->mnEUDCCount; i++, pEUDCTab++ )
                {
                    /* number of code points that the trail ranges before */
                    /* the matching one contribute to a lead byte's row */
                    sal_uInt16 nTrailCount;

                    if ( (cLead < pEUDCTab->mnLeadStart) ||
                         (cLead > pEUDCTab->mnLeadEnd) )
                        continue;

                    /* first trail range */
                    if ( (cTrail >= pEUDCTab->mnTrail1Start) &&
                         (cTrail <= pEUDCTab->mnTrail1End) )
                    {
                        cConv = pEUDCTab->mnUniStart+
                                ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
                                (cTrail-pEUDCTab->mnTrail1Start);
                        break;
                    }

                    /* second trail range, located behind the first one */
                    nTrailCount = (sal_uInt16)(pEUDCTab->mnTrail1End-pEUDCTab->mnTrail1Start+1);
                    if ( (pEUDCTab->mnTrailCount >= 2) &&
                         (cTrail >= pEUDCTab->mnTrail2Start) &&
                         (cTrail <= pEUDCTab->mnTrail2End) )
                    {
                        cConv = pEUDCTab->mnUniStart+
                                ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
                                nTrailCount+
                                (cTrail-pEUDCTab->mnTrail2Start);
                        break;
                    }

                    /* third trail range, located behind the first AND the */
                    /* second one; ImplUnicodeToDBCS subtracts both sizes */
                    /* when it maps back, so both must be added here */
                    nTrailCount = (sal_uInt16)(nTrailCount+
                                  (pEUDCTab->mnTrail2End-pEUDCTab->mnTrail2Start+1));
                    if ( (pEUDCTab->mnTrailCount >= 3) &&
                         (cTrail >= pEUDCTab->mnTrail3Start) &&
                         (cTrail <= pEUDCTab->mnTrail3End) )
                    {
                        cConv = pEUDCTab->mnUniStart+
                                ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
                                nTrailCount+
                                (cTrail-pEUDCTab->mnTrail3Start);
                        break;
                    }
                }

                if ( !cConv )
                {
                    /* We compare against the complete lead/trail range of */
                    /* the encoding, which can be larger than the range */
                    /* that is actually defined in the tables. This way */
                    /* extensions of the encoding that we do not know are */
                    /* handled as well as possible: such a double byte */
                    /* character is still treated as one single character */
                    /* (reported as undefined instead of invalid). */
                    if (cLead < pConvertData->mnLeadStart
                        || cLead > pConvertData->mnLeadEnd
                        || cTrail < pConvertData->mnTrailStart
                        || cTrail > pConvertData->mnTrailEnd)
                    {
                        *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
                        if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
                        {
                            *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
                            break;
                        }
                        else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
                        {
                            pSrcBuf++;
                            continue;
                        }
                        else
                            cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
                    }
                    else
                    {
                        *pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED;
                        if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR )
                        {
                            *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
                            break;
                        }
                        else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE )
                        {
                            pSrcBuf++;
                            continue;
                        }
                        else
                            cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
                    }
                }
            }
        }

        if ( pDestBuf == pEndDestBuf )
        {
            /* pSrcBuf may already point to the trail byte; go back to the */
            /* lead byte so that the character is not reported as consumed */
            pSrcBuf = pCharStart;
            *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
            break;
        }

        *pDestBuf = cConv;
        pDestBuf++;
        pSrcBuf++;
    }

    *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
    return (nDestChars - (pEndDestBuf-pDestBuf));
}
217 
218 /* ----------------------------------------------------------------------- */
219 
/*
 * Convert Unicode text to a double byte character set (DBCS).
 *
 *  pData         conversion tables, accessed as ImplDBCSConvertData
 *  pContext      unused
 *  pSrcBuf       source characters (nSrcChars of them)
 *  pDestBuf      destination buffer with room for nDestBytes bytes
 *  nFlags        RTL_UNICODETOTEXT_FLAGS_*; PRIVATE_MAPTO0 is evaluated
 *                here, everything else is passed on to
 *                ImplHandleUndefinedUnicodeToTextChar
 *  pInfo         reset to 0 and then OR-ed with RTL_UNICODETOTEXT_INFO_*
 *                bits
 *  pSrcCvtChars  receives the number of source characters consumed
 *
 * Returns the number of bytes written to pDestBuf.
 *
 * A converted value without high byte is written as one byte, any other
 * value as two bytes (high byte first).
 */
sal_Size ImplUnicodeToDBCS( const ImplTextConverterData* pData, void* pContext,
                            const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
                            sal_Char* pDestBuf, sal_Size nDestBytes,
                            sal_uInt32 nFlags, sal_uInt32* pInfo,
                            sal_Size* pSrcCvtChars )
{
    sal_uInt16                  cConv;
    sal_Unicode                 c;
    sal_uChar                   nHighChar;
    sal_uChar                   nLowChar;
    const ImplUniToDBCSHighTab* pHighEntry;
    const ImplDBCSConvertData*  pConvertData = (const ImplDBCSConvertData*)pData;
    const ImplUniToDBCSHighTab* pHighTab = pConvertData->mpToDBCSHighTab;
    sal_Char*                   pEndDestBuf;
    const sal_Unicode*          pEndSrcBuf;

    sal_Bool bCheckRange = (pConvertData->mnLeadStart != 0
                            || pConvertData->mnLeadEnd != 0xFF);
        /* this statement has the effect that this extra check is only done for
           EUC-KR, which uses the MS-949 tables, but does not support the full
           range of MS-949 */

    (void) pContext; /* unused */

    *pInfo = 0;
    pEndDestBuf = pDestBuf+nDestBytes;
    pEndSrcBuf  = pSrcBuf+nSrcChars;
    while ( pSrcBuf < pEndSrcBuf )
    {
        c = *pSrcBuf;
        nHighChar = (sal_uChar)((c >> 8) & 0xFF);
        nLowChar = (sal_uChar)(c & 0xFF);

        /* get entry for the high byte */
        pHighEntry = pHighTab+nHighChar;

        /* is low byte in the table range */
        if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
        {
            cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart];

            /* drop double byte results outside the lead/trail range that */
            /* this particular encoding supports */
            if (bCheckRange && cConv > 0x7F
                && ((cConv >> 8) < pConvertData->mnLeadStart
                    || (cConv >> 8) > pConvertData->mnLeadEnd
                    || (cConv & 0xFF) < pConvertData->mnTrailStart
                    || (cConv & 0xFF) > pConvertData->mnTrailEnd))
                cConv = 0;
        }
        else
            cConv = 0;

        if (cConv == 0 && c != 0)
        {
            /* Map to EUDC ranges: */
            ImplDBCSEUDCData const * pEUDCTab = pConvertData->mpEUDCTab;
            sal_uInt32 i;
            for (i = 0; i < pConvertData->mnEUDCCount; ++i)
            {
                if (c >= pEUDCTab->mnUniStart && c <= pEUDCTab->mnUniEnd)
                {
                    /* split the offset into the range into a lead byte */
                    /* offset and an offset into that lead byte's row */
                    sal_uInt32 nIndex = c - pEUDCTab->mnUniStart;
                    sal_uInt32 nLeadOff
                        = nIndex / pEUDCTab->mnTrailRangeCount;
                    sal_uInt32 nTrailOff
                        = nIndex % pEUDCTab->mnTrailRangeCount;
                    sal_uInt32 nSize;
                    cConv = (sal_uInt16)
                                ((pEUDCTab->mnLeadStart + nLeadOff) << 8);

                    /* the row is the concatenation of up to three trail */
                    /* ranges; find the one that contains nTrailOff */
                    nSize
                        = pEUDCTab->mnTrail1End - pEUDCTab->mnTrail1Start + 1;
                    if (nTrailOff < nSize)
                    {
                        cConv |= pEUDCTab->mnTrail1Start + nTrailOff;
                        break;
                    }
                    nTrailOff -= nSize;
                    nSize
                        = pEUDCTab->mnTrail2End - pEUDCTab->mnTrail2Start + 1;
                    if (nTrailOff < nSize)
                    {
                        cConv |= pEUDCTab->mnTrail2Start + nTrailOff;
                        break;
                    }
                    nTrailOff -= nSize;
                    cConv |= pEUDCTab->mnTrail3Start + nTrailOff;
                    break;
                }
                pEUDCTab++;
            }

            /* Characters in the RTL_TEXTCVT_BYTE_PRIVATE range are mapped
             * to the byte given by their low 8 bits if the caller asked
             * for it.  The value is stored unsigned: going through
             * sal_Char would, where sal_Char is a signed type, sign-extend
             * low bytes >= 0x80 to 0xFFxx and so produce a bogus double
             * byte sequence below.
             *
             * NOTE(review): when the low byte is 0, cConv stays 0 and the
             * character is passed to ImplHandleUndefinedUnicodeToTextChar
             * below, as before; what that helper does with it is not
             * visible here.
             */
            if (c >= RTL_TEXTCVT_BYTE_PRIVATE_START
                && c <= RTL_TEXTCVT_BYTE_PRIVATE_END)
            {
                if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 )
                    cConv = (sal_uInt16)(c & 0xFF);
            }
        }

        if ( !cConv )
        {
            /* NOTE(review): c == 0 without a table mapping also ends up */
            /* here - confirm that this is intended */

            /* Handle undefined and surrogates characters */
            /* (all surrogates characters are undefined) */
            if (ImplHandleUndefinedUnicodeToTextChar(pData,
                                                     &pSrcBuf,
                                                     pEndSrcBuf,
                                                     &pDestBuf,
                                                     pEndDestBuf,
                                                     nFlags,
                                                     pInfo))
                continue;
            else
                break;
        }

        /* SingleByte */
        if ( !(cConv & 0xFF00) )
        {
            if ( pDestBuf == pEndDestBuf )
            {
                *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                break;
            }

            *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF);
            pDestBuf++;
        }
        /* DoubleByte */
        else
        {
            /* compare the remaining room instead of forming pDestBuf+1, */
            /* which may lie beyond the end of the buffer */
            if ( pEndDestBuf-pDestBuf < 2 )
            {
                *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                break;
            }

            *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 8) & 0xFF);
            pDestBuf++;
            *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF);
            pDestBuf++;
        }

        pSrcBuf++;
    }

    *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf);
    return (nDestBytes - (pEndDestBuf-pDestBuf));
}
380 
381 /* ======================================================================= */
382 
383 #define JIS_EUC_LEAD_OFF                                        0x80
384 #define JIS_EUC_TRAIL_OFF                                       0x80
385 
386 /* ----------------------------------------------------------------------- */
387 
/*
 * Convert EUC-JP encoded text to Unicode.
 *
 * Handled byte sequences:
 *   00-7F            copied unchanged
 *   8E + trail       half-width katakana; trail A1-DF maps to FF61...
 *   8F + lead+trail  looked up in the JIS 0212 lead table
 *   lead + trail     looked up in the JIS 0208 lead table
 *
 *  pData         conversion tables, accessed as ImplEUCJPConvertData
 *  pContext      unused
 *  pSrcBuf       source bytes (nSrcBytes of them)
 *  pDestBuf      destination buffer with room for nDestChars characters
 *  nFlags        RTL_TEXTTOUNICODE_FLAGS_*; the INVALID and MBUNDEFINED
 *                masks select whether such input stops the conversion, is
 *                skipped, or is replaced by
 *                RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER
 *  pInfo         reset to 0 and then OR-ed with RTL_TEXTTOUNICODE_INFO_*
 *                bits
 *  pSrcCvtBytes  receives the number of source bytes consumed
 *
 * Returns the number of characters written to pDestBuf.
 *
 * If the destination buffer is full, the character that did not fit is not
 * counted in *pSrcCvtBytes, so a caller can resume at that offset.  If the
 * conversion stops on an INVALID/MBUNDEFINED error, the bytes of the
 * offending sequence before its last byte have already been consumed.
 */
sal_Size ImplEUCJPToUnicode( const ImplTextConverterData* pData,
                             void* pContext,
                             const sal_Char* pSrcBuf, sal_Size nSrcBytes,
                             sal_Unicode* pDestBuf, sal_Size nDestChars,
                             sal_uInt32 nFlags, sal_uInt32* pInfo,
                             sal_Size* pSrcCvtBytes )
{
    sal_uChar                   c;
    sal_uChar                   cLead = '\0';
    sal_uChar                   cTrail = '\0';
    sal_uChar                   nTrailIdx;
    sal_Unicode                 cConv;
    const ImplDBCSToUniLeadTab* pLeadEntry;
    const ImplDBCSToUniLeadTab* pLeadTab;
    const ImplEUCJPConvertData* pConvertData = (const ImplEUCJPConvertData*)pData;
    sal_Unicode*                pEndDestBuf;
    const sal_Char*             pEndSrcBuf;
    const sal_Char*             pCharStart;

    (void) pContext; /* unused */

    *pInfo = 0;
    pEndDestBuf = pDestBuf+nDestChars;
    pEndSrcBuf  = pSrcBuf+nSrcBytes;
    while ( pSrcBuf < pEndSrcBuf )
    {
        /* remember the first byte of this character; needed to hand the */
        /* whole character back if the destination buffer is full */
        pCharStart = pSrcBuf;

        c = (sal_uChar)*pSrcBuf;

        /* ASCII */
        if ( c <= 0x7F )
            cConv = c;
        else
        {
            /* SS2 - Half-width katakana */
            /* 8E + A1-DF */
            if ( c == 0x8E )
            {
                /* Source buffer too small */
                if ( pSrcBuf + 1 == pEndSrcBuf )
                {
                    *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
                    break;
                }

                pSrcBuf++;
                c = (sal_uChar)*pSrcBuf;
                if ( (c >= 0xA1) && (c <= 0xDF) )
                    cConv = 0xFF61+(c-0xA1);
                else
                {
                    /* keep both bytes for the classification below */
                    cConv = 0;
                    cLead = 0x8E;
                    cTrail = c;
                }
            }
            else
            {
                /* SS3 - JIS 0212-1990 */
                /* 8F + A1-FE + A1-FE */
                if ( c == 0x8F )
                {
                    /* Source buffer too small */
                    if (pEndSrcBuf - pSrcBuf < 3)
                    {
                        *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
                        break;
                    }

                    pSrcBuf++;
                    cLead = (sal_uChar)*pSrcBuf;
                    pSrcBuf++;
                    cTrail = (sal_uChar)*pSrcBuf;
                    pLeadTab = pConvertData->mpJIS0212ToUniLeadTab;
                }
                /* CodeSet 2 JIS 0208-1997 */
                /* A1-FE + A1-FE */
                else
                {
                    /* Source buffer too small */
                    if ( pSrcBuf + 1 == pEndSrcBuf )
                    {
                        *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
                        break;
                    }

                    cLead = c;
                    pSrcBuf++;
                    cTrail = (sal_uChar)*pSrcBuf;
                    pLeadTab = pConvertData->mpJIS0208ToUniLeadTab;
                }

                /* Undefined Range */
                if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) )
                    cConv = 0;
                else
                {
                    /* The tables are indexed with the bytes reduced by */
                    /* the EUC offset. cLead and cTrail themselves are */
                    /* left untouched, because the classification below */
                    /* has to look at the bytes as they were read. */
                    pLeadEntry = pLeadTab+(cLead-JIS_EUC_LEAD_OFF);
                    nTrailIdx  = (sal_uChar)(cTrail-JIS_EUC_TRAIL_OFF);
                    if ( (nTrailIdx >= pLeadEntry->mnTrailStart) && (nTrailIdx <= pLeadEntry->mnTrailEnd) )
                        cConv = pLeadEntry->mpToUniTrailTab[nTrailIdx-pLeadEntry->mnTrailStart];
                    else
                        cConv = 0;
                }
            }

            if ( !cConv )
            {
                /* We compare against the complete lead/trail range of */
                /* the encoding, which can be larger than the range that */
                /* is actually defined in the tables. This way extensions */
                /* of the encoding that we do not know are handled as */
                /* well as possible: such a multi byte character is */
                /* still treated as one single character (reported as */
                /* undefined instead of invalid). */
                if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) )
                {
                    *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
                    if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
                    {
                        *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
                        break;
                    }
                    else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
                    {
                        pSrcBuf++;
                        continue;
                    }
                    else
                        cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
                }
                else
                {
                    *pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED;
                    if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR )
                    {
                        *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
                        break;
                    }
                    else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE )
                    {
                        pSrcBuf++;
                        continue;
                    }
                    else
                        cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
                }
            }
        }

        if ( pDestBuf == pEndDestBuf )
        {
            /* pSrcBuf may already point to the last byte of a two or */
            /* three byte sequence; go back to its first byte so that the */
            /* character is not reported as consumed */
            pSrcBuf = pCharStart;
            *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
            break;
        }

        *pDestBuf = cConv;
        pDestBuf++;
        pSrcBuf++;
    }

    *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
    return (nDestChars - (pEndDestBuf-pDestBuf));
}
550 
551 /* ----------------------------------------------------------------------- */
552 
/*
 * Convert Unicode text to EUC-JP.
 *
 * Mapping order:
 *   0000-007F   copied unchanged (one byte)
 *   FF61-FF9F   half-width katakana: 8E followed by A1... (two bytes)
 *   otherwise   JIS 0208 table, result OR-ed with 8080 (two bytes);
 *               if that yields nothing, JIS 0212 table, result OR-ed with
 *               8F8080 (three bytes)
 * Characters found in neither table are passed to
 * ImplHandleUndefinedUnicodeToTextChar.
 *
 *  pData         conversion tables, accessed as ImplEUCJPConvertData
 *  pContext      unused
 *  pSrcBuf       source characters (nSrcChars of them)
 *  pDestBuf      destination buffer with room for nDestBytes bytes
 *  nFlags        RTL_UNICODETOTEXT_FLAGS_*, passed on to
 *                ImplHandleUndefinedUnicodeToTextChar
 *  pInfo         reset to 0 and then OR-ed with RTL_UNICODETOTEXT_INFO_*
 *                bits
 *  pSrcCvtChars  receives the number of source characters consumed
 *
 * Returns the number of bytes written to pDestBuf.
 */
sal_Size ImplUnicodeToEUCJP( const ImplTextConverterData* pData,
                             void* pContext,
                             const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
                             sal_Char* pDestBuf, sal_Size nDestBytes,
                             sal_uInt32 nFlags, sal_uInt32* pInfo,
                             sal_Size* pSrcCvtChars )
{
    sal_uInt32                  cConv;
    sal_Unicode                 c;
    sal_uChar                   nHighChar;
    sal_uChar                   nLowChar;
    sal_Size                    nBytes;
    const ImplUniToDBCSHighTab* pHighEntry;
    const ImplEUCJPConvertData* pConvertData = (const ImplEUCJPConvertData*)pData;
    sal_Char*                   pEndDestBuf;
    const sal_Unicode*          pEndSrcBuf;

    (void) pContext; /* unused */

    *pInfo = 0;
    pEndDestBuf = pDestBuf+nDestBytes;
    pEndSrcBuf  = pSrcBuf+nSrcChars;
    while ( pSrcBuf < pEndSrcBuf )
    {
        c = *pSrcBuf;

        /* ASCII */
        if ( c <= 0x7F )
            cConv = c;
        /* Half-width katakana */
        else if ( (c >= 0xFF61) && (c <= 0xFF9F) )
            cConv = 0x8E00+0xA1+(c-0xFF61);
        else
        {
            nHighChar = (sal_uChar)((c >> 8) & 0xFF);
            nLowChar = (sal_uChar)(c & 0xFF);
            cConv = 0;

            /* JIS 0208 */
            pHighEntry = pConvertData->mpUniToJIS0208HighTab+nHighChar;
            if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
            {
                cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart];
                if (cConv != 0)
                    cConv |= 0x8080;
            }

            /* JIS 0212 */
            if ( !cConv )
            {
                pHighEntry = pConvertData->mpUniToJIS0212HighTab+nHighChar;
                if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
                {
                    cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart];
                    if (cConv != 0)
                        cConv |= 0x8F8080;
                }
            }

            if ( !cConv )
            {
                /* Handle undefined and surrogates characters */
                /* (all surrogates characters are undefined) */
                if (ImplHandleUndefinedUnicodeToTextChar(pData,
                                                         &pSrcBuf,
                                                         pEndSrcBuf,
                                                         &pDestBuf,
                                                         pEndDestBuf,
                                                         nFlags,
                                                         pInfo))
                    continue;
                else
                    break;
            }
        }

        /* the highest non-zero byte of cConv determines the length of */
        /* the sequence: SingleByte, DoubleByte or three bytes */
        if ( !(cConv & 0xFFFF00) )
            nBytes = 1;
        else if ( !(cConv & 0xFF0000) )
            nBytes = 2;
        else
            nBytes = 3;

        /* compare the remaining room instead of forming pDestBuf+1 or */
        /* pDestBuf+2, which may lie beyond the end of the buffer */
        if ( (sal_Size)(pEndDestBuf-pDestBuf) < nBytes )
        {
            *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
            break;
        }

        /* write the bytes, most significant one first */
        if ( nBytes > 2 )
        {
            *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 16) & 0xFF);
            pDestBuf++;
        }
        if ( nBytes > 1 )
        {
            *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 8) & 0xFF);
            pDestBuf++;
        }
        *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF);
        pDestBuf++;

        pSrcBuf++;
    }

    *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf);
    return (nDestBytes - (pEndDestBuf-pDestBuf));
}
690