xref: /aoo41x/main/sal/textenc/tcvtmb.c (revision cdf0e10c)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 #include "tenchelp.h"
29 #include "unichars.h"
30 #include "rtl/textcvt.h"
31 
32 /* ======================================================================= */
33 
/* The DBCS to Unicode conversion routines use a lead table for the first  */
/* byte, from which we determine either the trail table or, for single-    */
/* byte characters, the Unicode value. Every lead byte has its own table,  */
/* because that lets us share many tables between different charset        */
/* encodings.                                                              */
38 
39 /* ======================================================================= */
40 
41 sal_Size ImplDBCSToUnicode( const ImplTextConverterData* pData, void* pContext,
42                             const sal_Char* pSrcBuf, sal_Size nSrcBytes,
43                             sal_Unicode* pDestBuf, sal_Size nDestChars,
44                             sal_uInt32 nFlags, sal_uInt32* pInfo,
45                             sal_Size* pSrcCvtBytes )
46 {
47     sal_uChar                   cLead;
48     sal_uChar                   cTrail;
49     sal_Unicode                 cConv;
50     const ImplDBCSToUniLeadTab* pLeadEntry;
51     const ImplDBCSConvertData*  pConvertData = (const ImplDBCSConvertData*)pData;
52     const ImplDBCSToUniLeadTab* pLeadTab = pConvertData->mpToUniLeadTab;
53     sal_Unicode*                pEndDestBuf;
54     const sal_Char*             pEndSrcBuf;
55 
56     (void) pContext; /* unused */
57 
58     *pInfo = 0;
59     pEndDestBuf = pDestBuf+nDestChars;
60     pEndSrcBuf  = pSrcBuf+nSrcBytes;
61     while ( pSrcBuf < pEndSrcBuf )
62     {
63         cLead = (sal_uChar)*pSrcBuf;
64 
65         /* get entry for the lead byte */
66         pLeadEntry = pLeadTab+cLead;
67 
68         /* SingleByte char? */
69         if (pLeadEntry->mpToUniTrailTab == NULL
70             || cLead < pConvertData->mnLeadStart
71             || cLead > pConvertData->mnLeadEnd)
72         {
73             cConv = pLeadEntry->mnUniChar;
74             if ( !cConv && (cLead != 0) )
75             {
76                 *pInfo |= RTL_TEXTTOUNICODE_INFO_UNDEFINED;
77                 if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR )
78                 {
79                     *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
80                     break;
81                 }
82                 else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE )
83                 {
84                     pSrcBuf++;
85                     continue;
86                 }
87                 else
88                     cConv = ImplGetUndefinedUnicodeChar(cLead, nFlags);
89             }
90         }
91         else
92         {
93             /* Source buffer to small */
94             if ( pSrcBuf +1 == pEndSrcBuf )
95             {
96                 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
97                 break;
98             }
99 
100             pSrcBuf++;
101             cTrail = (sal_uChar)*pSrcBuf;
102             if ( (cTrail >= pLeadEntry->mnTrailStart) && (cTrail <= pLeadEntry->mnTrailEnd) )
103                 cConv = pLeadEntry->mpToUniTrailTab[cTrail-pLeadEntry->mnTrailStart];
104             else
105                 cConv = 0;
106 
107             if ( !cConv )
108             {
109                 /* EUDC Ranges */
110                 sal_uInt16              i;
111                 const ImplDBCSEUDCData* pEUDCTab = pConvertData->mpEUDCTab;
112                 for ( i = 0; i < pConvertData->mnEUDCCount; i++ )
113                 {
114                     if ( (cLead >= pEUDCTab->mnLeadStart) &&
115                          (cLead <= pEUDCTab->mnLeadEnd) )
116                     {
117                         sal_uInt16 nTrailCount = 0;
118                         if ( (cTrail >= pEUDCTab->mnTrail1Start) &&
119                              (cTrail <= pEUDCTab->mnTrail1End) )
120                         {
121                             cConv = pEUDCTab->mnUniStart+
122                                     ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
123                                     (cTrail-pEUDCTab->mnTrail1Start);
124                             break;
125                         }
126                         else
127                         {
128                             nTrailCount = pEUDCTab->mnTrail1End-pEUDCTab->mnTrail1Start+1;
129                             if ( (pEUDCTab->mnTrailCount >= 2) &&
130                                  (cTrail >= pEUDCTab->mnTrail2Start) &&
131                                  (cTrail <= pEUDCTab->mnTrail2End) )
132                             {
133                                 cConv = pEUDCTab->mnUniStart+
134                                         ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
135                                         nTrailCount+
136                                         (cTrail-pEUDCTab->mnTrail2Start);
137                                 break;
138                             }
139                             else
140                             {
141                                 nTrailCount = pEUDCTab->mnTrail2End-pEUDCTab->mnTrail2Start+1;
142                                 if ( (pEUDCTab->mnTrailCount >= 3) &&
143                                      (cTrail >= pEUDCTab->mnTrail3Start) &&
144                                      (cTrail <= pEUDCTab->mnTrail3End) )
145                                 {
146                                     cConv = pEUDCTab->mnUniStart+
147                                             ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
148                                             nTrailCount+
149                                             (cTrail-pEUDCTab->mnTrail3Start);
150                                     break;
151                                 }
152                             }
153                         }
154                     }
155 
156                     pEUDCTab++;
157                 }
158 
159                 if ( !cConv )
160                 {
161                     /* Wir vergleichen den kompletten Trailbereich den wir */
162                     /* definieren, der normalerweise groesser sein kann als */
163                     /* der definierte. Dies machen wir, damit Erweiterungen von */
164                     /* uns nicht beruecksichtigten Encodings so weit wie */
165                     /* moeglich auch richtig zu behandeln, das double byte */
166                     /* characters auch als ein einzelner Character behandelt */
167                     /* wird. */
168                     if (cLead < pConvertData->mnLeadStart
169                         || cLead > pConvertData->mnLeadEnd
170                         || cTrail < pConvertData->mnTrailStart
171                         || cTrail > pConvertData->mnTrailEnd)
172                     {
173                         *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
174                         if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
175                         {
176                             *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
177                             break;
178                         }
179                         else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
180                         {
181                             pSrcBuf++;
182                             continue;
183                         }
184                         else
185                             cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
186                     }
187                     else
188                     {
189                         *pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED;
190                         if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR )
191                         {
192                             *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
193                             break;
194                         }
195                         else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE )
196                         {
197                             pSrcBuf++;
198                             continue;
199                         }
200                         else
201                             cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
202                     }
203                 }
204             }
205         }
206 
207         if ( pDestBuf == pEndDestBuf )
208         {
209             *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
210             break;
211         }
212 
213         *pDestBuf = cConv;
214         pDestBuf++;
215         pSrcBuf++;
216     }
217 
218     *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
219     return (nDestChars - (pEndDestBuf-pDestBuf));
220 }
221 
222 /* ----------------------------------------------------------------------- */
223 
224 sal_Size ImplUnicodeToDBCS( const ImplTextConverterData* pData, void* pContext,
225                             const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
226                             sal_Char* pDestBuf, sal_Size nDestBytes,
227                             sal_uInt32 nFlags, sal_uInt32* pInfo,
228                             sal_Size* pSrcCvtChars )
229 {
230     sal_uInt16                  cConv;
231     sal_Unicode                 c;
232     sal_uChar                   nHighChar;
233     sal_uChar                   nLowChar;
234     const ImplUniToDBCSHighTab* pHighEntry;
235     const ImplDBCSConvertData*  pConvertData = (const ImplDBCSConvertData*)pData;
236     const ImplUniToDBCSHighTab* pHighTab = pConvertData->mpToDBCSHighTab;
237     sal_Char*                   pEndDestBuf;
238     const sal_Unicode*          pEndSrcBuf;
239 
240     sal_Bool bCheckRange = (pConvertData->mnLeadStart != 0
241                             || pConvertData->mnLeadEnd != 0xFF);
242         /* this statement has the effect that this extra check is only done for
243            EUC-KR, which uses the MS-949 tables, but does not support the full
244            range of MS-949 */
245 
246     (void) pContext; /* unused */
247 
248     *pInfo = 0;
249     pEndDestBuf = pDestBuf+nDestBytes;
250     pEndSrcBuf  = pSrcBuf+nSrcChars;
251     while ( pSrcBuf < pEndSrcBuf )
252     {
253         c = *pSrcBuf;
254         nHighChar = (sal_uChar)((c >> 8) & 0xFF);
255         nLowChar = (sal_uChar)(c & 0xFF);
256 
257         /* get entry for the high byte */
258         pHighEntry = pHighTab+nHighChar;
259 
260         /* is low byte in the table range */
261         if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
262         {
263             cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart];
264             if (bCheckRange && cConv > 0x7F
265                 && ((cConv >> 8) < pConvertData->mnLeadStart
266                     || (cConv >> 8) > pConvertData->mnLeadEnd
267                     || (cConv & 0xFF) < pConvertData->mnTrailStart
268                     || (cConv & 0xFF) > pConvertData->mnTrailEnd))
269                 cConv = 0;
270         }
271         else
272             cConv = 0;
273 
274         if (cConv == 0 && c != 0)
275         {
276             /* Map to EUDC ranges: */
277             ImplDBCSEUDCData const * pEUDCTab = pConvertData->mpEUDCTab;
278             sal_uInt32 i;
279             for (i = 0; i < pConvertData->mnEUDCCount; ++i)
280             {
281                 if (c >= pEUDCTab->mnUniStart && c <= pEUDCTab->mnUniEnd)
282                 {
283                     sal_uInt32 nIndex = c - pEUDCTab->mnUniStart;
284                     sal_uInt32 nLeadOff
285                         = nIndex / pEUDCTab->mnTrailRangeCount;
286                     sal_uInt32 nTrailOff
287                         = nIndex % pEUDCTab->mnTrailRangeCount;
288                     sal_uInt32 nSize;
289                     cConv = (sal_uInt16)
290                                 ((pEUDCTab->mnLeadStart + nLeadOff) << 8);
291                     nSize
292                         = pEUDCTab->mnTrail1End - pEUDCTab->mnTrail1Start + 1;
293                     if (nTrailOff < nSize)
294                     {
295                         cConv |= pEUDCTab->mnTrail1Start + nTrailOff;
296                         break;
297                     }
298                     nTrailOff -= nSize;
299                     nSize
300                         = pEUDCTab->mnTrail2End - pEUDCTab->mnTrail2Start + 1;
301                     if (nTrailOff < nSize)
302                     {
303                         cConv |= pEUDCTab->mnTrail2Start + nTrailOff;
304                         break;
305                     }
306                     nTrailOff -= nSize;
307                     cConv |= pEUDCTab->mnTrail3Start + nTrailOff;
308                     break;
309                 }
310                 pEUDCTab++;
311             }
312 
313             /* FIXME
314              * SB: Not sure why this is in here.  Plus, it does not work as
315              * intended when (c & 0xFF) == 0, because the next !cConv check
316              * will then think c has not yet been converted...
317              */
318             if (c >= RTL_TEXTCVT_BYTE_PRIVATE_START
319                 && c <= RTL_TEXTCVT_BYTE_PRIVATE_END)
320             {
321                 if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 )
322                     cConv = (sal_Char)(sal_uChar)(c & 0xFF);
323             }
324         }
325 
326         if ( !cConv )
327         {
328             if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE )
329             {
330                 /* !!! */
331             }
332 
333             if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR )
334             {
335                 /* !!! */
336             }
337 
338             /* Handle undefined and surrogates characters */
339             /* (all surrogates characters are undefined) */
340             if (ImplHandleUndefinedUnicodeToTextChar(pData,
341                                                      &pSrcBuf,
342                                                      pEndSrcBuf,
343                                                      &pDestBuf,
344                                                      pEndDestBuf,
345                                                      nFlags,
346                                                      pInfo))
347                 continue;
348             else
349                 break;
350         }
351 
352         /* SingleByte */
353         if ( !(cConv & 0xFF00) )
354         {
355             if ( pDestBuf == pEndDestBuf )
356             {
357                 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
358                 break;
359             }
360 
361             *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF);
362             pDestBuf++;
363         }
364         else
365         {
366             if ( pDestBuf+1 >= pEndDestBuf )
367             {
368                 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
369                 break;
370             }
371 
372             *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 8) & 0xFF);
373             pDestBuf++;
374             *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF);
375             pDestBuf++;
376         }
377 
378         pSrcBuf++;
379     }
380 
381     *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf);
382     return (nDestBytes - (pEndDestBuf-pDestBuf));
383 }
384 
385 /* ======================================================================= */
386 
387 #define JIS_EUC_LEAD_OFF                                        0x80
388 #define JIS_EUC_TRAIL_OFF                                       0x80
389 
390 /* ----------------------------------------------------------------------- */
391 
392 sal_Size ImplEUCJPToUnicode( const ImplTextConverterData* pData,
393                              void* pContext,
394                              const sal_Char* pSrcBuf, sal_Size nSrcBytes,
395                              sal_Unicode* pDestBuf, sal_Size nDestChars,
396                              sal_uInt32 nFlags, sal_uInt32* pInfo,
397                              sal_Size* pSrcCvtBytes )
398 {
399     sal_uChar                   c;
400     sal_uChar                   cLead = '\0';
401     sal_uChar                   cTrail = '\0';
402     sal_Unicode                 cConv;
403     const ImplDBCSToUniLeadTab* pLeadEntry;
404     const ImplDBCSToUniLeadTab* pLeadTab;
405     const ImplEUCJPConvertData* pConvertData = (const ImplEUCJPConvertData*)pData;
406     sal_Unicode*                pEndDestBuf;
407     const sal_Char*             pEndSrcBuf;
408 
409     (void) pContext; /* unused */
410 
411     *pInfo = 0;
412     pEndDestBuf = pDestBuf+nDestChars;
413     pEndSrcBuf  = pSrcBuf+nSrcBytes;
414     while ( pSrcBuf < pEndSrcBuf )
415     {
416         c = (sal_uChar)*pSrcBuf;
417 
418         /* ASCII */
419         if ( c <= 0x7F )
420             cConv = c;
421         else
422         {
423             /* SS2 - Half-width katakana */
424             /* 8E + A1-DF */
425             if ( c == 0x8E )
426             {
427                 /* Source buffer to small */
428                 if ( pSrcBuf + 1 == pEndSrcBuf )
429                 {
430                     *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
431                     break;
432                 }
433 
434                 pSrcBuf++;
435                 c = (sal_uChar)*pSrcBuf;
436                 if ( (c >= 0xA1) && (c <= 0xDF) )
437                     cConv = 0xFF61+(c-0xA1);
438                 else
439                 {
440                     cConv = 0;
441                     cLead = 0x8E;
442                     cTrail = c;
443                 }
444             }
445             else
446             {
447                 /* SS3 - JIS 0212-1990 */
448                 /* 8F + A1-FE + A1-FE */
449                 if ( c == 0x8F )
450                 {
451                     /* Source buffer to small */
452                     if (pEndSrcBuf - pSrcBuf < 3)
453                     {
454                         *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
455                         break;
456                     }
457 
458                     pSrcBuf++;
459                     cLead = (sal_uChar)*pSrcBuf;
460                     pSrcBuf++;
461                     cTrail = (sal_uChar)*pSrcBuf;
462                     pLeadTab = pConvertData->mpJIS0212ToUniLeadTab;
463                 }
464                 /* CodeSet 2 JIS 0208-1997 */
465                 /* A1-FE + A1-FE */
466                 else
467                 {
468                     /* Source buffer to small */
469                     if ( pSrcBuf + 1 == pEndSrcBuf )
470                     {
471                         *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
472                         break;
473                     }
474 
475                     cLead = c;
476                     pSrcBuf++;
477                     cTrail = (sal_uChar)*pSrcBuf;
478                     pLeadTab = pConvertData->mpJIS0208ToUniLeadTab;
479                 }
480 
481                 /* Undefined Range */
482                 if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) )
483                     cConv = 0;
484                 else
485                 {
486                     cLead   -= JIS_EUC_LEAD_OFF;
487                     cTrail  -= JIS_EUC_TRAIL_OFF;
488                     pLeadEntry = pLeadTab+cLead;
489                     if ( (cTrail >= pLeadEntry->mnTrailStart) && (cTrail <= pLeadEntry->mnTrailEnd) )
490                         cConv = pLeadEntry->mpToUniTrailTab[cTrail-pLeadEntry->mnTrailStart];
491                     else
492                         cConv = 0;
493                 }
494             }
495 
496             if ( !cConv )
497             {
498                 /* Wir vergleichen den kompletten Trailbereich den wir */
499                 /* definieren, der normalerweise groesser sein kann als */
500                 /* der definierte. Dies machen wir, damit Erweiterungen von */
501                 /* uns nicht beruecksichtigten Encodings so weit wie */
502                 /* moeglich auch richtig zu behandeln, das double byte */
503                 /* characters auch als ein einzelner Character behandelt */
504                 /* wird. */
505                 if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) )
506                 {
507                     *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
508                     if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
509                     {
510                         *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
511                         break;
512                     }
513                     else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
514                     {
515                         pSrcBuf++;
516                         continue;
517                     }
518                     else
519                         cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
520                 }
521                 else
522                 {
523                     *pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED;
524                     if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR )
525                     {
526                         *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
527                         break;
528                     }
529                     else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE )
530                     {
531                         pSrcBuf++;
532                         continue;
533                     }
534                     else
535                         cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
536                 }
537             }
538         }
539 
540         if ( pDestBuf == pEndDestBuf )
541         {
542             *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
543             break;
544         }
545 
546         *pDestBuf = cConv;
547         pDestBuf++;
548         pSrcBuf++;
549     }
550 
551     *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
552     return (nDestChars - (pEndDestBuf-pDestBuf));
553 }
554 
555 /* ----------------------------------------------------------------------- */
556 
557 sal_Size ImplUnicodeToEUCJP( const ImplTextConverterData* pData,
558                              void* pContext,
559                              const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
560                              sal_Char* pDestBuf, sal_Size nDestBytes,
561                              sal_uInt32 nFlags, sal_uInt32* pInfo,
562                              sal_Size* pSrcCvtChars )
563 {
564     sal_uInt32                  cConv;
565     sal_Unicode                 c;
566     sal_uChar                   nHighChar;
567     sal_uChar                   nLowChar;
568     const ImplUniToDBCSHighTab* pHighEntry;
569     const ImplUniToDBCSHighTab* pHighTab;
570     const ImplEUCJPConvertData* pConvertData = (const ImplEUCJPConvertData*)pData;
571     sal_Char*                   pEndDestBuf;
572     const sal_Unicode*          pEndSrcBuf;
573 
574     (void) pContext; /* unused */
575 
576     *pInfo = 0;
577     pEndDestBuf = pDestBuf+nDestBytes;
578     pEndSrcBuf  = pSrcBuf+nSrcChars;
579     while ( pSrcBuf < pEndSrcBuf )
580     {
581         c = *pSrcBuf;
582 
583         /* ASCII */
584         if ( c <= 0x7F )
585             cConv = c;
586         /* Half-width katakana */
587         else if ( (c >= 0xFF61) && (c <= 0xFF9F) )
588             cConv = 0x8E00+0xA1+(c-0xFF61);
589         else
590         {
591             nHighChar = (sal_uChar)((c >> 8) & 0xFF);
592             nLowChar = (sal_uChar)(c & 0xFF);
593 
594             /* JIS 0208 */
595             pHighTab = pConvertData->mpUniToJIS0208HighTab;
596             pHighEntry = pHighTab+nHighChar;
597             if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
598             {
599                 cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart];
600                 if (cConv != 0)
601                     cConv |= 0x8080;
602             }
603             else
604                 cConv = 0;
605 
606             /* JIS 0212 */
607             if ( !cConv )
608             {
609                 pHighTab = pConvertData->mpUniToJIS0212HighTab;
610                 pHighEntry = pHighTab+nHighChar;
611                 if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
612                 {
613                     cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart];
614                     if (cConv != 0)
615                         cConv |= 0x8F8080;
616                 }
617 
618                 if ( !cConv )
619                 {
620                     if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE )
621                     {
622                         /* !!! */
623                     }
624 
625                     if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR )
626                     {
627                         /* !!! */
628                     }
629 
630                     /* Handle undefined and surrogates characters */
631                     /* (all surrogates characters are undefined) */
632                     if (ImplHandleUndefinedUnicodeToTextChar(pData,
633                                                              &pSrcBuf,
634                                                              pEndSrcBuf,
635                                                              &pDestBuf,
636                                                              pEndDestBuf,
637                                                              nFlags,
638                                                              pInfo))
639                         continue;
640                     else
641                         break;
642                 }
643             }
644         }
645 
646         /* SingleByte */
647         if ( !(cConv & 0xFFFF00) )
648         {
649             if ( pDestBuf == pEndDestBuf )
650             {
651                 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
652                 break;
653             }
654 
655             *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF);
656             pDestBuf++;
657         }
658         /* DoubleByte */
659         else if ( !(cConv & 0xFF0000) )
660         {
661             if ( pDestBuf+1 >= pEndDestBuf )
662             {
663                 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
664                 break;
665             }
666 
667             *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 8) & 0xFF);
668             pDestBuf++;
669             *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF);
670             pDestBuf++;
671         }
672         else
673         {
674             if ( pDestBuf+2 >= pEndDestBuf )
675             {
676                 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
677                 break;
678             }
679 
680             *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 16) & 0xFF);
681             pDestBuf++;
682             *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 8) & 0xFF);
683             pDestBuf++;
684             *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF);
685             pDestBuf++;
686         }
687 
688         pSrcBuf++;
689     }
690 
691     *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf);
692     return (nDestBytes - (pEndDestBuf-pDestBuf));
693 }
694