/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/
21*b1cdbd2cSJim Jagielski
22*b1cdbd2cSJim Jagielski
23*b1cdbd2cSJim Jagielski
24*b1cdbd2cSJim Jagielski #include "sal/types.h"
25*b1cdbd2cSJim Jagielski #include "rtl/alloc.h"
26*b1cdbd2cSJim Jagielski #include "rtl/textcvt.h"
27*b1cdbd2cSJim Jagielski
28*b1cdbd2cSJim Jagielski #include "converter.h"
29*b1cdbd2cSJim Jagielski #include "tenchelp.h"
30*b1cdbd2cSJim Jagielski #include "unichars.h"
31*b1cdbd2cSJim Jagielski
32*b1cdbd2cSJim Jagielski struct ImplUtf8ToUnicodeContext
33*b1cdbd2cSJim Jagielski {
34*b1cdbd2cSJim Jagielski sal_uInt32 nUtf32;
35*b1cdbd2cSJim Jagielski int nShift;
36*b1cdbd2cSJim Jagielski sal_Bool bCheckBom;
37*b1cdbd2cSJim Jagielski };
38*b1cdbd2cSJim Jagielski
39*b1cdbd2cSJim Jagielski struct ImplUnicodeToUtf8Context
40*b1cdbd2cSJim Jagielski {
41*b1cdbd2cSJim Jagielski sal_Unicode nHighSurrogate; /* 0xFFFF: write BOM */
42*b1cdbd2cSJim Jagielski };
43*b1cdbd2cSJim Jagielski
ImplCreateUtf8ToUnicodeContext(void)44*b1cdbd2cSJim Jagielski void * ImplCreateUtf8ToUnicodeContext(void)
45*b1cdbd2cSJim Jagielski {
46*b1cdbd2cSJim Jagielski void * p = rtl_allocateMemory(sizeof (struct ImplUtf8ToUnicodeContext));
47*b1cdbd2cSJim Jagielski ImplResetUtf8ToUnicodeContext(p);
48*b1cdbd2cSJim Jagielski return p;
49*b1cdbd2cSJim Jagielski }
50*b1cdbd2cSJim Jagielski
ImplResetUtf8ToUnicodeContext(void * pContext)51*b1cdbd2cSJim Jagielski void ImplResetUtf8ToUnicodeContext(void * pContext)
52*b1cdbd2cSJim Jagielski {
53*b1cdbd2cSJim Jagielski if (pContext != NULL)
54*b1cdbd2cSJim Jagielski {
55*b1cdbd2cSJim Jagielski ((struct ImplUtf8ToUnicodeContext *) pContext)->nShift = -1;
56*b1cdbd2cSJim Jagielski ((struct ImplUtf8ToUnicodeContext *) pContext)->bCheckBom = sal_True;
57*b1cdbd2cSJim Jagielski }
58*b1cdbd2cSJim Jagielski }
59*b1cdbd2cSJim Jagielski
/*
   Converts a buffer of UTF-8 bytes into UTF-16 code units.

   pData         a non-null value selects the Java UTF-8 variant; in that
                 variant a leading U+FEFF is never dropped.
   pContext      optional struct ImplUtf8ToUnicodeContext; when non-null the
                 decoder state is loaded from it on entry and stored back on
                 exit.
   pSrcBuf       source bytes.
   nSrcBytes     number of source bytes.
   pDestBuf      destination buffer.
   nDestChars    capacity of pDestBuf in sal_Unicode units.
   nFlags        RTL_TEXTTOUNICODE_FLAGS_* bits; GLOBAL_SIGNATURE and FLUSH
                 are examined here, the complete value is forwarded to
                 ImplHandleBadInputTextToUnicodeConversion.
   pInfo         if non-null, receives the accumulated RTL_TEXTTOUNICODE_INFO_*
                 bits.
   pSrcCvtBytes  if non-null, receives the number of source bytes consumed.

   Returns the number of sal_Unicode units written to pDestBuf.
 */
sal_Size ImplConvertUtf8ToUnicode(ImplTextConverterData const * pData,
                                  void * pContext, sal_Char const * pSrcBuf,
                                  sal_Size nSrcBytes, sal_Unicode * pDestBuf,
                                  sal_Size nDestChars, sal_uInt32 nFlags,
                                  sal_uInt32 * pInfo, sal_Size * pSrcCvtBytes)
{
    /*
       This function is very liberal with the UTF-8 input.  Accepted are:
       - non-shortest forms (e.g., C0 41 instead of 41 to represent U+0041)
       - surrogates (e.g., ED A0 80 to represent U+D800)
       - encodings with up to six bytes (everything outside the range
         U+0000..10FFFF is considered "undefined")
       The first two of these points allow this routine to translate from both
       RTL_TEXTENCODING_UTF8 and RTL_TEXTENCODING_JAVA_UTF8.
     */

    struct ImplUtf8ToUnicodeContext * pState
        = (struct ImplUtf8ToUnicodeContext *) pContext;
    int bJavaUtf8 = pData != NULL;
    sal_uInt32 nUtf32 = 0;
    int nShift = -1;
    sal_Bool bCheckBom = sal_True;
    sal_uInt32 nInfo = 0;
    sal_uChar const * pIn = (sal_uChar const *) pSrcBuf;
    sal_uChar const * pInEnd = pIn + nSrcBytes;
    sal_Unicode * pOut = pDestBuf;
    sal_Unicode * pOutEnd = pOut + nDestChars;

    /* Resume where the previous call stopped. */
    if (pState != NULL)
    {
        nUtf32 = pState->nUtf32;
        nShift = pState->nShift;
        bCheckBom = pState->bCheckBom;
    }

    while (pIn < pInEnd)
    {
        sal_Bool bUndefined = sal_False;
        int bConsume = sal_True;
        sal_uInt32 nByte = *pIn++;

        if (nShift >= 0)
        {
            /* A multi-byte sequence is in progress. */
            if ((nByte & 0xC0) != 0x80)
            {
                /*
                   This byte is preceded by a broken UTF-8 sequence; if this
                   byte is neither in the range [0x80..0xBF] nor in the range
                   [0xFE..0xFF], assume that this byte does not belong to that
                   broken sequence, but instead starts a new, legal UTF-8
                   sequence:
                 */
                bConsume = nByte >= 0xFE;
                goto bad_input;
            }
            nUtf32 |= (nByte & 0x3F) << nShift;
            if (nShift == 0)
            {
                goto transform;
            }
            nShift -= 6;
        }
        else if (nByte <= 0x7F)
        {
            /* Single-byte character. */
            nUtf32 = nByte;
            goto transform;
        }
        else if (nByte <= 0xBF || nByte > 0xFD)
        {
            /* Stray continuation byte, or 0xFE/0xFF. */
            goto bad_input;
        }
        else if (nByte <= 0xDF)
        {
            /* Lead byte of a two-byte sequence. */
            nUtf32 = (nByte & 0x1F) << 6;
            nShift = 0;
        }
        else if (nByte <= 0xEF)
        {
            /* Lead byte of a three-byte sequence. */
            nUtf32 = (nByte & 0x0F) << 12;
            nShift = 6;
        }
        else if (nByte <= 0xF7)
        {
            /* Lead byte of a four-byte sequence. */
            nUtf32 = (nByte & 0x07) << 18;
            nShift = 12;
        }
        else if (nByte <= 0xFB)
        {
            /* Lead byte of a five-byte sequence. */
            nUtf32 = (nByte & 0x03) << 24;
            nShift = 18;
        }
        else
        {
            /* 0xFC or 0xFD: lead byte of a six-byte sequence. */
            nUtf32 = (nByte & 0x01) << 30;
            nShift = 24;
        }
        continue;

    transform:
        /* A complete code point is in nUtf32.  It is written out unless it
           is a U+FEFF at the very start, GLOBAL_SIGNATURE is requested and
           this is not the Java variant. */
        if (!bCheckBom || nUtf32 != 0xFEFF
            || (nFlags & RTL_TEXTTOUNICODE_FLAGS_GLOBAL_SIGNATURE) == 0
            || bJavaUtf8)
        {
            if (nUtf32 > 0x10FFFF)
            {
                bUndefined = sal_True;
                goto bad_input;
            }
            if (nUtf32 <= 0xFFFF)
            {
                if (pOut == pOutEnd)
                {
                    goto no_output;
                }
                *pOut++ = (sal_Unicode) nUtf32;
            }
            else
            {
                /* Needs a surrogate pair. */
                if (pOutEnd - pOut < 2)
                {
                    goto no_output;
                }
                *pOut++ = (sal_Unicode) ImplGetHighSurrogate(nUtf32);
                *pOut++ = (sal_Unicode) ImplGetLowSurrogate(nUtf32);
            }
        }
        nShift = -1;
        bCheckBom = sal_False;
        continue;

    bad_input:
        switch (ImplHandleBadInputTextToUnicodeConversion(
                    bUndefined, sal_True, 0, nFlags, &pOut, pOutEnd, &nInfo))
        {
        case IMPL_BAD_INPUT_STOP:
            nShift = -1;
            bCheckBom = sal_False;
            if (!bConsume)
            {
                --pIn;
            }
            break;

        case IMPL_BAD_INPUT_CONTINUE:
            nShift = -1;
            bCheckBom = sal_False;
            if (!bConsume)
            {
                --pIn;
            }
            continue;

        case IMPL_BAD_INPUT_NO_OUTPUT:
            goto no_output;
        }
        /* Reached for IMPL_BAD_INPUT_STOP (and any unlisted result): leave
           the conversion loop. */
        break;

    no_output:
        /* Un-read the current byte so that it is seen again by the next
           call, and report the full destination buffer. */
        --pIn;
        nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
        break;
    }

    /* The source ended inside a multi-byte sequence and nothing else has
       been reported so far. */
    if (nShift >= 0
        && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
                     | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
           == 0)
    {
        if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
        {
            /* More input may follow; the state is kept for the next call. */
            nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
        }
        else
        {
            /* Flushing: the truncated sequence is treated as bad input. */
            switch (ImplHandleBadInputTextToUnicodeConversion(
                        sal_False, sal_True, 0, nFlags, &pOut, pOutEnd,
                        &nInfo))
            {
            case IMPL_BAD_INPUT_STOP:
            case IMPL_BAD_INPUT_CONTINUE:
                nShift = -1;
                bCheckBom = sal_False;
                break;

            case IMPL_BAD_INPUT_NO_OUTPUT:
                nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
                break;
            }
        }
    }

    /* Store the state and report the results. */
    if (pState != NULL)
    {
        pState->nUtf32 = nUtf32;
        pState->nShift = nShift;
        pState->bCheckBom = bCheckBom;
    }
    if (pInfo != NULL)
    {
        *pInfo = nInfo;
    }
    if (pSrcCvtBytes != NULL)
    {
        *pSrcCvtBytes = (sal_Char const *) pIn - pSrcBuf;
    }
    return pOut - pDestBuf;
}
248*b1cdbd2cSJim Jagielski
ImplCreateUnicodeToUtf8Context(void)249*b1cdbd2cSJim Jagielski void * ImplCreateUnicodeToUtf8Context(void)
250*b1cdbd2cSJim Jagielski {
251*b1cdbd2cSJim Jagielski void * p = rtl_allocateMemory(sizeof (struct ImplUnicodeToUtf8Context));
252*b1cdbd2cSJim Jagielski ImplResetUnicodeToUtf8Context(p);
253*b1cdbd2cSJim Jagielski return p;
254*b1cdbd2cSJim Jagielski }
255*b1cdbd2cSJim Jagielski
ImplResetUnicodeToUtf8Context(void * pContext)256*b1cdbd2cSJim Jagielski void ImplResetUnicodeToUtf8Context(void * pContext)
257*b1cdbd2cSJim Jagielski {
258*b1cdbd2cSJim Jagielski if (pContext != NULL)
259*b1cdbd2cSJim Jagielski ((struct ImplUnicodeToUtf8Context *) pContext)->nHighSurrogate = 0xFFFF;
260*b1cdbd2cSJim Jagielski }
261*b1cdbd2cSJim Jagielski
/*
   Converts a buffer of UTF-16 code units into UTF-8 bytes.

   pData         a non-null value selects the Java UTF-8 variant: no BOM is
                 written, surrogates are encoded individually as three-byte
                 sequences instead of being combined, and U+0000 is written
                 as the two bytes C0 80.
   pContext      optional struct ImplUnicodeToUtf8Context; when non-null the
                 pending high surrogate / BOM marker is loaded from it on
                 entry and stored back on exit.
   pSrcBuf       source code units.
   nSrcChars     number of source code units.
   pDestBuf      destination buffer.
   nDestBytes    capacity of pDestBuf in bytes.
   nFlags        RTL_UNICODETOTEXT_FLAGS_* bits; GLOBAL_SIGNATURE and FLUSH
                 are examined here, the complete value is forwarded to
                 ImplHandleBadInputUnicodeToTextConversion.
   pInfo         if non-null, receives the accumulated RTL_UNICODETOTEXT_INFO_*
                 bits.
   pSrcCvtChars  if non-null, receives the number of source units consumed.

   Returns the number of bytes written to pDestBuf.
 */
sal_Size ImplConvertUnicodeToUtf8(ImplTextConverterData const * pData,
                                  void * pContext, sal_Unicode const * pSrcBuf,
                                  sal_Size nSrcChars, sal_Char * pDestBuf,
                                  sal_Size nDestBytes, sal_uInt32 nFlags,
                                  sal_uInt32 * pInfo, sal_Size* pSrcCvtChars)
{
    struct ImplUnicodeToUtf8Context * pState
        = (struct ImplUnicodeToUtf8Context *) pContext;
    int bJavaUtf8 = pData != NULL;
    sal_Unicode nHighSurrogate = 0xFFFF;
    sal_uInt32 nInfo = 0;
    sal_Unicode const * pIn = pSrcBuf;
    sal_Unicode const * pInEnd = pIn + nSrcChars;
    sal_Char * pOut = pDestBuf;
    sal_Char * pOutEnd = pOut + nDestBytes;

    if (pState != NULL)
    {
        nHighSurrogate = pState->nHighSurrogate;
    }

    /* 0xFFFF marks the very start of a conversion: write the BOM if it is
       requested (never for the Java variant), then switch to the normal
       "no high surrogate pending" state. */
    if (nHighSurrogate == 0xFFFF)
    {
        if ((nFlags & RTL_UNICODETOTEXT_FLAGS_GLOBAL_SIGNATURE) != 0
            && !bJavaUtf8)
        {
            if (pOutEnd - pOut < 3)
            {
                /* The marker stays 0xFFFF, so a later call tries again. */
                nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                goto done;
            }
            /* Write BOM (U+FEFF) as UTF-8: */
            *pOut++ = (sal_Char) (unsigned char) 0xEF;
            *pOut++ = (sal_Char) (unsigned char) 0xBB;
            *pOut++ = (sal_Char) (unsigned char) 0xBF;
        }
        nHighSurrogate = 0;
    }

    while (pIn < pInEnd)
    {
        sal_uInt32 nCode = *pIn++;

        if (nHighSurrogate != 0)
        {
            /* A high surrogate is pending; only a low surrogate may follow. */
            if (!ImplIsLowSurrogate(nCode) || bJavaUtf8)
            {
                goto bad_input;
            }
            nCode = ImplCombineSurrogates(nHighSurrogate, nCode);
        }
        else if (ImplIsHighSurrogate(nCode) && !bJavaUtf8)
        {
            /* Remember it and wait for the matching low surrogate. */
            nHighSurrogate = (sal_Unicode) nCode;
            continue;
        }

        /* Reject unpaired low surrogates (outside the Java variant) and
           noncharacters. */
        if ((ImplIsLowSurrogate(nCode) && !bJavaUtf8)
            || ImplIsNoncharacter(nCode))
        {
            goto bad_input;
        }

        if (nCode <= 0x7F && (!bJavaUtf8 || nCode != 0))
        {
            /* One byte; in the Java variant U+0000 is excluded here and
               takes the two-byte branch below. */
            if (pOut == pOutEnd)
            {
                goto no_output;
            }
            *pOut++ = (sal_Char) nCode;
        }
        else if (nCode <= 0x7FF)
        {
            if (pOutEnd - pOut < 2)
            {
                goto no_output;
            }
            *pOut++ = (sal_Char) (0xC0 | (nCode >> 6));
            *pOut++ = (sal_Char) (0x80 | (nCode & 0x3F));
        }
        else if (nCode <= 0xFFFF)
        {
            if (pOutEnd - pOut < 3)
            {
                goto no_output;
            }
            *pOut++ = (sal_Char) (0xE0 | (nCode >> 12));
            *pOut++ = (sal_Char) (0x80 | ((nCode >> 6) & 0x3F));
            *pOut++ = (sal_Char) (0x80 | (nCode & 0x3F));
        }
        else
        {
            if (pOutEnd - pOut < 4)
            {
                goto no_output;
            }
            *pOut++ = (sal_Char) (0xF0 | (nCode >> 18));
            *pOut++ = (sal_Char) (0x80 | ((nCode >> 12) & 0x3F));
            *pOut++ = (sal_Char) (0x80 | ((nCode >> 6) & 0x3F));
            *pOut++ = (sal_Char) (0x80 | (nCode & 0x3F));
        }
        nHighSurrogate = 0;
        continue;

    bad_input:
        switch (ImplHandleBadInputUnicodeToTextConversion(sal_False, 0, nFlags,
                                                          &pOut, pOutEnd,
                                                          &nInfo, NULL, 0,
                                                          NULL))
        {
        case IMPL_BAD_INPUT_STOP:
            nHighSurrogate = 0;
            break;

        case IMPL_BAD_INPUT_CONTINUE:
            nHighSurrogate = 0;
            continue;

        case IMPL_BAD_INPUT_NO_OUTPUT:
            goto no_output;
        }
        /* Reached for IMPL_BAD_INPUT_STOP (and any unlisted result): leave
           the conversion loop. */
        break;

    no_output:
        /* Un-read the current code unit so that it is seen again by the next
           call, and report the full destination buffer. */
        --pIn;
        nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
        break;
    }

    /* The source ended with a high surrogate still pending and nothing else
       has been reported so far. */
    if (nHighSurrogate != 0
        && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
                     | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
           == 0)
    {
        /* NOTE(review): SRCBUFFERTOSMALL is reported here when FLUSH is set,
           whereas ImplConvertUtf8ToUnicode reports it when FLUSH is clear;
           the test looks inverted but is kept as is - confirm the intended
           behaviour before changing it. */
        if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
        {
            nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
        }
        else
        {
            switch (ImplHandleBadInputUnicodeToTextConversion(sal_False, 0,
                                                              nFlags, &pOut,
                                                              pOutEnd, &nInfo,
                                                              NULL, 0, NULL))
            {
            case IMPL_BAD_INPUT_STOP:
            case IMPL_BAD_INPUT_CONTINUE:
                nHighSurrogate = 0;
                break;

            case IMPL_BAD_INPUT_NO_OUTPUT:
                nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                break;
            }
        }
    }

 done:
    /* Store the state and report the results. */
    if (pState != NULL)
    {
        pState->nHighSurrogate = nHighSurrogate;
    }
    if (pInfo != NULL)
    {
        *pInfo = nInfo;
    }
    if (pSrcCvtChars != NULL)
    {
        *pSrcCvtChars = pIn - pSrcBuf;
    }
    return pOut - pDestBuf;
}
416