xref: /trunk/main/sal/textenc/tenchelp.h (revision cdf0e10c4e3984b49a9502b011690b615761d4a3)
1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir #ifndef INCLUDED_RTL_TEXTENC_TENCHELP_H
29*cdf0e10cSrcweir #define INCLUDED_RTL_TEXTENC_TENCHELP_H
30*cdf0e10cSrcweir 
31*cdf0e10cSrcweir #include "rtl/tencinfo.h"
32*cdf0e10cSrcweir #include "rtl/textenc.h"
33*cdf0e10cSrcweir #include "sal/types.h"
34*cdf0e10cSrcweir 
35*cdf0e10cSrcweir #if defined __cplusplus
36*cdf0e10cSrcweir extern "C" {
37*cdf0e10cSrcweir #endif /* __cplusplus */
38*cdf0e10cSrcweir 
/* Bounds of the 16-bit value range 0xF100..0xF1FF (256 values).
   NOTE(review): presumably the Unicode private-use code points onto which
   the byte converters map otherwise undefined bytes -- confirm against the
   converter implementations. */
#define RTL_TEXTCVT_BYTE_PRIVATE_START 0xF100
#define RTL_TEXTCVT_BYTE_PRIVATE_END 0xF1FF
41*cdf0e10cSrcweir 
42*cdf0e10cSrcweir /* ----------------- */
43*cdf0e10cSrcweir /* - TextConverter - */
44*cdf0e10cSrcweir /* ----------------- */
45*cdf0e10cSrcweir 
/* Opaque alias for converter-specific table data; it is only ever handled
   through pointers (ImplTextConverterData const *).
   NOTE(review): presumably points at one of the Impl*ConvertData structures
   declared in this header -- confirm per converter. */
typedef void ImplTextConverterData;
47*cdf0e10cSrcweir 
48*cdf0e10cSrcweir typedef
49*cdf0e10cSrcweir sal_Size (* ImplConvertToUnicodeProc)(ImplTextConverterData const * pData,
50*cdf0e10cSrcweir                                       void * pContext,
51*cdf0e10cSrcweir                                       sal_Char const * pSrcBuf,
52*cdf0e10cSrcweir                                       sal_Size nSrcBytes,
53*cdf0e10cSrcweir                                       sal_Unicode * pDestBuf,
54*cdf0e10cSrcweir                                       sal_Size nDestChars,
55*cdf0e10cSrcweir                                       sal_uInt32 nFlags,
56*cdf0e10cSrcweir                                       sal_uInt32 * pInfo,
57*cdf0e10cSrcweir                                       sal_Size * pSrcCvtBytes);
58*cdf0e10cSrcweir 
59*cdf0e10cSrcweir typedef
60*cdf0e10cSrcweir sal_Size (* ImplConvertToTextProc)(ImplTextConverterData const * pData,
61*cdf0e10cSrcweir                                    void * pContext,
62*cdf0e10cSrcweir                                    sal_Unicode const * pSrcBuf,
63*cdf0e10cSrcweir                                    sal_Size nSrcChars,
64*cdf0e10cSrcweir                                    sal_Char * pDestBuf,
65*cdf0e10cSrcweir                                    sal_Size nDestBytes,
66*cdf0e10cSrcweir                                    sal_uInt32 nFlags,
67*cdf0e10cSrcweir                                    sal_uInt32 * pInfo,
68*cdf0e10cSrcweir                                    sal_Size * pSrcCvtChars);
69*cdf0e10cSrcweir 
/* Callback types for managing a conversion context, which is passed around
   as an untyped void pointer.  One create/destroy/reset triple exists for
   each conversion direction ("Text" and "Unicode" variants); the two triples
   have identical signatures.
   NOTE(review): presumably create returns a newly allocated context that the
   matching destroy releases, and reset returns it to its initial state --
   confirm against the implementations. */

typedef void * (* ImplCreateTextContextProc)(void);

typedef void (* ImplDestroyTextContextProc)(void * pContext);

typedef void (* ImplResetTextContextProc)(void * pContext);

typedef void * (* ImplCreateUnicodeContextProc)(void);

typedef void (* ImplDestroyUnicodeContextProc)(void * pContext);

typedef void (* ImplResetUnicodeContextProc)(void * pContext);
81*cdf0e10cSrcweir 
82*cdf0e10cSrcweir typedef struct
83*cdf0e10cSrcweir {
84*cdf0e10cSrcweir     ImplTextConverterData const * mpConvertData;
85*cdf0e10cSrcweir     ImplConvertToUnicodeProc mpConvertTextToUnicodeProc;
86*cdf0e10cSrcweir     ImplConvertToTextProc mpConvertUnicodeToTextProc;
87*cdf0e10cSrcweir     ImplCreateTextContextProc mpCreateTextToUnicodeContext;
88*cdf0e10cSrcweir     ImplDestroyTextContextProc mpDestroyTextToUnicodeContext;
89*cdf0e10cSrcweir     ImplResetTextContextProc mpResetTextToUnicodeContext;
90*cdf0e10cSrcweir     ImplCreateUnicodeContextProc mpCreateUnicodeToTextContext;
91*cdf0e10cSrcweir     ImplDestroyUnicodeContextProc mpDestroyUnicodeToTextContext;
92*cdf0e10cSrcweir     ImplResetUnicodeContextProc mpResetUnicodeToTextContext;
93*cdf0e10cSrcweir } ImplTextConverter;
94*cdf0e10cSrcweir 
95*cdf0e10cSrcweir /* ----------------------------- */
96*cdf0e10cSrcweir /* - TextEncoding - Structures - */
97*cdf0e10cSrcweir /* ----------------------------- */
98*cdf0e10cSrcweir 
99*cdf0e10cSrcweir typedef struct
100*cdf0e10cSrcweir {
101*cdf0e10cSrcweir     ImplTextConverter maConverter;
102*cdf0e10cSrcweir     sal_uInt8 mnMinCharSize;
103*cdf0e10cSrcweir     sal_uInt8 mnMaxCharSize;
104*cdf0e10cSrcweir     sal_uInt8 mnAveCharSize;
105*cdf0e10cSrcweir     sal_uInt8 mnBestWindowsCharset;
106*cdf0e10cSrcweir     char const * mpBestUnixCharset;
107*cdf0e10cSrcweir     char const * mpBestMimeCharset;
108*cdf0e10cSrcweir     sal_uInt32 mnInfoFlags;
109*cdf0e10cSrcweir } ImplTextEncodingData;
110*cdf0e10cSrcweir 
111*cdf0e10cSrcweir /* ----------------------------------- */
112*cdf0e10cSrcweir /* - TextConverter - Byte-Structures - */
113*cdf0e10cSrcweir /* ----------------------------------- */
114*cdf0e10cSrcweir 
115*cdf0e10cSrcweir typedef struct
116*cdf0e10cSrcweir {
117*cdf0e10cSrcweir     sal_uInt16                      mnUniChar;
118*cdf0e10cSrcweir     sal_uChar                       mnChar;
119*cdf0e10cSrcweir     sal_uChar                       mnChar2;
120*cdf0e10cSrcweir         // to cater for mappings like MS1258 with 1--2 bytes per Unicode char,
121*cdf0e10cSrcweir         // 0 if unused
122*cdf0e10cSrcweir } ImplUniCharTabData;
123*cdf0e10cSrcweir 
124*cdf0e10cSrcweir typedef struct
125*cdf0e10cSrcweir {
126*cdf0e10cSrcweir     const sal_uInt16*               mpToUniTab1;
127*cdf0e10cSrcweir     const sal_uInt16*               mpToUniTab2;
128*cdf0e10cSrcweir     sal_uChar                       mnToUniStart1;
129*cdf0e10cSrcweir     sal_uChar                       mnToUniEnd1;
130*cdf0e10cSrcweir     sal_uChar                       mnToUniStart2;
131*cdf0e10cSrcweir     sal_uChar                       mnToUniEnd2;
132*cdf0e10cSrcweir     const sal_uChar*                mpToCharTab1;
133*cdf0e10cSrcweir     const sal_uChar*                mpToCharTab2;
134*cdf0e10cSrcweir     const ImplUniCharTabData*       mpToCharExTab;
135*cdf0e10cSrcweir     sal_uInt16                      mnToCharStart1;
136*cdf0e10cSrcweir     sal_uInt16                      mnToCharEnd1;
137*cdf0e10cSrcweir     sal_uInt16                      mnToCharStart2;
138*cdf0e10cSrcweir     sal_uInt16                      mnToCharEnd2;
139*cdf0e10cSrcweir     sal_uInt16                      mnToCharExCount;
140*cdf0e10cSrcweir } ImplByteConvertData;
141*cdf0e10cSrcweir 
142*cdf0e10cSrcweir /* ----------------------------------- */
143*cdf0e10cSrcweir /* - TextConverter - DBCS-Structures - */
144*cdf0e10cSrcweir /* ----------------------------------- */
145*cdf0e10cSrcweir 
146*cdf0e10cSrcweir typedef struct
147*cdf0e10cSrcweir {
148*cdf0e10cSrcweir     sal_uChar                       mnLeadStart;
149*cdf0e10cSrcweir     sal_uChar                       mnLeadEnd;
150*cdf0e10cSrcweir     sal_uChar                       mnTrail1Start;
151*cdf0e10cSrcweir     sal_uChar                       mnTrail1End;
152*cdf0e10cSrcweir     sal_uChar                       mnTrail2Start;
153*cdf0e10cSrcweir     sal_uChar                       mnTrail2End;
154*cdf0e10cSrcweir     sal_uChar                       mnTrail3Start;
155*cdf0e10cSrcweir     sal_uChar                       mnTrail3End;
156*cdf0e10cSrcweir     sal_uChar                       mnTrailCount;
157*cdf0e10cSrcweir     sal_uInt16                      mnTrailRangeCount;
158*cdf0e10cSrcweir     sal_uInt16                      mnUniStart;
159*cdf0e10cSrcweir     sal_uInt16                      mnUniEnd;
160*cdf0e10cSrcweir } ImplDBCSEUDCData;
161*cdf0e10cSrcweir 
162*cdf0e10cSrcweir typedef struct
163*cdf0e10cSrcweir {
164*cdf0e10cSrcweir     sal_uInt16                      mnUniChar;
165*cdf0e10cSrcweir     sal_uInt8                       mnTrailStart;
166*cdf0e10cSrcweir     sal_uInt8                       mnTrailEnd;
167*cdf0e10cSrcweir     const sal_uInt16*               mpToUniTrailTab;
168*cdf0e10cSrcweir } ImplDBCSToUniLeadTab;
169*cdf0e10cSrcweir 
170*cdf0e10cSrcweir typedef struct
171*cdf0e10cSrcweir {
172*cdf0e10cSrcweir     sal_uInt8                       mnLowStart;
173*cdf0e10cSrcweir     sal_uInt8                       mnLowEnd;
174*cdf0e10cSrcweir     const sal_uInt16*               mpToUniTrailTab;
175*cdf0e10cSrcweir } ImplUniToDBCSHighTab;
176*cdf0e10cSrcweir 
177*cdf0e10cSrcweir typedef struct
178*cdf0e10cSrcweir {
179*cdf0e10cSrcweir     const ImplDBCSToUniLeadTab*     mpToUniLeadTab;
180*cdf0e10cSrcweir     const ImplUniToDBCSHighTab*     mpToDBCSHighTab;
181*cdf0e10cSrcweir     sal_uChar                       mnLeadStart;
182*cdf0e10cSrcweir     sal_uChar                       mnLeadEnd;
183*cdf0e10cSrcweir     sal_uChar                       mnTrailStart;
184*cdf0e10cSrcweir     sal_uChar                       mnTrailEnd;
185*cdf0e10cSrcweir     const ImplDBCSEUDCData*         mpEUDCTab;
186*cdf0e10cSrcweir     sal_uInt16                      mnEUDCCount;
187*cdf0e10cSrcweir } ImplDBCSConvertData;
188*cdf0e10cSrcweir 
189*cdf0e10cSrcweir /* ---------------------------------- */
190*cdf0e10cSrcweir /* - TextConverter - EUC-Structures - */
191*cdf0e10cSrcweir /* ---------------------------------- */
192*cdf0e10cSrcweir 
193*cdf0e10cSrcweir typedef struct
194*cdf0e10cSrcweir {
195*cdf0e10cSrcweir     const ImplDBCSToUniLeadTab*     mpJIS0208ToUniLeadTab;
196*cdf0e10cSrcweir     const ImplDBCSToUniLeadTab*     mpJIS0212ToUniLeadTab;
197*cdf0e10cSrcweir     const ImplUniToDBCSHighTab*     mpUniToJIS0208HighTab;
198*cdf0e10cSrcweir     const ImplUniToDBCSHighTab*     mpUniToJIS0212HighTab;
199*cdf0e10cSrcweir } ImplEUCJPConvertData;
200*cdf0e10cSrcweir 
201*cdf0e10cSrcweir /* --------------------------------- */
202*cdf0e10cSrcweir /* - TextConverter - HelpFunctions - */
203*cdf0e10cSrcweir /* --------------------------------- */
204*cdf0e10cSrcweir 
205*cdf0e10cSrcweir sal_Unicode ImplGetUndefinedUnicodeChar(sal_uChar cChar, sal_uInt32 nFlags);
206*cdf0e10cSrcweir 
207*cdf0e10cSrcweir sal_Bool
208*cdf0e10cSrcweir ImplHandleUndefinedUnicodeToTextChar(ImplTextConverterData const * pData,
209*cdf0e10cSrcweir                                      sal_Unicode const ** ppSrcBuf,
210*cdf0e10cSrcweir                                      sal_Unicode const * pEndSrcBuf,
211*cdf0e10cSrcweir                                      sal_Char ** ppDestBuf,
212*cdf0e10cSrcweir                                      sal_Char const * pEndDestBuf,
213*cdf0e10cSrcweir                                      sal_uInt32 nFlags,
214*cdf0e10cSrcweir                                      sal_uInt32 * pInfo);
215*cdf0e10cSrcweir     /* sal_True means 'continue,' sal_False means 'break' */
216*cdf0e10cSrcweir 
217*cdf0e10cSrcweir /* ----------------------------- */
218*cdf0e10cSrcweir /* - TextConverter - Functions - */
219*cdf0e10cSrcweir /* ----------------------------- */
220*cdf0e10cSrcweir 
221*cdf0e10cSrcweir sal_Size ImplSymbolToUnicode( const ImplTextConverterData* pData, void* pContext,
222*cdf0e10cSrcweir                               const sal_Char* pSrcBuf, sal_Size nSrcBytes,
223*cdf0e10cSrcweir                               sal_Unicode* pDestBuf, sal_Size nDestChars,
224*cdf0e10cSrcweir                               sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtBytes );
225*cdf0e10cSrcweir sal_Size ImplUnicodeToSymbol( const ImplTextConverterData* pData, void* pContext,
226*cdf0e10cSrcweir                               const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
227*cdf0e10cSrcweir                               sal_Char* pDestBuf, sal_Size nDestBytes,
228*cdf0e10cSrcweir                               sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtChars );
229*cdf0e10cSrcweir sal_Size ImplCharToUnicode( const ImplTextConverterData* pData, void* pContext,
230*cdf0e10cSrcweir                             const sal_Char* pSrcBuf, sal_Size nSrcBytes,
231*cdf0e10cSrcweir                             sal_Unicode* pDestBuf, sal_Size nDestChars,
232*cdf0e10cSrcweir                             sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtBytes );
233*cdf0e10cSrcweir sal_Size ImplUnicodeToChar( const ImplTextConverterData* pData, void* pContext,
234*cdf0e10cSrcweir                             const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
235*cdf0e10cSrcweir                             sal_Char* pDestBuf, sal_Size nDestBytes,
236*cdf0e10cSrcweir                             sal_uInt32 nFlags, sal_uInt32* pInfo, sal_Size* pSrcCvtChars );
237*cdf0e10cSrcweir sal_Size ImplDBCSToUnicode( const ImplTextConverterData* pData, void* pContext,
238*cdf0e10cSrcweir                             const sal_Char* pSrcBuf, sal_Size nSrcBytes,
239*cdf0e10cSrcweir                             sal_Unicode* pDestBuf, sal_Size nDestChars,
240*cdf0e10cSrcweir                             sal_uInt32 nFlags, sal_uInt32* pInfo,
241*cdf0e10cSrcweir                             sal_Size* pSrcCvtBytes );
242*cdf0e10cSrcweir sal_Size ImplUnicodeToDBCS( const ImplTextConverterData* pData, void* pContext,
243*cdf0e10cSrcweir                             const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
244*cdf0e10cSrcweir                             sal_Char* pDestBuf, sal_Size nDestBytes,
245*cdf0e10cSrcweir                             sal_uInt32 nFlags, sal_uInt32* pInfo,
246*cdf0e10cSrcweir                             sal_Size* pSrcCvtChars );
247*cdf0e10cSrcweir sal_Size ImplEUCJPToUnicode( const ImplTextConverterData* pData,
248*cdf0e10cSrcweir                              void* pContext,
249*cdf0e10cSrcweir                              const sal_Char* pSrcBuf, sal_Size nSrcBytes,
250*cdf0e10cSrcweir                              sal_Unicode* pDestBuf, sal_Size nDestChars,
251*cdf0e10cSrcweir                              sal_uInt32 nFlags, sal_uInt32* pInfo,
252*cdf0e10cSrcweir                              sal_Size* pSrcCvtBytes );
253*cdf0e10cSrcweir sal_Size ImplUnicodeToEUCJP( const ImplTextConverterData* pData,
254*cdf0e10cSrcweir                              void* pContext,
255*cdf0e10cSrcweir                              const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
256*cdf0e10cSrcweir                              sal_Char* pDestBuf, sal_Size nDestBytes,
257*cdf0e10cSrcweir                              sal_uInt32 nFlags, sal_uInt32* pInfo,
258*cdf0e10cSrcweir                              sal_Size* pSrcCvtChars );
259*cdf0e10cSrcweir void* ImplUTF7CreateUTF7TextToUnicodeContext( void );
260*cdf0e10cSrcweir void ImplUTF7DestroyTextToUnicodeContext( void* pContext );
261*cdf0e10cSrcweir void ImplUTF7ResetTextToUnicodeContext( void* pContext );
262*cdf0e10cSrcweir sal_Size ImplUTF7ToUnicode( const ImplTextConverterData* pData, void* pContext,
263*cdf0e10cSrcweir                             const sal_Char* pSrcBuf, sal_Size nSrcBytes,
264*cdf0e10cSrcweir                             sal_Unicode* pDestBuf, sal_Size nDestChars,
265*cdf0e10cSrcweir                             sal_uInt32 nFlags, sal_uInt32* pInfo,
266*cdf0e10cSrcweir                             sal_Size* pSrcCvtBytes );
267*cdf0e10cSrcweir void* ImplUTF7CreateUnicodeToTextContext( void );
268*cdf0e10cSrcweir void ImplUTF7DestroyUnicodeToTextContext( void* pContext );
269*cdf0e10cSrcweir void ImplUTF7ResetUnicodeToTextContext( void* pContext );
270*cdf0e10cSrcweir sal_Size ImplUnicodeToUTF7( const ImplTextConverterData* pData, void* pContext,
271*cdf0e10cSrcweir                             const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
272*cdf0e10cSrcweir                             sal_Char* pDestBuf, sal_Size nDestBytes,
273*cdf0e10cSrcweir                             sal_uInt32 nFlags, sal_uInt32* pInfo,
274*cdf0e10cSrcweir                             sal_Size* pSrcCvtChars );
275*cdf0e10cSrcweir 
276*cdf0e10cSrcweir void * ImplCreateUtf8ToUnicodeContext(void) SAL_THROW_EXTERN_C();
277*cdf0e10cSrcweir void ImplResetUtf8ToUnicodeContext(void * pContext) SAL_THROW_EXTERN_C();
278*cdf0e10cSrcweir sal_Size ImplConvertUtf8ToUnicode(ImplTextConverterData const * pData,
279*cdf0e10cSrcweir                                   void * pContext, sal_Char const * pSrcBuf,
280*cdf0e10cSrcweir                                   sal_Size nSrcBytes, sal_Unicode * pDestBuf,
281*cdf0e10cSrcweir                                   sal_Size nDestChars, sal_uInt32 nFlags,
282*cdf0e10cSrcweir                                   sal_uInt32 * pInfo, sal_Size * pSrcCvtBytes)
283*cdf0e10cSrcweir     SAL_THROW_EXTERN_C();
284*cdf0e10cSrcweir void * ImplCreateUnicodeToUtf8Context(void) SAL_THROW_EXTERN_C();
285*cdf0e10cSrcweir void ImplResetUnicodeToUtf8Context(void * pContext) SAL_THROW_EXTERN_C();
286*cdf0e10cSrcweir sal_Size ImplConvertUnicodeToUtf8(ImplTextConverterData const * pData,
287*cdf0e10cSrcweir                                   void * pContext, sal_Unicode const * pSrcBuf,
288*cdf0e10cSrcweir                                   sal_Size nSrcChars, sal_Char * pDestBuf,
289*cdf0e10cSrcweir                                   sal_Size nDestBytes, sal_uInt32 nFlags,
290*cdf0e10cSrcweir                                   sal_uInt32 * pInfo, sal_Size* pSrcCvtChars)
291*cdf0e10cSrcweir     SAL_THROW_EXTERN_C();
292*cdf0e10cSrcweir 
293*cdf0e10cSrcweir #if defined __cplusplus
294*cdf0e10cSrcweir }
295*cdf0e10cSrcweir #endif /* __cplusplus */
296*cdf0e10cSrcweir 
297*cdf0e10cSrcweir #endif /* INCLUDED_RTL_TEXTENC_TENCHELP_H */
298