1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
29*cdf0e10cSrcweir #include "precompiled_i18npool.hxx"
30*cdf0e10cSrcweir #include <breakiterator_th.hxx>
31*cdf0e10cSrcweir #include <wtt.h>
32*cdf0e10cSrcweir 
33*cdf0e10cSrcweir #include <string.h>	// for memset
34*cdf0e10cSrcweir 
35*cdf0e10cSrcweir using namespace ::com::sun::star::uno;
36*cdf0e10cSrcweir using namespace ::com::sun::star::lang;
37*cdf0e10cSrcweir using namespace ::rtl;
38*cdf0e10cSrcweir 
39*cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n {
40*cdf0e10cSrcweir 
//	----------------------------------------------------
//	class BreakIterator_th
//	----------------------------------------------------
44*cdf0e10cSrcweir BreakIterator_th::BreakIterator_th()
45*cdf0e10cSrcweir {
46*cdf0e10cSrcweir 	cBreakIterator = "com.sun.star.i18n.BreakIterator_th";
47*cdf0e10cSrcweir     wordRule=lineRule=NULL;
48*cdf0e10cSrcweir }
49*cdf0e10cSrcweir 
50*cdf0e10cSrcweir BreakIterator_th::~BreakIterator_th()
51*cdf0e10cSrcweir {
52*cdf0e10cSrcweir }
53*cdf0e10cSrcweir 
// Code point U+0E33 (THAI CHARACTER SARA AM); getCombState() treats it specially.
#define SARA_AM 0x0E33

/*
 * Cell composition states: the values stored in thaiCompRel and returned
 * by getCombState().
 */
#define ST_COM	1	// following character is composed with the leading one (same display cell)
#define ST_NXT	2	// following character is displayed in the next cell
#define ST_NDP	3	// following character is non-display
63*cdf0e10cSrcweir 
64*cdf0e10cSrcweir static const sal_Int16 thaiCompRel[MAX_CT][MAX_CT] = {
65*cdf0e10cSrcweir 	//  C  N  C  L  F  F  F  B  B  B  T  A  A  A  A  A  A
66*cdf0e10cSrcweir 	//  T  O  O  V  V  V  V  V  V  D  O  D  D  D  V  V  V
67*cdf0e10cSrcweir 	//  R  N  N     1  2  3  1  2     N  1  2  3  1  2  3
68*cdf0e10cSrcweir 	//  L     S                       E
69*cdf0e10cSrcweir 	//  0  1  2  3  4  5  6  7  8  9  10 11 12 13 14 15 16
70*cdf0e10cSrcweir 	{   ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT   }, // CTRL 0
71*cdf0e10cSrcweir 	{   ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT   }, // NON  1
72*cdf0e10cSrcweir 	{   ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM   }, // CONS 2
73*cdf0e10cSrcweir 	{   ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT   }, // LV   3
74*cdf0e10cSrcweir 	{   ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT   }, // FV1  4
75*cdf0e10cSrcweir 	{   ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT   }, // FV2  5
76*cdf0e10cSrcweir 	{   ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT   }, // FV3  6
77*cdf0e10cSrcweir 	{   ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT   }, // BV1  7
78*cdf0e10cSrcweir 	{   ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT   }, // BV2  8
79*cdf0e10cSrcweir 	{   ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT   }, // BD   9
80*cdf0e10cSrcweir 	{   ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT   }, // TONE 10
81*cdf0e10cSrcweir 	{   ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT   }, // AD1  11
82*cdf0e10cSrcweir 	{   ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT   }, // AD2  12
83*cdf0e10cSrcweir 	{   ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT   }, // AD3  13
84*cdf0e10cSrcweir 	{   ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT   }, // AV1  14
85*cdf0e10cSrcweir 	{   ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT   }, // AV2  15
86*cdf0e10cSrcweir 	{   ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_NXT, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT   } // AV3  16
87*cdf0e10cSrcweir 
88*cdf0e10cSrcweir };
89*cdf0e10cSrcweir 
90*cdf0e10cSrcweir const sal_uInt32 is_ST_COM = (1<<CT_CTRL)|(1<<CT_NON)|(1<<CT_CONS)|(1<<CT_TONE);
91*cdf0e10cSrcweir 
92*cdf0e10cSrcweir static sal_uInt16 SAL_CALL getCombState(const sal_Unicode *text, sal_Int32 pos)
93*cdf0e10cSrcweir {
94*cdf0e10cSrcweir 	sal_uInt16 ch1 = getCharType(text[pos]);
95*cdf0e10cSrcweir 	sal_uInt16 ch2 = getCharType(text[pos+1]);
96*cdf0e10cSrcweir 
97*cdf0e10cSrcweir 	if (text[pos+1] == SARA_AM) {
98*cdf0e10cSrcweir 	    if ((1 << ch1) & is_ST_COM)
99*cdf0e10cSrcweir 		return  ST_COM;
100*cdf0e10cSrcweir 	    else
101*cdf0e10cSrcweir 		ch2 = CT_AD1;
102*cdf0e10cSrcweir 	}
103*cdf0e10cSrcweir 
104*cdf0e10cSrcweir 	return thaiCompRel[ch1][ch2];
105*cdf0e10cSrcweir }
106*cdf0e10cSrcweir 
107*cdf0e10cSrcweir 
108*cdf0e10cSrcweir static sal_Int32 SAL_CALL getACell(const sal_Unicode *text, sal_Int32 pos, sal_Int32 len)
109*cdf0e10cSrcweir {
110*cdf0e10cSrcweir 	sal_uInt32 curr = 1;
111*cdf0e10cSrcweir 	for (; pos + 1 < len && getCombState(text, pos) == ST_COM; curr++, pos++) {}
112*cdf0e10cSrcweir 	return curr;
113*cdf0e10cSrcweir }
114*cdf0e10cSrcweir 
// True if c lies in the range 0x0E00..0x0E7F (Unicode definition for Thai).
// The argument is parenthesized so that expression arguments (e.g. `x | y`,
// `a ? b : c`) are compared as a whole. Note that c is evaluated twice, so do
// not pass expressions with side effects.
#define is_Thai(c)	(0x0e00 <= (c) && (c) <= 0x0e7f)
116*cdf0e10cSrcweir 
117*cdf0e10cSrcweir void SAL_CALL BreakIterator_th::makeIndex(const OUString& Text, sal_Int32 nStartPos)
118*cdf0e10cSrcweir 	throw(RuntimeException)
119*cdf0e10cSrcweir {
120*cdf0e10cSrcweir 	if (Text != cachedText) {
121*cdf0e10cSrcweir 	    cachedText = Text;
122*cdf0e10cSrcweir 	    if (cellIndexSize < cachedText.getLength()) {
123*cdf0e10cSrcweir 		cellIndexSize = cachedText.getLength();
124*cdf0e10cSrcweir 		free(nextCellIndex);
125*cdf0e10cSrcweir 		free(previousCellIndex);
126*cdf0e10cSrcweir 		nextCellIndex = (sal_Int32*) calloc(cellIndexSize, sizeof(sal_Int32));
127*cdf0e10cSrcweir 		previousCellIndex = (sal_Int32*) calloc(cellIndexSize, sizeof(sal_Int32));
128*cdf0e10cSrcweir 	    }
129*cdf0e10cSrcweir 	    // reset nextCell for new Text
130*cdf0e10cSrcweir 	    memset(nextCellIndex, 0, cellIndexSize * sizeof(sal_Int32));
131*cdf0e10cSrcweir 	}
132*cdf0e10cSrcweir 	else if (nextCellIndex[nStartPos] > 0 || ! is_Thai(Text[nStartPos]))
133*cdf0e10cSrcweir 	    return;
134*cdf0e10cSrcweir 
135*cdf0e10cSrcweir 	const sal_Unicode* str = cachedText.getStr();
136*cdf0e10cSrcweir 	sal_Int32 len = cachedText.getLength(), startPos, endPos;
137*cdf0e10cSrcweir 
138*cdf0e10cSrcweir 	startPos = nStartPos;
139*cdf0e10cSrcweir 	while (startPos > 0 && is_Thai(str[startPos-1])) startPos--;
140*cdf0e10cSrcweir 	endPos = nStartPos+1;
141*cdf0e10cSrcweir 	while (endPos < len && is_Thai(str[endPos])) endPos++;
142*cdf0e10cSrcweir 
143*cdf0e10cSrcweir 	sal_Int32 start, end, pos;
144*cdf0e10cSrcweir 	pos = start = end = startPos;
145*cdf0e10cSrcweir 
146*cdf0e10cSrcweir 	while (pos < endPos) {
147*cdf0e10cSrcweir 	    end += getACell(str, start, endPos);
148*cdf0e10cSrcweir 	    while (pos < end) {
149*cdf0e10cSrcweir 		nextCellIndex[pos] = end;
150*cdf0e10cSrcweir 		previousCellIndex[pos] = start;
151*cdf0e10cSrcweir 		pos++;
152*cdf0e10cSrcweir 	    }
153*cdf0e10cSrcweir 	    start = end;
154*cdf0e10cSrcweir 	}
155*cdf0e10cSrcweir }
156*cdf0e10cSrcweir 
157*cdf0e10cSrcweir } } } }
158