1*449ab281SAndrew Rist /**************************************************************
2cdf0e10cSrcweir *
3*449ab281SAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one
4*449ab281SAndrew Rist * or more contributor license agreements. See the NOTICE file
5*449ab281SAndrew Rist * distributed with this work for additional information
6*449ab281SAndrew Rist * regarding copyright ownership. The ASF licenses this file
7*449ab281SAndrew Rist * to you under the Apache License, Version 2.0 (the
8*449ab281SAndrew Rist * "License"); you may not use this file except in compliance
9*449ab281SAndrew Rist * with the License. You may obtain a copy of the License at
10*449ab281SAndrew Rist *
11*449ab281SAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0
12*449ab281SAndrew Rist *
13*449ab281SAndrew Rist * Unless required by applicable law or agreed to in writing,
14*449ab281SAndrew Rist * software distributed under the License is distributed on an
15*449ab281SAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*449ab281SAndrew Rist * KIND, either express or implied. See the License for the
17*449ab281SAndrew Rist * specific language governing permissions and limitations
18*449ab281SAndrew Rist * under the License.
19*449ab281SAndrew Rist *
20*449ab281SAndrew Rist *************************************************************/
21*449ab281SAndrew Rist
22*449ab281SAndrew Rist
23cdf0e10cSrcweir
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_i18npool.hxx"
26cdf0e10cSrcweir #include <breakiterator_th.hxx>
27cdf0e10cSrcweir #include <wtt.h>
28cdf0e10cSrcweir
29cdf0e10cSrcweir #include <string.h> // for memset
30cdf0e10cSrcweir
31cdf0e10cSrcweir using namespace ::com::sun::star::uno;
32cdf0e10cSrcweir using namespace ::com::sun::star::lang;
33cdf0e10cSrcweir using namespace ::rtl;
34cdf0e10cSrcweir
35cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n {
36cdf0e10cSrcweir
// ----------------------------------------------------
// class BreakIterator_th
// ----------------------------------------------------
BreakIterator_th()40cdf0e10cSrcweir BreakIterator_th::BreakIterator_th()
41cdf0e10cSrcweir {
42cdf0e10cSrcweir cBreakIterator = "com.sun.star.i18n.BreakIterator_th";
43cdf0e10cSrcweir wordRule=lineRule=NULL;
44cdf0e10cSrcweir }
45cdf0e10cSrcweir
~BreakIterator_th()46cdf0e10cSrcweir BreakIterator_th::~BreakIterator_th()
47cdf0e10cSrcweir {
48cdf0e10cSrcweir }
49cdf0e10cSrcweir
// Code point that getCombState() treats specially when it follows another character.
#define SARA_AM 0x0E33

/*
 * Cell composition states: the values stored in thaiCompRel and
 * returned by getCombState().
 */
#define ST_COM 1 // following character composes with the leading one (same display cell)
#define ST_NXT 2 // following character is displayed in the next cell
#define ST_NDP 3 // non-display
60cdf0e10cSrcweir static const sal_Int16 thaiCompRel[MAX_CT][MAX_CT] = {
61cdf0e10cSrcweir // C N C L F F F B B B T A A A A A A
62cdf0e10cSrcweir // T O O V V V V V V D O D D D V V V
63cdf0e10cSrcweir // R N N 1 2 3 1 2 N 1 2 3 1 2 3
64cdf0e10cSrcweir // L S E
65cdf0e10cSrcweir // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
66cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // CTRL 0
67cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // NON 1
68cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM }, // CONS 2
69cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // LV 3
70cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // FV1 4
71cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // FV2 5
72cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // FV3 6
73cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // BV1 7
74cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // BV2 8
75cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // BD 9
76cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // TONE 10
77cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // AD1 11
78cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // AD2 12
79cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // AD3 13
80cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // AV1 14
81cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // AV2 15
82cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_NXT, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT } // AV3 16
83cdf0e10cSrcweir
84cdf0e10cSrcweir };
85cdf0e10cSrcweir
86cdf0e10cSrcweir const sal_uInt32 is_ST_COM = (1<<CT_CTRL)|(1<<CT_NON)|(1<<CT_CONS)|(1<<CT_TONE);
87cdf0e10cSrcweir
getCombState(const sal_Unicode * text,sal_Int32 pos)88cdf0e10cSrcweir static sal_uInt16 SAL_CALL getCombState(const sal_Unicode *text, sal_Int32 pos)
89cdf0e10cSrcweir {
90cdf0e10cSrcweir sal_uInt16 ch1 = getCharType(text[pos]);
91cdf0e10cSrcweir sal_uInt16 ch2 = getCharType(text[pos+1]);
92cdf0e10cSrcweir
93cdf0e10cSrcweir if (text[pos+1] == SARA_AM) {
94cdf0e10cSrcweir if ((1 << ch1) & is_ST_COM)
95cdf0e10cSrcweir return ST_COM;
96cdf0e10cSrcweir else
97cdf0e10cSrcweir ch2 = CT_AD1;
98cdf0e10cSrcweir }
99cdf0e10cSrcweir
100cdf0e10cSrcweir return thaiCompRel[ch1][ch2];
101cdf0e10cSrcweir }
102cdf0e10cSrcweir
103cdf0e10cSrcweir
getACell(const sal_Unicode * text,sal_Int32 pos,sal_Int32 len)104cdf0e10cSrcweir static sal_Int32 SAL_CALL getACell(const sal_Unicode *text, sal_Int32 pos, sal_Int32 len)
105cdf0e10cSrcweir {
106cdf0e10cSrcweir sal_uInt32 curr = 1;
107cdf0e10cSrcweir for (; pos + 1 < len && getCombState(text, pos) == ST_COM; curr++, pos++) {}
108cdf0e10cSrcweir return curr;
109cdf0e10cSrcweir }
110cdf0e10cSrcweir
// True when c lies in the Unicode Thai block (U+0E00..U+0E7F).
// The argument is parenthesised so that expression arguments (e.g. a
// conditional expression) are compared as a whole; note that c is still
// evaluated twice, so do not pass expressions with side effects.
#define is_Thai(c) (0x0e00 <= (c) && (c) <= 0x0e7f)
112cdf0e10cSrcweir
makeIndex(const OUString & Text,sal_Int32 nStartPos)113cdf0e10cSrcweir void SAL_CALL BreakIterator_th::makeIndex(const OUString& Text, sal_Int32 nStartPos)
114cdf0e10cSrcweir throw(RuntimeException)
115cdf0e10cSrcweir {
116cdf0e10cSrcweir if (Text != cachedText) {
117cdf0e10cSrcweir cachedText = Text;
118cdf0e10cSrcweir if (cellIndexSize < cachedText.getLength()) {
119cdf0e10cSrcweir cellIndexSize = cachedText.getLength();
120cdf0e10cSrcweir free(nextCellIndex);
121cdf0e10cSrcweir free(previousCellIndex);
122cdf0e10cSrcweir nextCellIndex = (sal_Int32*) calloc(cellIndexSize, sizeof(sal_Int32));
123cdf0e10cSrcweir previousCellIndex = (sal_Int32*) calloc(cellIndexSize, sizeof(sal_Int32));
124cdf0e10cSrcweir }
125cdf0e10cSrcweir // reset nextCell for new Text
126cdf0e10cSrcweir memset(nextCellIndex, 0, cellIndexSize * sizeof(sal_Int32));
127cdf0e10cSrcweir }
128cdf0e10cSrcweir else if (nextCellIndex[nStartPos] > 0 || ! is_Thai(Text[nStartPos]))
129cdf0e10cSrcweir return;
130cdf0e10cSrcweir
131cdf0e10cSrcweir const sal_Unicode* str = cachedText.getStr();
132cdf0e10cSrcweir sal_Int32 len = cachedText.getLength(), startPos, endPos;
133cdf0e10cSrcweir
134cdf0e10cSrcweir startPos = nStartPos;
135cdf0e10cSrcweir while (startPos > 0 && is_Thai(str[startPos-1])) startPos--;
136cdf0e10cSrcweir endPos = nStartPos+1;
137cdf0e10cSrcweir while (endPos < len && is_Thai(str[endPos])) endPos++;
138cdf0e10cSrcweir
139cdf0e10cSrcweir sal_Int32 start, end, pos;
140cdf0e10cSrcweir pos = start = end = startPos;
141cdf0e10cSrcweir
142cdf0e10cSrcweir while (pos < endPos) {
143cdf0e10cSrcweir end += getACell(str, start, endPos);
144cdf0e10cSrcweir while (pos < end) {
145cdf0e10cSrcweir nextCellIndex[pos] = end;
146cdf0e10cSrcweir previousCellIndex[pos] = start;
147cdf0e10cSrcweir pos++;
148cdf0e10cSrcweir }
149cdf0e10cSrcweir start = end;
150cdf0e10cSrcweir }
151cdf0e10cSrcweir }
152cdf0e10cSrcweir
153cdf0e10cSrcweir } } } }
154