1*cdf0e10cSrcweir /************************************************************************* 2*cdf0e10cSrcweir * 3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4*cdf0e10cSrcweir * 5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates. 6*cdf0e10cSrcweir * 7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite 8*cdf0e10cSrcweir * 9*cdf0e10cSrcweir * This file is part of OpenOffice.org. 10*cdf0e10cSrcweir * 11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify 12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3 13*cdf0e10cSrcweir * only, as published by the Free Software Foundation. 14*cdf0e10cSrcweir * 15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful, 16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of 17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details 19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code). 20*cdf0e10cSrcweir * 21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License 22*cdf0e10cSrcweir * version 3 along with OpenOffice.org. If not, see 23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html> 24*cdf0e10cSrcweir * for a copy of the LGPLv3 License. 25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir 28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 29*cdf0e10cSrcweir #include "precompiled_i18npool.hxx" 30*cdf0e10cSrcweir #include <breakiterator_th.hxx> 31*cdf0e10cSrcweir #include <wtt.h> 32*cdf0e10cSrcweir 33*cdf0e10cSrcweir #include <string.h> // for memset 34*cdf0e10cSrcweir 35*cdf0e10cSrcweir using namespace ::com::sun::star::uno; 36*cdf0e10cSrcweir using namespace ::com::sun::star::lang; 37*cdf0e10cSrcweir using namespace ::rtl; 38*cdf0e10cSrcweir 39*cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n { 40*cdf0e10cSrcweir 41*cdf0e10cSrcweir // ---------------------------------------------------- 42*cdf0e10cSrcweir // class Breakiterator_th 43*cdf0e10cSrcweir // ----------------------------------------------------; 44*cdf0e10cSrcweir BreakIterator_th::BreakIterator_th() 45*cdf0e10cSrcweir { 46*cdf0e10cSrcweir cBreakIterator = "com.sun.star.i18n.BreakIterator_th"; 47*cdf0e10cSrcweir wordRule=lineRule=NULL; 48*cdf0e10cSrcweir } 49*cdf0e10cSrcweir 50*cdf0e10cSrcweir BreakIterator_th::~BreakIterator_th() 51*cdf0e10cSrcweir { 52*cdf0e10cSrcweir } 53*cdf0e10cSrcweir 54*cdf0e10cSrcweir #define SARA_AM 0x0E33 55*cdf0e10cSrcweir 56*cdf0e10cSrcweir /* 57*cdf0e10cSrcweir * cell composition states 58*cdf0e10cSrcweir */ 59*cdf0e10cSrcweir 60*cdf0e10cSrcweir #define ST_COM 1 // Compose the following character with leading char and display in the same cell 61*cdf0e10cSrcweir #define ST_NXT 2 // display the following character in the next cell 62*cdf0e10cSrcweir #define ST_NDP 3 // non-display 63*cdf0e10cSrcweir 64*cdf0e10cSrcweir static const sal_Int16 thaiCompRel[MAX_CT][MAX_CT] = { 65*cdf0e10cSrcweir // C N C L F F F B B B T A A A A A A 66*cdf0e10cSrcweir // T O O V V V V V V D O D D D V V V 67*cdf0e10cSrcweir // R N N 1 2 3 1 2 N 1 2 3 1 2 3 68*cdf0e10cSrcweir // L S E 69*cdf0e10cSrcweir // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 70*cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // CTRL 0 71*cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // NON 1 72*cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM }, // CONS 2 73*cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // LV 3 74*cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // FV1 4 75*cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // FV2 5 76*cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // FV3 6 77*cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // BV1 7 78*cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // BV2 8 79*cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // BD 9 80*cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // TONE 10 81*cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // AD1 11 82*cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // AD2 12 83*cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // AD3 13 84*cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // AV1 14 85*cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // AV2 15 86*cdf0e10cSrcweir { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_NXT, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT } // AV3 16 87*cdf0e10cSrcweir 88*cdf0e10cSrcweir }; 89*cdf0e10cSrcweir 90*cdf0e10cSrcweir const sal_uInt32 is_ST_COM = (1<<CT_CTRL)|(1<<CT_NON)|(1<<CT_CONS)|(1<<CT_TONE); 91*cdf0e10cSrcweir 92*cdf0e10cSrcweir static sal_uInt16 SAL_CALL getCombState(const sal_Unicode *text, sal_Int32 pos) 93*cdf0e10cSrcweir { 94*cdf0e10cSrcweir sal_uInt16 ch1 = getCharType(text[pos]); 95*cdf0e10cSrcweir sal_uInt16 ch2 = getCharType(text[pos+1]); 96*cdf0e10cSrcweir 97*cdf0e10cSrcweir if (text[pos+1] == SARA_AM) { 98*cdf0e10cSrcweir if ((1 << ch1) & is_ST_COM) 99*cdf0e10cSrcweir return ST_COM; 100*cdf0e10cSrcweir else 101*cdf0e10cSrcweir ch2 = CT_AD1; 102*cdf0e10cSrcweir } 103*cdf0e10cSrcweir 104*cdf0e10cSrcweir return thaiCompRel[ch1][ch2]; 105*cdf0e10cSrcweir } 106*cdf0e10cSrcweir 107*cdf0e10cSrcweir 108*cdf0e10cSrcweir static sal_Int32 SAL_CALL getACell(const sal_Unicode *text, sal_Int32 pos, sal_Int32 len) 109*cdf0e10cSrcweir { 110*cdf0e10cSrcweir sal_uInt32 curr = 1; 111*cdf0e10cSrcweir for (; pos + 1 < len && getCombState(text, pos) == ST_COM; curr++, pos++) {} 112*cdf0e10cSrcweir return curr; 113*cdf0e10cSrcweir } 114*cdf0e10cSrcweir 115*cdf0e10cSrcweir #define is_Thai(c) (0x0e00 <= c && c <= 0x0e7f) // Unicode definition for Thai 116*cdf0e10cSrcweir 117*cdf0e10cSrcweir void SAL_CALL BreakIterator_th::makeIndex(const OUString& Text, sal_Int32 nStartPos) 118*cdf0e10cSrcweir throw(RuntimeException) 119*cdf0e10cSrcweir { 120*cdf0e10cSrcweir if (Text != cachedText) { 121*cdf0e10cSrcweir cachedText = Text; 122*cdf0e10cSrcweir if (cellIndexSize < cachedText.getLength()) { 123*cdf0e10cSrcweir cellIndexSize = cachedText.getLength(); 124*cdf0e10cSrcweir free(nextCellIndex); 125*cdf0e10cSrcweir free(previousCellIndex); 126*cdf0e10cSrcweir nextCellIndex = (sal_Int32*) calloc(cellIndexSize, sizeof(sal_Int32)); 127*cdf0e10cSrcweir previousCellIndex = (sal_Int32*) calloc(cellIndexSize, sizeof(sal_Int32)); 128*cdf0e10cSrcweir } 129*cdf0e10cSrcweir // reset nextCell for new Text 130*cdf0e10cSrcweir memset(nextCellIndex, 0, cellIndexSize * sizeof(sal_Int32)); 131*cdf0e10cSrcweir } 132*cdf0e10cSrcweir else if (nextCellIndex[nStartPos] > 0 || ! is_Thai(Text[nStartPos])) 133*cdf0e10cSrcweir return; 134*cdf0e10cSrcweir 135*cdf0e10cSrcweir const sal_Unicode* str = cachedText.getStr(); 136*cdf0e10cSrcweir sal_Int32 len = cachedText.getLength(), startPos, endPos; 137*cdf0e10cSrcweir 138*cdf0e10cSrcweir startPos = nStartPos; 139*cdf0e10cSrcweir while (startPos > 0 && is_Thai(str[startPos-1])) startPos--; 140*cdf0e10cSrcweir endPos = nStartPos+1; 141*cdf0e10cSrcweir while (endPos < len && is_Thai(str[endPos])) endPos++; 142*cdf0e10cSrcweir 143*cdf0e10cSrcweir sal_Int32 start, end, pos; 144*cdf0e10cSrcweir pos = start = end = startPos; 145*cdf0e10cSrcweir 146*cdf0e10cSrcweir while (pos < endPos) { 147*cdf0e10cSrcweir end += getACell(str, start, endPos); 148*cdf0e10cSrcweir while (pos < end) { 149*cdf0e10cSrcweir nextCellIndex[pos] = end; 150*cdf0e10cSrcweir previousCellIndex[pos] = start; 151*cdf0e10cSrcweir pos++; 152*cdf0e10cSrcweir } 153*cdf0e10cSrcweir start = end; 154*cdf0e10cSrcweir } 155*cdf0e10cSrcweir } 156*cdf0e10cSrcweir 157*cdf0e10cSrcweir } } } } 158