source/characterclassification/cclass_unicode_parser.cxx

/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/


// MARKER(update_precomp.py): autogen include statement, do not remove
#include "precompiled_i18npool.hxx"

#include <cclass_unicode.hxx>
#include <unicode/uchar.h>
#include <rtl/math.hxx>
#include <rtl/ustring.hxx>
#include <com/sun/star/i18n/KParseTokens.hpp>
#include <com/sun/star/i18n/KParseType.hpp>
#include <com/sun/star/i18n/UnicodeType.hpp>
#include <com/sun/star/i18n/XLocaleData.hpp>
#include <com/sun/star/i18n/NativeNumberMode.hpp>

#include <string.h>		// memcpy()

using namespace ::com::sun::star::uno;
using namespace ::com::sun::star::lang;
using namespace ::rtl;

namespace com { namespace sun { namespace star { namespace i18n {

// Bit flags describing how the parser may treat a single character.
// They are combined per character in pDefaultParserTable / pTable and
// returned by getFlags().
//
// As used in this file:
//  - TOKEN_CHAR_* flags are tested by parseText() for the first character of
//    a token to select the follow-up state (word, value, string, bool, ...).
//  - TOKEN_CHAR_WORD marks a character allowed as start character of a word,
//    TOKEN_WORD marks a character allowed as continuation character.
//  - TOKEN_VALUE* flags are tested while a numeric value is being scanned.
//  - TOKEN_NAME_SEP is set for the single quote (') and TOKEN_STRING_SEP for
//    the double quote (") in the default table.
//  - TOKEN_EXCLUDED makes parseText() bounce (reject) the character unless
//    getFlags() cleared it because of a user-defined start/cont character.
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_ILLEGAL		= 0x00000000;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR			= 0x00000001;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_BOOL	= 0x00000002;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_WORD	= 0x00000004;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_VALUE	= 0x00000008;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_STRING	= 0x00000010;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_DONTCARE= 0x00000020;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_BOOL			= 0x00000040;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD			= 0x00000080;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD_SEP		= 0x00000100;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE		= 0x00000200;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SEP	= 0x00000400;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP	= 0x00000800;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SIGN	= 0x00001000;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP_VALUE	= 0x00002000;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_DIGIT	= 0x00004000;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_NAME_SEP		= 0x20000000;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_STRING_SEP	= 0x40000000;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_EXCLUDED		= 0x80000000;

// Flag combination shared by every digit: the ASCII digits 0-9 in the default
// table and the Unicode number categories handled in getFlagsExtended().
#define TOKEN_DIGIT_FLAGS (TOKEN_CHAR_VALUE | TOKEN_VALUE | TOKEN_VALUE_EXP | TOKEN_VALUE_EXP_VALUE | TOKEN_VALUE_DIGIT)

// Default identifier/name specification is [A-Za-z_][A-Za-z0-9_]*

const sal_uInt8 cclass_Unicode::nDefCnt = 128;
const UPT_FLAG_TYPE cclass_Unicode::pDefaultParserTable[ nDefCnt ] =
{
// (...) == Calc formula compiler specific, commented out and modified

	/* \0 */	TOKEN_EXCLUDED,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
	/*  9 \t */	TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,		// (TOKEN_ILLEGAL)
				TOKEN_ILLEGAL,
	/* 11 \v */	TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,		// (TOKEN_ILLEGAL)
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
				TOKEN_ILLEGAL,
	/*  32   */	TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
	/*  33 ! */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
	/*  34 " */	TOKEN_CHAR_STRING | TOKEN_STRING_SEP,
	/*  35 # */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_WORD_SEP)
	/*  36 $ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_CHAR_WORD | TOKEN_WORD)
	/*  37 % */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_VALUE)
	/*  38 & */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
	/*  39 ' */	TOKEN_NAME_SEP,
	/*  40 ( */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
	/*  41 ) */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
	/*  42 * */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
	/*  43 + */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN,
	/*  44 , */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_CHAR_VALUE | TOKEN_VALUE)
	/*  45 - */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN,
	/*  46 . */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_WORD | TOKEN_CHAR_VALUE | TOKEN_VALUE)
	/*  47 / */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
	//for ( i = 48; i < 58; i++ )
	/*  48 0 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
	/*  49 1 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
	/*  50 2 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
	/*  51 3 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
	/*  52 4 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
	/*  53 5 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
	/*  54 6 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
	/*  55 7 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
	/*  56 8 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
	/*  57 9 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
	/*  58 : */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_WORD)
	/*  59 ; */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
	/*  60 < */	TOKEN_CHAR_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
	/*  61 = */	TOKEN_CHAR | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
	/*  62 > */	TOKEN_CHAR_BOOL | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
	/*  63 ? */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_CHAR_WORD | TOKEN_WORD)
	/*  64 @ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
	//for ( i = 65; i < 91; i++ )
	/*  65 A */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  66 B */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  67 C */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  68 D */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  69 E */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  70 F */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  71 G */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  72 H */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  73 I */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  74 J */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  75 K */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  76 L */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  77 M */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  78 N */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  79 O */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  80 P */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  81 Q */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  82 R */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  83 S */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  84 T */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  85 U */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  86 V */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  87 W */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  88 X */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  89 Y */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  90 Z */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  91 [ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
	/*  92 \ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
	/*  93 ] */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
	/*  94 ^ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
	/*  95 _ */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  96 ` */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
	//for ( i = 97; i < 123; i++ )
	/*  97 a */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  98 b */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/*  99 c */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/* 100 d */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/* 101 e */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/* 102 f */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/* 103 g */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/* 104 h */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/* 105 i */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/* 106 j */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/* 107 k */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/* 108 l */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/* 109 m */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/* 110 n */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/* 111 o */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/* 112 p */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/* 113 q */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/* 114 r */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/* 115 s */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/* 116 t */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/* 117 u */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/* 118 v */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/* 119 w */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/* 120 x */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/* 121 y */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/* 122 z */	TOKEN_CHAR_WORD | TOKEN_WORD,
	/* 123 { */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
	/* 124 | */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
	/* 125 } */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
	/* 126 ~ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
	/* 127   */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP	// (TOKEN_ILLEGAL // UNUSED)
};


// KParseTokens::ASC_* classification of the ASCII characters 0..127, indexed
// by character code. getParseTokensType() returns the entry for characters
// below nDefCnt; parseText() accumulates these values into the StartFlags and
// ContFlags of the ParseResult. The position of each entry is significant.
const sal_Int32 cclass_Unicode::pParseTokensType[ nDefCnt ] =
{
	/* \0 */	KParseTokens::ASC_OTHER,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
	/*  9 \t */	KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
	/* 11 \v */	KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
	/*  32   */	KParseTokens::ASC_OTHER,
	/*  33 ! */	KParseTokens::ASC_OTHER,
	/*  34 " */	KParseTokens::ASC_OTHER,
	/*  35 # */	KParseTokens::ASC_OTHER,
	/*  36 $ */	KParseTokens::ASC_DOLLAR,
	/*  37 % */	KParseTokens::ASC_OTHER,
	/*  38 & */	KParseTokens::ASC_OTHER,
	/*  39 ' */	KParseTokens::ASC_OTHER,
	/*  40 ( */	KParseTokens::ASC_OTHER,
	/*  41 ) */	KParseTokens::ASC_OTHER,
	/*  42 * */	KParseTokens::ASC_OTHER,
	/*  43 + */	KParseTokens::ASC_OTHER,
	/*  44 , */	KParseTokens::ASC_OTHER,
	/*  45 - */	KParseTokens::ASC_OTHER,
	/*  46 . */	KParseTokens::ASC_DOT,
	/*  47 / */	KParseTokens::ASC_OTHER,
	//for ( i = 48; i < 58; i++ )
	/*  48 0 */	KParseTokens::ASC_DIGIT,
	/*  49 1 */	KParseTokens::ASC_DIGIT,
	/*  50 2 */	KParseTokens::ASC_DIGIT,
	/*  51 3 */	KParseTokens::ASC_DIGIT,
	/*  52 4 */	KParseTokens::ASC_DIGIT,
	/*  53 5 */	KParseTokens::ASC_DIGIT,
	/*  54 6 */	KParseTokens::ASC_DIGIT,
	/*  55 7 */	KParseTokens::ASC_DIGIT,
	/*  56 8 */	KParseTokens::ASC_DIGIT,
	/*  57 9 */	KParseTokens::ASC_DIGIT,
	/*  58 : */	KParseTokens::ASC_COLON,
	/*  59 ; */	KParseTokens::ASC_OTHER,
	/*  60 < */	KParseTokens::ASC_OTHER,
	/*  61 = */	KParseTokens::ASC_OTHER,
	/*  62 > */	KParseTokens::ASC_OTHER,
	/*  63 ? */	KParseTokens::ASC_OTHER,
	/*  64 @ */	KParseTokens::ASC_OTHER,
	//for ( i = 65; i < 91; i++ )
	/*  65 A */	KParseTokens::ASC_UPALPHA,
	/*  66 B */	KParseTokens::ASC_UPALPHA,
	/*  67 C */	KParseTokens::ASC_UPALPHA,
	/*  68 D */	KParseTokens::ASC_UPALPHA,
	/*  69 E */	KParseTokens::ASC_UPALPHA,
	/*  70 F */	KParseTokens::ASC_UPALPHA,
	/*  71 G */	KParseTokens::ASC_UPALPHA,
	/*  72 H */	KParseTokens::ASC_UPALPHA,
	/*  73 I */	KParseTokens::ASC_UPALPHA,
	/*  74 J */	KParseTokens::ASC_UPALPHA,
	/*  75 K */	KParseTokens::ASC_UPALPHA,
	/*  76 L */	KParseTokens::ASC_UPALPHA,
	/*  77 M */	KParseTokens::ASC_UPALPHA,
	/*  78 N */	KParseTokens::ASC_UPALPHA,
	/*  79 O */	KParseTokens::ASC_UPALPHA,
	/*  80 P */	KParseTokens::ASC_UPALPHA,
	/*  81 Q */	KParseTokens::ASC_UPALPHA,
	/*  82 R */	KParseTokens::ASC_UPALPHA,
	/*  83 S */	KParseTokens::ASC_UPALPHA,
	/*  84 T */	KParseTokens::ASC_UPALPHA,
	/*  85 U */	KParseTokens::ASC_UPALPHA,
	/*  86 V */	KParseTokens::ASC_UPALPHA,
	/*  87 W */	KParseTokens::ASC_UPALPHA,
	/*  88 X */	KParseTokens::ASC_UPALPHA,
	/*  89 Y */	KParseTokens::ASC_UPALPHA,
	/*  90 Z */	KParseTokens::ASC_UPALPHA,
	/*  91 [ */	KParseTokens::ASC_OTHER,
	/*  92 \ */	KParseTokens::ASC_OTHER,
	/*  93 ] */	KParseTokens::ASC_OTHER,
	/*  94 ^ */	KParseTokens::ASC_OTHER,
	/*  95 _ */	KParseTokens::ASC_UNDERSCORE,
	/*  96 ` */	KParseTokens::ASC_OTHER,
	//for ( i = 97; i < 123; i++ )
	/*  97 a */	KParseTokens::ASC_LOALPHA,
	/*  98 b */	KParseTokens::ASC_LOALPHA,
	/*  99 c */	KParseTokens::ASC_LOALPHA,
	/* 100 d */	KParseTokens::ASC_LOALPHA,
	/* 101 e */	KParseTokens::ASC_LOALPHA,
	/* 102 f */	KParseTokens::ASC_LOALPHA,
	/* 103 g */	KParseTokens::ASC_LOALPHA,
	/* 104 h */	KParseTokens::ASC_LOALPHA,
	/* 105 i */	KParseTokens::ASC_LOALPHA,
	/* 106 j */	KParseTokens::ASC_LOALPHA,
	/* 107 k */	KParseTokens::ASC_LOALPHA,
	/* 108 l */	KParseTokens::ASC_LOALPHA,
	/* 109 m */	KParseTokens::ASC_LOALPHA,
	/* 110 n */	KParseTokens::ASC_LOALPHA,
	/* 111 o */	KParseTokens::ASC_LOALPHA,
	/* 112 p */	KParseTokens::ASC_LOALPHA,
	/* 113 q */	KParseTokens::ASC_LOALPHA,
	/* 114 r */	KParseTokens::ASC_LOALPHA,
	/* 115 s */	KParseTokens::ASC_LOALPHA,
	/* 116 t */	KParseTokens::ASC_LOALPHA,
	/* 117 u */	KParseTokens::ASC_LOALPHA,
	/* 118 v */	KParseTokens::ASC_LOALPHA,
	/* 119 w */	KParseTokens::ASC_LOALPHA,
	/* 120 x */	KParseTokens::ASC_LOALPHA,
	/* 121 y */	KParseTokens::ASC_LOALPHA,
	/* 122 z */	KParseTokens::ASC_LOALPHA,
	/* 123 { */	KParseTokens::ASC_OTHER,
	/* 124 | */	KParseTokens::ASC_OTHER,
	/* 125 } */	KParseTokens::ASC_OTHER,
	/* 126 ~ */	KParseTokens::ASC_OTHER,
	/* 127   */	KParseTokens::ASC_OTHER
};


// static
// Returns a pointer to the first occurrence of c in the NUL-terminated
// string pStr, or NULL if pStr is NULL or c does not occur before the
// terminator. The terminating NUL itself is never matched.
const sal_Unicode* cclass_Unicode::StrChr( const sal_Unicode* pStr, sal_Unicode c )
{
	if ( pStr )
	{
		for ( const sal_Unicode* pCur = pStr; *pCur; ++pCur )
		{
			if ( *pCur == c )
				return pCur;
		}
	}
	return NULL;
}


// Returns the KParseTokens classification of the character at aStr[nPos].
//
// Characters below nDefCnt are looked up in pParseTokensType; all others are
// mapped from their u_charType() general category to the corresponding
// KParseTokens::UNI_* value, with UNI_OTHER as the fallback.
//
// nPos is also used to detect a leading position: a non-spacing mark at
// nPos == 0 is not reported as a letter.
sal_Int32 cclass_Unicode::getParseTokensType( const sal_Unicode* aStr, sal_Int32 nPos )
{
	sal_Unicode c = aStr[nPos];
	if ( c < nDefCnt )
		return pParseTokensType[ sal_uInt8(c) ];
	else
	{

		//! all KParseTokens::UNI_... must be matched
        switch ( u_charType( (sal_uInt32) c ) )
		{
			case U_UPPERCASE_LETTER :
				return KParseTokens::UNI_UPALPHA;
			case U_LOWERCASE_LETTER :
				return KParseTokens::UNI_LOALPHA;
			case U_TITLECASE_LETTER :
				return KParseTokens::UNI_TITLE_ALPHA;
			case U_MODIFIER_LETTER :
				return KParseTokens::UNI_MODIFIER_LETTER;
			case U_NON_SPACING_MARK :
				// A Non_Spacing_Mark cannot be a leading character; in that
				// position it is reported as UNI_OTHER below.
				// NOTE(review): getFlagsExtended() additionally treats
				// U_COMBINING_SPACING_MARK this way - confirm whether it
				// should be handled here as well.
				if (nPos == 0) break;
				// fall through, treat it as Other_Letter.
			case U_OTHER_LETTER :
				return KParseTokens::UNI_OTHER_LETTER;
			case U_DECIMAL_DIGIT_NUMBER :
				return KParseTokens::UNI_DIGIT;
			case U_LETTER_NUMBER :
				return KParseTokens::UNI_LETTER_NUMBER;
			case U_OTHER_NUMBER :
				return KParseTokens::UNI_OTHER_NUMBER;
		}

		return KParseTokens::UNI_OTHER;
	}
}

// Remembers rLocale as the parser locale and makes sure the LocaleData
// service reference is available.
//
// Returns whether the stored parser locale differed from rLocale (and was
// therefore replaced). The LocaleData service is only instantiated when no
// reference exists yet and a service factory is available.
sal_Bool cclass_Unicode::setupInternational( const Locale& rLocale )
{
	const bool bSameLocale =
		aParserLocale.Language == rLocale.Language &&
		aParserLocale.Country == rLocale.Country &&
		aParserLocale.Variant == rLocale.Variant;
	sal_Bool bChanged = !bSameLocale;
	if ( bChanged )
	{
		aParserLocale.Language = rLocale.Language;
		aParserLocale.Country = rLocale.Country;
		aParserLocale.Variant = rLocale.Variant;
	}

	// Nothing more to do if the service is already there or cannot be created.
	if ( xLocaleData.is() || !xMSF.is() )
		return bChanged;

	Reference< XInterface > xInstance = xMSF->createInstance(
		OUString( RTL_CONSTASCII_USTRINGPARAM( "com.sun.star.i18n.LocaleData" ) ) );
	if ( xInstance.is() )
	{
		Any aQueried = xInstance->queryInterface(
			getCppuType((const Reference< XLocaleData>*)0) );
		aQueried >>= xLocaleData;
	}
	return bChanged;
}


void cclass_Unicode::setupParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
            const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
            const OUString& userDefinedCharactersCont )
{
	bool bIntlEqual = (rLocale.Language == aParserLocale.Language &&
		rLocale.Country == aParserLocale.Country &&
		rLocale.Variant == aParserLocale.Variant);
	if ( !pTable || !bIntlEqual ||
			startCharTokenType != nStartTypes ||
			contCharTokenType != nContTypes ||
			userDefinedCharactersStart != aStartChars ||
			userDefinedCharactersCont != aContChars )
		initParserTable( rLocale, startCharTokenType, userDefinedCharactersStart,
			contCharTokenType, userDefinedCharactersCont );
}


// (Re)builds the parser tables for the given configuration.
//
// Steps:
//  1. Store the locale and obtain the LocaleData service (setupInternational).
//  2. Copy pDefaultParserTable into pTable (allocated on first use, reused
//     afterwards).
//  3. Drop pStart / pCont if the length of the corresponding user-defined
//     character string changed, so they get reallocated with the new length.
//  4. Flag the locale's group and decimal separator as value characters.
//  5. Adjust the ASCII entries according to the KParseTokens bits in
//     startCharTokenType / contCharTokenType.
//  6. Merge in the user-defined start and continuation characters: every
//     entry of pStart / pCont is set, and characters below nDefCnt are also
//     flagged directly in pTable.
void cclass_Unicode::initParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
            const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
            const OUString& userDefinedCharactersCont )
{
	// (Re)Init
	setupInternational( rLocale );
	// Memory of pTable is reused.
	if ( !pTable )
		pTable = new UPT_FLAG_TYPE[nDefCnt];
	memcpy( pTable, pDefaultParserTable, sizeof(UPT_FLAG_TYPE) * nDefCnt );
	// Start and cont tables only need reallocation if different length.
	if ( pStart && userDefinedCharactersStart.getLength() != aStartChars.getLength() )
	{
		delete [] pStart;
		pStart = NULL;
	}
	if ( pCont && userDefinedCharactersCont.getLength() != aContChars.getLength() )
	{
		delete [] pCont;
		pCont = NULL;
	}
	nStartTypes = startCharTokenType;
	nContTypes = contCharTokenType;
	aStartChars = userDefinedCharactersStart;
	aContChars = userDefinedCharactersCont;

	// specials
	if( xLocaleData.is() )
	{
		LocaleDataItem aItem =
			xLocaleData->getLocaleItem( aParserLocale );
//!TODO: theoretically separators may be a string, adjustment would have to be
//! done here and in parsing and in ::rtl::math::stringToDouble()
		// Only the first character of each separator string is used.
		cGroupSep = aItem.thousandSeparator.getStr()[0];
		cDecimalSep = aItem.decimalSeparator.getStr()[0];
	}

	// Separators outside the ASCII table are handled in getFlagsExtended().
	if ( cGroupSep < nDefCnt )
		pTable[cGroupSep] |= TOKEN_VALUE;
	if ( cDecimalSep < nDefCnt )
		pTable[cDecimalSep] |= TOKEN_CHAR_VALUE | TOKEN_VALUE;

	// Modify characters according to KParseTokens definitions.
	{
		using namespace KParseTokens;
		sal_uInt8 i;

		if ( !(nStartTypes & ASC_UPALPHA) )
			for ( i = 65; i < 91; i++ )
				pTable[i] &= ~TOKEN_CHAR_WORD;	// not allowed as start character
		if ( !(nContTypes & ASC_UPALPHA) )
			for ( i = 65; i < 91; i++ )
				pTable[i] &= ~TOKEN_WORD;		// not allowed as cont character

		if ( !(nStartTypes & ASC_LOALPHA) )
			for ( i = 97; i < 123; i++ )
				pTable[i] &= ~TOKEN_CHAR_WORD;	// not allowed as start character
		if ( !(nContTypes & ASC_LOALPHA) )
			for ( i = 97; i < 123; i++ )
				pTable[i] &= ~TOKEN_WORD;		// not allowed as cont character

		// Digits are continuation characters by default but start characters
		// only on request, hence the asymmetric set/clear below.
		if ( nStartTypes & ASC_DIGIT )
			for ( i = 48; i < 58; i++ )
				pTable[i] |= TOKEN_CHAR_WORD;	// allowed as start character
		if ( !(nContTypes & ASC_DIGIT) )
			for ( i = 48; i < 58; i++ )
				pTable[i] &= ~TOKEN_WORD;		// not allowed as cont character

		if ( !(nStartTypes & ASC_UNDERSCORE) )
			pTable[95] &= ~TOKEN_CHAR_WORD;		// not allowed as start character
		if ( !(nContTypes & ASC_UNDERSCORE) )
			pTable[95] &= ~TOKEN_WORD;			// not allowed as cont character

		if ( nStartTypes & ASC_DOLLAR )
			pTable[36] |= TOKEN_CHAR_WORD;		// allowed as start character
		if ( nContTypes & ASC_DOLLAR )
			pTable[36] |= TOKEN_WORD;			// allowed as cont character

		if ( nStartTypes & ASC_DOT )
			pTable[46] |= TOKEN_CHAR_WORD;		// allowed as start character
		if ( nContTypes & ASC_DOT )
			pTable[46] |= TOKEN_WORD;			// allowed as cont character

		if ( nStartTypes & ASC_COLON )
			pTable[58] |= TOKEN_CHAR_WORD;		// allowed as start character
		if ( nContTypes & ASC_COLON )
			pTable[58] |= TOKEN_WORD;			// allowed as cont character

		// Control characters 1..31; entry 0 (NUL) is left untouched.
		if ( nStartTypes & ASC_CONTROL )
			for ( i = 1; i < 32; i++ )
				pTable[i] |= TOKEN_CHAR_WORD;	// allowed as start character
		if ( nContTypes & ASC_CONTROL )
			for ( i = 1; i < 32; i++ )
				pTable[i] |= TOKEN_WORD;		// allowed as cont character

		if ( nStartTypes & ASC_ANY_BUT_CONTROL )
			for ( i = 32; i < nDefCnt; i++ )
				pTable[i] |= TOKEN_CHAR_WORD;	// allowed as start character
		if ( nContTypes & ASC_ANY_BUT_CONTROL )
			for ( i = 32; i < nDefCnt; i++ )
				pTable[i] |= TOKEN_WORD;		// allowed as cont character

	}

	// Merge in (positively override with) user defined characters.
	// StartChars
	sal_Int32 nLen = aStartChars.getLength();
	if ( nLen )
	{
		if ( !pStart )
			pStart = new UPT_FLAG_TYPE[ nLen ];
		const sal_Unicode* p = aStartChars.getStr();
		for ( sal_Int32 j=0; j<nLen; j++, p++ )
		{
			pStart[j] = TOKEN_CHAR_WORD;
			if ( *p < nDefCnt )
				pTable[*p] |= TOKEN_CHAR_WORD;
		}
	}
	// ContChars
	nLen = aContChars.getLength();
	if ( nLen )
	{
		if ( !pCont )
			pCont = new UPT_FLAG_TYPE[ nLen ];
		const sal_Unicode* p = aContChars.getStr();
		// p must advance together with j, otherwise only the first
		// continuation character would ever be merged into pTable.
		for ( sal_Int32 j=0; j<nLen; j++, p++ )
		{
			pCont[j] = TOKEN_WORD;
			if ( *p < nDefCnt )
				pTable[*p] |= TOKEN_WORD;
		}
	}
}


// Releases the continuation, start and main parser tables.
// delete [] on a null pointer is a no-op, so no explicit checks are needed.
// The pointers themselves are not reset here.
void cclass_Unicode::destroyParserTable()
{
	delete [] pCont;
	delete [] pStart;
	delete [] pTable;
}


// Returns the parser flags of the character at aStr[nPos] for the current
// scanner state eState.
//
// The base flags come from pTable (characters below nDefCnt) or from
// getFlagsExtended(). If the character is not yet accepted as a word start
// (start-like states) or word continuation (value/word states), the
// user-defined start/cont characters are consulted; when they accept the
// character, TOKEN_EXCLUDED is cleared.
UPT_FLAG_TYPE cclass_Unicode::getFlags( const sal_Unicode* aStr, sal_Int32 nPos )
{
	const sal_Unicode c = aStr[nPos];

	UPT_FLAG_TYPE nMask;
	if ( c < nDefCnt )
		nMask = pTable[ sal_uInt8(c) ];
	else
		nMask = getFlagsExtended( aStr, nPos );

	const bool bStartLike = (eState == ssGetChar || eState == ssRewindFromValue ||
			eState == ssIgnoreLeadingInRewind || eState == ssGetWordFirstChar);
	const bool bContLike = (eState == ssGetValue || eState == ssGetWord);

	if ( bStartLike )
	{
		if ( !(nMask & TOKEN_CHAR_WORD) )
		{
			nMask |= getStartCharsFlags( c );
			if ( nMask & TOKEN_CHAR_WORD )
				nMask &= ~TOKEN_EXCLUDED;
		}
	}
	else if ( bContLike )
	{
		if ( !(nMask & TOKEN_WORD) )
		{
			nMask |= getContCharsFlags( c );
			if ( nMask & TOKEN_WORD )
				nMask &= ~TOKEN_EXCLUDED;
		}
	}
	// all other states use the base flags unchanged

	return nMask;
}


// Computes the parser flags of the character at aStr[nPos] from its Unicode
// general category. getFlags() calls this for characters not covered by the
// ASCII table.
//
// The locale's group and decimal separators are recognized first. Otherwise
// the u_charType() category is mapped to the KParseTokens::UNI_* bit that
// must be set in nStartTypes (start-like states) or nContTypes (all other
// states) for the character to count as word start / word continuation.
// Number categories additionally always carry TOKEN_DIGIT_FLAGS.
UPT_FLAG_TYPE cclass_Unicode::getFlagsExtended( const sal_Unicode* aStr, sal_Int32 nPos )
{
	sal_Unicode c = aStr[nPos];
	if ( c == cGroupSep )
		return TOKEN_VALUE;
	if ( c == cDecimalSep )
		return TOKEN_CHAR_VALUE | TOKEN_VALUE;

	using namespace i18n;
	const bool bStart = (eState == ssGetChar || eState == ssGetWordFirstChar ||
			eState == ssRewindFromValue || eState == ssIgnoreLeadingInRewind);
	const sal_Int32 nTypes = (bStart ? nStartTypes : nContTypes);
	// Flag granted when the category is allowed by nTypes.
	const UPT_FLAG_TYPE nWordFlag = (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD);

	sal_Int32 nRequiredType = 0;	// KParseTokens bit that enables the category
	bool bNumber = false;			// number categories always get the digit flags

	//! all KParseTokens::UNI_... must be matched
	switch ( u_charType( (sal_uInt32) c ) )
	{
		case U_UPPERCASE_LETTER :
			nRequiredType = KParseTokens::UNI_UPALPHA;
		break;
		case U_LOWERCASE_LETTER :
			nRequiredType = KParseTokens::UNI_LOALPHA;
		break;
		case U_TITLECASE_LETTER :
			nRequiredType = KParseTokens::UNI_TITLE_ALPHA;
		break;
		case U_MODIFIER_LETTER :
			nRequiredType = KParseTokens::UNI_MODIFIER_LETTER;
		break;
		case U_NON_SPACING_MARK :
		case U_COMBINING_SPACING_MARK :
			// Neither a non-spacing nor a spacing combining mark may lead.
			if ( bStart )
				return TOKEN_ILLEGAL;
			// fall through, treat it as Other_Letter.
		case U_OTHER_LETTER :
			nRequiredType = KParseTokens::UNI_OTHER_LETTER;
		break;
		case U_DECIMAL_DIGIT_NUMBER :
			nRequiredType = KParseTokens::UNI_DIGIT;
			bNumber = true;
		break;
		case U_LETTER_NUMBER :
			nRequiredType = KParseTokens::UNI_LETTER_NUMBER;
			bNumber = true;
		break;
		case U_OTHER_NUMBER :
			nRequiredType = KParseTokens::UNI_OTHER_NUMBER;
			bNumber = true;
		break;
		case U_SPACE_SEPARATOR :
			if ( nTypes & KParseTokens::IGNORE_LEADING_WS )
				return TOKEN_CHAR_DONTCARE;
			if ( bStart )
				return TOKEN_CHAR_WORD;
			return TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP;
		default:
			return TOKEN_ILLEGAL;
	}

	UPT_FLAG_TYPE nFlags = TOKEN_ILLEGAL;
	if ( nTypes & nRequiredType )
		nFlags = nWordFlag;
	if ( bNumber )
		nFlags |= TOKEN_DIGIT_FLAGS;
	return nFlags;
}


// Returns the flags stored in pStart for c if c is one of the user-defined
// start characters (aStartChars), TOKEN_ILLEGAL otherwise or if no start
// table exists.
UPT_FLAG_TYPE cclass_Unicode::getStartCharsFlags( sal_Unicode c )
{
	if ( !pStart )
		return TOKEN_ILLEGAL;

	const sal_Unicode* const pChars = aStartChars.getStr();
	const sal_Unicode* const pHit = StrChr( pChars, c );
	if ( !pHit )
		return TOKEN_ILLEGAL;

	// pStart is indexed parallel to aStartChars.
	return pStart[ pHit - pChars ];
}


// Returns the flags stored in pCont for c if c is one of the user-defined
// continuation characters (aContChars), TOKEN_ILLEGAL otherwise or if no
// continuation table exists.
UPT_FLAG_TYPE cclass_Unicode::getContCharsFlags( sal_Unicode c )
{
	if ( !pCont )
		return TOKEN_ILLEGAL;

	const sal_Unicode* const pChars = aContChars.getStr();
	const sal_Unicode* const pHit = StrChr( pChars, c );
	if ( !pHit )
		return TOKEN_ILLEGAL;

	// pCont is indexed parallel to aContChars.
	return pCont[ pHit - pChars ];
}


// Scans one token from rText starting at nPos and stores the result in r.
//
// The scanner is a state machine driven by eState. For each character it
// obtains the parser flags (getFlags) and the KParseTokens classification
// (getParseTokensType), advances pSrc and then acts on the current state:
//  - ssGetChar / ssRewindFromValue / ssIgnoreLeadingInRewind: decide from the
//    first significant character which kind of token starts here,
//  - ssGetValue: scan a number, possibly falling back to a word,
//  - ssGetWordFirstChar / ssGetWord: scan an identifier or quoted name,
//  - ssGetString: scan a double-quoted string,
//  - ssGetBool: scan a one or two character comparison operator.
// ssStopBack puts the last character back; ssBounce discards the token type.
// ssRewindFromValue restarts the scan from the beginning without trying to
// interpret the text as a value.
//
// nTokenType is the mask of KParseType values the caller accepts; a token
// whose type is not accepted is bounced (with the exceptions handled below).
//
// After the loop r.CharLen, r.EndPos, r.Value (numbers) and
// r.DequotedNameOrString (quoted names/strings) are filled in.
//
// Throws RuntimeException if a Unicode number was found and the
// NativeNumberSupplier service cannot be instantiated.
void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32 nPos, sal_Int32 nTokenType )
{
	using namespace i18n;
	const sal_Unicode* const pTextStart = rText.getStr() + nPos;
	eState = ssGetChar;

	//! All the variables below (plus ParseResult) have to be reset on ssRewindFromValue!
	const sal_Unicode* pSym = pTextStart;	// start of the not yet collected symbol part
	const sal_Unicode* pSrc = pSym;			// next character to be read
	OUString aSymbol;						// dequoted name or string collected so far
	sal_Unicode c = *pSrc;
	sal_Unicode cLast = 0;
	int nDecSeps = 0;
	bool bQuote = false;
	bool bMightBeWord = true;
	bool bMightBeWordLast = true;
	//! All the variables above (plus ParseResult) have to be reset on ssRewindFromValue!

	while ( (c != 0) && (eState != ssStop) )
	{
		UPT_FLAG_TYPE nMask = getFlags( pTextStart, pSrc - pTextStart );
		if ( nMask & TOKEN_EXCLUDED )
			eState = ssBounce;
		if ( bMightBeWord )
		{	// only relevant for ssGetValue fall back
			if ( eState == ssGetChar || eState == ssRewindFromValue ||
					eState == ssIgnoreLeadingInRewind )
				bMightBeWord = ((nMask & TOKEN_CHAR_WORD) != 0);
			else
				bMightBeWord = ((nMask & TOKEN_WORD) != 0);
		}
		sal_Int32 nParseTokensType = getParseTokensType( pTextStart, pSrc - pTextStart );
		pSrc++;
		switch (eState)
		{
			case ssGetChar :
			case ssRewindFromValue :
			case ssIgnoreLeadingInRewind :
			{
				if ( (nMask & TOKEN_CHAR_VALUE) && eState != ssRewindFromValue
						&& eState != ssIgnoreLeadingInRewind )
				{	//! must be first, may fall back to ssGetWord via bMightBeWord
					eState = ssGetValue;
					if ( nMask & TOKEN_VALUE_DIGIT )
					{
						if ( 128 <= c )
							r.TokenType = KParseType::UNI_NUMBER;
						else
							r.TokenType = KParseType::ASC_NUMBER;
					}
					else if ( c == cDecimalSep )
					{
						if ( *pSrc )
							++nDecSeps;
						else
							eState = ssRewindFromValue;
							// retry for ONE_SINGLE_CHAR or others
					}
				}
				else if ( nMask & TOKEN_CHAR_WORD )
				{
					eState = ssGetWord;
					r.TokenType = KParseType::IDENTNAME;
				}
				else if ( nMask & TOKEN_NAME_SEP )
				{
					eState = ssGetWordFirstChar;
					bQuote = true;
					pSym++;
					nParseTokensType = 0;	// will be taken of first real character
					r.TokenType = KParseType::SINGLE_QUOTE_NAME;
				}
				else if ( nMask & TOKEN_CHAR_STRING )
				{
					eState = ssGetString;
					pSym++;
					nParseTokensType = 0;	// will be taken of first real character
					r.TokenType = KParseType::DOUBLE_QUOTE_STRING;
				}
				else if ( nMask & TOKEN_CHAR_DONTCARE )
				{
					if ( nStartTypes & KParseTokens::IGNORE_LEADING_WS )
					{
						if (eState == ssRewindFromValue)
							eState = ssIgnoreLeadingInRewind;
						r.LeadingWhiteSpace++;
						pSym++;
						nParseTokensType = 0;	// wait until real character
						bMightBeWord = true;
					}
					else
						eState = ssBounce;
				}
				else if ( nMask & TOKEN_CHAR_BOOL )
				{
					eState = ssGetBool;
					r.TokenType = KParseType::BOOLEAN;
				}
				else if ( nMask & TOKEN_CHAR )
				{	//! must be last
					eState = ssStop;
					r.TokenType = KParseType::ONE_SINGLE_CHAR;
				}
				else
					eState = ssBounce;		// not known
			}
			break;
			case ssGetValue :
			{
				if ( nMask & TOKEN_VALUE_DIGIT )
				{
					if ( 128 <= c )
						r.TokenType = KParseType::UNI_NUMBER;
					else if ( r.TokenType != KParseType::UNI_NUMBER )
						r.TokenType = KParseType::ASC_NUMBER;
				}
				if ( nMask & TOKEN_VALUE )
				{
					if ( c == cDecimalSep && ++nDecSeps > 1 )
					{
						if ( pSrc - pTextStart == 2 )
							eState = ssRewindFromValue;
							// consecutive separators
						else
							eState = ssStopBack;
					}
					// else keep it going
				}
				else if ( c == 'E' || c == 'e' )
				{
					UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart );
					if ( nNext & TOKEN_VALUE_EXP )
						;	// keep it going
					else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
					{	// might be a numerical name (1.2efg)
						eState = ssGetWord;
						r.TokenType = KParseType::IDENTNAME;
					}
					else
						eState = ssStopBack;
				}
				else if ( nMask & TOKEN_VALUE_SIGN )
				{
					if ( (cLast == 'E') || (cLast == 'e') )
					{
						UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart );
						if ( nNext & TOKEN_VALUE_EXP_VALUE )
							;	// keep it going
						else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
						{	// might be a numerical name (1.2e+fg)
							eState = ssGetWord;
							r.TokenType = KParseType::IDENTNAME;
						}
						else
							eState = ssStopBack;
					}
					else if ( bMightBeWord )
					{	// might be a numerical name (1.2+fg)
						eState = ssGetWord;
						r.TokenType = KParseType::IDENTNAME;
					}
					else
						eState = ssStopBack;
				}
				else if ( bMightBeWord && (nMask & TOKEN_WORD) )
				{	// might be a numerical name (1995.A1)
					eState = ssGetWord;
					r.TokenType = KParseType::IDENTNAME;
				}
				else
					eState = ssStopBack;
			}
			break;
			case ssGetWordFirstChar :
				eState = ssGetWord;
				// fall thru
			case ssGetWord :
			{
				if ( nMask & TOKEN_WORD )
					;	// keep it going
				else if ( nMask & TOKEN_NAME_SEP )
				{
					if ( bQuote )
					{
						if ( cLast == '\\' )
						{	// escaped: drop the backslash, keep the quote
							aSymbol += OUString( pSym, pSrc - pSym - 2 );
							aSymbol += OUString( &c, 1);
						}
						else
						{	// closing quote
							eState = ssStop;
							aSymbol += OUString( pSym, pSrc - pSym - 1 );
						}
						pSym = pSrc;
					}
					else
						eState = ssStopBack;
				}
				else if ( bQuote )
					;	// keep it going
				else
					eState = ssStopBack;
			}
			break;
			case ssGetString :
			{
				if ( nMask & TOKEN_STRING_SEP )
				{
					if ( cLast == '\\' )
					{	// escaped: drop the backslash, keep the quote
						aSymbol += OUString( pSym, pSrc - pSym - 2 );
						aSymbol += OUString( &c, 1);
					}
					else if ( c == *pSrc &&
							!(nContTypes & KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING) )
					{	// "" => literal " escaped
						aSymbol += OUString( pSym, pSrc - pSym );
						pSrc++;
					}
					else
					{	// closing quote
						eState = ssStop;
						aSymbol += OUString( pSym, pSrc - pSym - 1 );
					}
					pSym = pSrc;
				}
			}
			break;
			case ssGetBool :
			{
				if ( (nMask & TOKEN_BOOL) )
					eState = ssStop;	// maximum 2: <, >, <>, <=, >=
				else
					eState = ssStopBack;
			}
			break;
			case ssStopBack :
			case ssBounce :
			case ssStop :
				;   // nothing, no compiler warning
			break;
		}
		if ( eState == ssRewindFromValue )
		{
			// Start over from the beginning with all scan variables reset.
			r = ParseResult();
			pSym = pTextStart;
			pSrc = pSym;
			aSymbol = OUString();
			c = *pSrc;
			cLast = 0;
			nDecSeps = 0;
			bQuote = false;
			bMightBeWord = true;
			bMightBeWordLast = true;
		}
		else
		{
			if ( !(r.TokenType & nTokenType) )
			{
				if ( (r.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER))
						&& (nTokenType & KParseType::IDENTNAME) && bMightBeWord )
					;	// keep a number that might be a word
				else if ( r.LeadingWhiteSpace == (pSrc - pTextStart) )
					;	// keep ignored white space
				else if ( !r.TokenType && eState == ssGetValue && (nMask & TOKEN_VALUE_SEP) )
					;   // keep uncertain value
				else
					eState = ssBounce;
			}
			if ( eState == ssBounce )
			{
				r.TokenType = 0;
				eState = ssStopBack;
			}
			if ( eState == ssStopBack )
			{	// put back
				pSrc--;
				bMightBeWord = bMightBeWordLast;
				eState = ssStop;
			}
			if ( eState != ssStop )
			{
				if ( !r.StartFlags )
					r.StartFlags |= nParseTokensType;
				else
					r.ContFlags |= nParseTokensType;
			}
			bMightBeWordLast = bMightBeWord;
			cLast = c;
			c = *pSrc;
		}
	}
	// r.CharLen is the length in characters (not code points) of the parsed
	// token not including any leading white space, change this calculation if
	// multi-code-point Unicode characters are to be supported.
	r.CharLen = pSrc - pTextStart - r.LeadingWhiteSpace;
	r.EndPos = nPos + (pSrc - pTextStart);
	if ( r.TokenType & KParseType::ASC_NUMBER )
	{
		// pSrc is the end of the token. pTextStart already includes nPos, so
		// adding r.EndPos (which includes nPos as well) would overshoot.
		r.Value = rtl_math_uStringToDouble( pTextStart + r.LeadingWhiteSpace,
				pSrc, cDecimalSep, cGroupSep, NULL, NULL );
		if ( bMightBeWord )
			r.TokenType |= KParseType::IDENTNAME;
	}
	else if ( r.TokenType & KParseType::UNI_NUMBER )
	{
		if ( !xNatNumSup.is() )
		{
#define NATIVENUMBERSUPPLIER_SERVICENAME "com.sun.star.i18n.NativeNumberSupplier"
			if ( xMSF.is() )
			{
				xNatNumSup = Reference< XNativeNumberSupplier > (
						xMSF->createInstance( OUString(
								RTL_CONSTASCII_USTRINGPARAM(
									NATIVENUMBERSUPPLIER_SERVICENAME ) ) ),
						UNO_QUERY );
			}
			if ( !xNatNumSup.is() )
			{
				throw RuntimeException( OUString(
#ifdef DBG_UTIL
					RTL_CONSTASCII_USTRINGPARAM(
						"cclass_Unicode::parseText: can't instanciate "
						NATIVENUMBERSUPPLIER_SERVICENAME )
#endif
					), *this );
			}
#undef NATIVENUMBERSUPPLIER_SERVICENAME
		}
		// Exactly the token without leading white space: r.CharLen
		// characters starting behind the white space.
		OUString aTmp( pTextStart + r.LeadingWhiteSpace, r.CharLen );
		// transliterate to ASCII
		aTmp = xNatNumSup->getNativeNumberString( aTmp, aParserLocale,
				NativeNumberMode::NATNUM0 );
		r.Value = ::rtl::math::stringToDouble( aTmp, cDecimalSep, cGroupSep, NULL, NULL );
		if ( bMightBeWord )
			r.TokenType |= KParseType::IDENTNAME;
	}
	else if ( r.TokenType & (KParseType::SINGLE_QUOTE_NAME | KParseType::DOUBLE_QUOTE_STRING) )
	{
		if ( pSym < pSrc )
		{	//! open quote: text ended before the closing quote was seen
			aSymbol += OUString( pSym, pSrc - pSym );
			r.TokenType |= KParseType::MISSING_QUOTE;
		}
		r.DequotedNameOrString = aSymbol;
	}
}

} } } }