xref: /aoo41x/main/starmath/inc/parse.hxx (revision cdf0e10c)
1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir #ifndef PARSE_HXX
28*cdf0e10cSrcweir #define PARSE_HXX
29*cdf0e10cSrcweir 
30*cdf0e10cSrcweir 
31*cdf0e10cSrcweir #include <vcl/svapp.hxx>
32*cdf0e10cSrcweir #include <tools/stack.hxx>
33*cdf0e10cSrcweir #include <tools/list.hxx>
34*cdf0e10cSrcweir #include <tools/string.hxx>
35*cdf0e10cSrcweir 
36*cdf0e10cSrcweir #include <set>
37*cdf0e10cSrcweir #include <stack>
38*cdf0e10cSrcweir #include <list>
39*cdf0e10cSrcweir 
40*cdf0e10cSrcweir #include "types.hxx"
41*cdf0e10cSrcweir 
42*cdf0e10cSrcweir class SmNode;
43*cdf0e10cSrcweir class SmDocShell;
44*cdf0e10cSrcweir 
45*cdf0e10cSrcweir //////////////////////////////////////////////////////////////////////
46*cdf0e10cSrcweir 
// Token groups
//
// Every TG* constant occupies exactly one bit, so any number of groups can
// be OR-ed together into a single mask. SmParser::TokenInGroup tests such a
// mask against SmToken::nGroup with a bitwise AND.
#define TGOPER        0x00000001    // bit  0
#define TGRELATION    0x00000002    // bit  1
#define TGSUM         0x00000004    // bit  2
#define TGPRODUCT     0x00000008    // bit  3
#define TGUNOPER      0x00000010    // bit  4
#define TGPOWER       0x00000020    // bit  5
#define TGATTRIBUT    0x00000040    // bit  6
#define TGALIGN       0x00000080    // bit  7
#define TGFUNCTION    0x00000100    // bit  8
#define TGBLANK       0x00000200    // bit  9
#define TGLBRACES     0x00000400    // bit 10
#define TGRBRACES     0x00000800    // bit 11
#define TGCOLOR       0x00001000    // bit 12
#define TGFONT        0x00002000    // bit 13
#define TGSTANDALONE  0x00004000    // bit 14
#define TGDISCARDED   0x00008000    // bit 15
#define TGLIMIT       0x00010000    // bit 16
#define TGFONTATTR    0x00020000    // bit 17
66*cdf0e10cSrcweir 
67*cdf0e10cSrcweir 
// Kinds of token; stored in SmToken::eType.
//
// The enumerators carry no explicit values, so each one's numeric value is
// simply its position in this list. The comment at the start of every row
// gives the value of the first enumerator on that row. Inserting or
// reordering entries therefore changes the values of everything after them.
//
// NOTE(review): TNDIBVIDES (value 210) looks like a misspelled near-duplicate
// of TNDIVIDES (value 195). It is kept unchanged because the name is part of
// the interface - confirm with the users of this enum before renaming.
enum SmTokenType
{
    /*   0 */ TEND,           TLGROUP,        TRGROUP,        TLPARENT,       TRPARENT,
    /*   5 */ TLBRACKET,      TRBRACKET,      TPLUS,          TMINUS,         TMULTIPLY,
    /*  10 */ TDIVIDEBY,      TASSIGN,        TPOUND,         TSPECIAL,       TSLASH,
    /*  15 */ TBACKSLASH,     TBLANK,         TSBLANK,        TRSUB,          TRSUP,
    /*  20 */ TCSUB,          TCSUP,          TLSUB,          TLSUP,          TGT,
    /*  25 */ TLT,            TAND,           TOR,            TINTERSECT,     TUNION,
    /*  30 */ TNEWLINE,       TBINOM,         TFROM,          TTO,            TINT,
    /*  35 */ TSUM,           TOPER,          TABS,           TSQRT,          TFACT,
    /*  40 */ TNROOT,         TOVER,          TTIMES,         TGE,            TLE,
    /*  45 */ TGG,            TLL,            TDOTSAXIS,      TDOTSLOW,       TDOTSVERT,
    /*  50 */ TDOTSDIAG,      TDOTSUP,        TDOTSDOWN,      TACUTE,         TBAR,
    /*  55 */ TBREVE,         TCHECK,         TCIRCLE,        TDOT,           TDDOT,
    /*  60 */ TDDDOT,         TGRAVE,         THAT,           TTILDE,         TVEC,
    /*  65 */ TUNDERLINE,     TOVERLINE,      TOVERSTRIKE,    TITALIC,        TNITALIC,
    /*  70 */ TBOLD,          TNBOLD,         TPHANTOM,       TFONT,          TSIZE,
    /*  75 */ TCOLOR,         TALIGNL,        TALIGNC,        TALIGNR,        TLEFT,
    /*  80 */ TRIGHT,         TLANGLE,        TLBRACE,        TLLINE,         TLDLINE,
    /*  85 */ TLCEIL,         TLFLOOR,        TNONE,          TMLINE,         TRANGLE,
    /*  90 */ TRBRACE,        TRLINE,         TRDLINE,        TRCEIL,         TRFLOOR,
    /*  95 */ TSIN,           TCOS,           TTAN,           TCOT,           TFUNC,
    /* 100 */ TSTACK,         TMATRIX,        TMATFORM,       TDPOUND,        TPLACE,
    /* 105 */ TTEXT,          TNUMBER,        TCHARACTER,     TIDENT,         TNEQ,
    /* 110 */ TEQUIV,         TDEF,           TPROP,          TSIM,           TSIMEQ,
    /* 115 */ TAPPROX,        TPARALLEL,      TORTHO,         TIN,            TNOTIN,
    /* 120 */ TSUBSET,        TSUBSETEQ,      TSUPSET,        TSUPSETEQ,      TPLUSMINUS,
    /* 125 */ TMINUSPLUS,     TOPLUS,         TOMINUS,        TDIV,           TOTIMES,
    /* 130 */ TODIVIDE,       TTRANSL,        TTRANSR,        TIINT,          TIIINT,
    /* 135 */ TLINT,          TLLINT,         TLLLINT,        TPROD,          TCOPROD,
    /* 140 */ TFORALL,        TEXISTS,        TLIM,           TNABLA,         TTOWARD,
    /* 145 */ TSINH,          TCOSH,          TTANH,          TCOTH,          TASIN,
    /* 150 */ TACOS,          TATAN,          TLN,            TLOG,           TUOPER,
    /* 155 */ TBOPER,         TBLACK,         TWHITE,         TRED,           TGREEN,
    /* 160 */ TBLUE,          TCYAN,          TMAGENTA,       TYELLOW,        TFIXED,
    /* 165 */ TSANS,          TSERIF,         TPOINT,         TASINH,         TACOSH,
    /* 170 */ TATANH,         TACOTH,         TACOT,          TEXP,           TCDOT,
    /* 175 */ TODOT,          TLESLANT,       TGESLANT,       TNSUBSET,       TNSUPSET,
    /* 180 */ TNSUBSETEQ,     TNSUPSETEQ,     TPARTIAL,       TNEG,           TNI,
    /* 185 */ TBACKEPSILON,   TALEPH,         TIM,            TRE,            TWP,
    /* 190 */ TEMPTYSET,      TINFINITY,      TESCAPE,        TLIMSUP,        TLIMINF,
    /* 195 */ TNDIVIDES,      TDRARROW,       TDLARROW,       TDLRARROW,      TUNDERBRACE,
    /* 200 */ TOVERBRACE,     TCIRC,          TTOP,           THBAR,          TLAMBDABAR,
    /* 205 */ TLEFTARROW,     TRIGHTARROW,    TUPARROW,       TDOWNARROW,     TDIVIDES,
    /* 210 */ TNDIBVIDES,     TSETN,          TSETZ,          TSETQ,          TSETR,
    /* 215 */ TSETC,          TWIDEVEC,       TWIDETILDE,     TWIDEHAT,       TWIDESLASH,
    /* 220 */ TWIDEBACKSLASH, TLDBRACKET,     TRDBRACKET,     TNOSPACE,
    /* 224 */ TUNKNOWN,       TDEBUG
};
117*cdf0e10cSrcweir 
118*cdf0e10cSrcweir 
119*cdf0e10cSrcweir struct SmToken
120*cdf0e10cSrcweir {
121*cdf0e10cSrcweir 
122*cdf0e10cSrcweir     String          aText;      // token text
123*cdf0e10cSrcweir     SmTokenType     eType;      // token info
124*cdf0e10cSrcweir     sal_Unicode		cMathChar;
125*cdf0e10cSrcweir 
126*cdf0e10cSrcweir     // parse-help info
127*cdf0e10cSrcweir     sal_uLong       nGroup;
128*cdf0e10cSrcweir     sal_uInt16      nLevel;
129*cdf0e10cSrcweir 
130*cdf0e10cSrcweir     // token position
131*cdf0e10cSrcweir     sal_uInt16      nRow;
132*cdf0e10cSrcweir     xub_StrLen      nCol;
133*cdf0e10cSrcweir 
134*cdf0e10cSrcweir 	SmToken();
135*cdf0e10cSrcweir };
136*cdf0e10cSrcweir 
137*cdf0e10cSrcweir 
// Error codes of the parser; used as SmErrorDesc::Type and passed to
// SmParser::Error / SmParser::AddError.
// The enumerators have no explicit values, so the order below defines them.
enum SmParseError
{
    PE_NONE,                        //  0
    PE_UNEXPECTED_END_OF_INPUT,     //  1
    PE_UNEXPECTED_CHAR,             //  2
    PE_UNEXPECTED_TOKEN,            //  3
    PE_FUNC_EXPECTED,               //  4
    PE_UNOPER_EXPECTED,             //  5
    PE_BINOPER_EXPECTED,            //  6
    PE_SYMBOL_EXPECTED,             //  7
    PE_IDENTIFIER_EXPECTED,         //  8
    PE_POUND_EXPECTED,              //  9
    PE_COLOR_EXPECTED,              // 10
    PE_LGROUP_EXPECTED,             // 11
    PE_RGROUP_EXPECTED,             // 12
    PE_LBRACE_EXPECTED,             // 13
    PE_RBRACE_EXPECTED,             // 14
    PE_PARENT_MISMATCH,             // 15
    PE_RIGHT_EXPECTED,              // 16
    PE_FONT_EXPECTED,               // 17
    PE_SIZE_EXPECTED,               // 18
    PE_DOUBLE_ALIGN,                // 19
    PE_DOUBLE_SUBSUPSCRIPT          // 20
};
152*cdf0e10cSrcweir 
153*cdf0e10cSrcweir 
154*cdf0e10cSrcweir struct SmErrorDesc
155*cdf0e10cSrcweir {
156*cdf0e10cSrcweir 	SmParseError  Type;
157*cdf0e10cSrcweir 	SmNode		 *pNode;
158*cdf0e10cSrcweir 	String		  Text;
159*cdf0e10cSrcweir };
160*cdf0e10cSrcweir 
161*cdf0e10cSrcweir 
// Container types used as members of SmParser (m_aNodeStack and
// m_aErrDescList). DECLARE_STACK and DECLARE_LIST are macros defined outside
// this file (presumably in <tools/stack.hxx> and <tools/list.hxx>); the first
// argument is the name of the type being declared, the second the element
// type. Both containers hold raw pointers.
// NOTE(review): who deletes the pointed-to objects is not visible here.
DECLARE_STACK(SmNodeStack,  SmNode *)
DECLARE_LIST(SmErrDescList, SmErrorDesc *)
164*cdf0e10cSrcweir 
165*cdf0e10cSrcweir /**************************************************************************/
166*cdf0e10cSrcweir 
// Possible conversions of the formula text from the format of one release
// to that of another. Selected through SmParser::SetConversion and read
// back with SmParser::GetConversion.
enum SmConvert
{
    CONVERT_NONE,       // 0
    CONVERT_40_TO_50,   // 1
    CONVERT_50_TO_60,   // 2
    CONVERT_60_TO_50    // 3
};
176*cdf0e10cSrcweir 
177*cdf0e10cSrcweir 
178*cdf0e10cSrcweir class SmParser
179*cdf0e10cSrcweir {
180*cdf0e10cSrcweir     String          m_aBufferString;
181*cdf0e10cSrcweir     SmToken         m_aCurToken;
182*cdf0e10cSrcweir     SmNodeStack     m_aNodeStack;
183*cdf0e10cSrcweir     SmErrDescList   m_aErrDescList;
184*cdf0e10cSrcweir     int             m_nCurError;
185*cdf0e10cSrcweir     LanguageType    m_nLang;
186*cdf0e10cSrcweir     xub_StrLen      m_nBufferIndex,
187*cdf0e10cSrcweir                     m_nTokenIndex;
188*cdf0e10cSrcweir     sal_uInt16          m_Row,
189*cdf0e10cSrcweir                     m_nColOff;
190*cdf0e10cSrcweir     SmConvert       m_eConversion;
191*cdf0e10cSrcweir     sal_Bool        m_bImportSymNames,
192*cdf0e10cSrcweir                     m_bExportSymNames;
193*cdf0e10cSrcweir 
194*cdf0e10cSrcweir     // map of used symbols (used to reduce file size by exporting only actually used symbols)
195*cdf0e10cSrcweir     std::set< rtl::OUString >   m_aUsedSymbols;
196*cdf0e10cSrcweir 
197*cdf0e10cSrcweir 	// declare copy-constructor and assignment-operator private
198*cdf0e10cSrcweir 	SmParser(const SmParser &);
199*cdf0e10cSrcweir 	SmParser & operator = (const SmParser &);
200*cdf0e10cSrcweir 
201*cdf0e10cSrcweir protected:
202*cdf0e10cSrcweir #if OSL_DEBUG_LEVEL
203*cdf0e10cSrcweir 	sal_Bool			IsDelimiter( const String &rTxt, xub_StrLen nPos );
204*cdf0e10cSrcweir #endif
205*cdf0e10cSrcweir 	void			NextToken();
206*cdf0e10cSrcweir     xub_StrLen      GetTokenIndex() const   { return m_nTokenIndex; }
207*cdf0e10cSrcweir 	void 			Insert(const String &rText, sal_uInt16 nPos);
208*cdf0e10cSrcweir     void            Replace( sal_uInt16 nPos, sal_uInt16 nLen, const String &rText );
209*cdf0e10cSrcweir 
210*cdf0e10cSrcweir 	inline sal_Bool		TokenInGroup(sal_uLong nGroup);
211*cdf0e10cSrcweir 
212*cdf0e10cSrcweir 	// grammar
213*cdf0e10cSrcweir 	void	Table();
214*cdf0e10cSrcweir 	void	Line();
215*cdf0e10cSrcweir 	void	Expression();
216*cdf0e10cSrcweir 	void	Relation();
217*cdf0e10cSrcweir 	void	Sum();
218*cdf0e10cSrcweir 	void	Product();
219*cdf0e10cSrcweir 	void	SubSup(sal_uLong nActiveGroup);
220*cdf0e10cSrcweir 	void	OpSubSup();
221*cdf0e10cSrcweir 	void 	Power();
222*cdf0e10cSrcweir 	void	Blank();
223*cdf0e10cSrcweir 	void	Term();
224*cdf0e10cSrcweir 	void	Escape();
225*cdf0e10cSrcweir 	void	Operator();
226*cdf0e10cSrcweir 	void	Oper();
227*cdf0e10cSrcweir 	void	UnOper();
228*cdf0e10cSrcweir 	void	Align();
229*cdf0e10cSrcweir 	void	FontAttribut();
230*cdf0e10cSrcweir 	void	Attribut();
231*cdf0e10cSrcweir 	void	Font();
232*cdf0e10cSrcweir 	void	FontSize();
233*cdf0e10cSrcweir 	void	Color();
234*cdf0e10cSrcweir 	void	Brace();
235*cdf0e10cSrcweir 	void	Bracebody(sal_Bool bIsLeftRight);
236*cdf0e10cSrcweir 	void	Function();
237*cdf0e10cSrcweir 	void	Binom();
238*cdf0e10cSrcweir 	void	Stack();
239*cdf0e10cSrcweir 	void	Matrix();
240*cdf0e10cSrcweir 	void	Special();
241*cdf0e10cSrcweir 	void	GlyphSpecial();
242*cdf0e10cSrcweir 	// end of grammar
243*cdf0e10cSrcweir 
244*cdf0e10cSrcweir     LanguageType    GetLanguage() const { return m_nLang; }
245*cdf0e10cSrcweir     void            SetLanguage( LanguageType nNewLang ) { m_nLang = nNewLang; }
246*cdf0e10cSrcweir 
247*cdf0e10cSrcweir 	void	Error(SmParseError Error);
248*cdf0e10cSrcweir 
249*cdf0e10cSrcweir     void    ClearUsedSymbols()                              { m_aUsedSymbols.clear(); }
250*cdf0e10cSrcweir     void    AddToUsedSymbols( const String &rSymbolName )   { m_aUsedSymbols.insert( rSymbolName ); }
251*cdf0e10cSrcweir 
252*cdf0e10cSrcweir public:
253*cdf0e10cSrcweir                  SmParser();
254*cdf0e10cSrcweir 
255*cdf0e10cSrcweir 	SmNode		*Parse(const String &rBuffer);
256*cdf0e10cSrcweir 
257*cdf0e10cSrcweir     const String & GetText() const { return m_aBufferString; };
258*cdf0e10cSrcweir 
259*cdf0e10cSrcweir     SmConvert    GetConversion() const              { return m_eConversion; }
260*cdf0e10cSrcweir     void         SetConversion(SmConvert eConv)     { m_eConversion = eConv; }
261*cdf0e10cSrcweir 
262*cdf0e10cSrcweir     sal_Bool     IsImportSymbolNames() const        { return m_bImportSymNames; }
263*cdf0e10cSrcweir     void         SetImportSymbolNames(sal_Bool bVal)    { m_bImportSymNames = bVal; }
264*cdf0e10cSrcweir     sal_Bool     IsExportSymbolNames() const        { return m_bExportSymNames; }
265*cdf0e10cSrcweir     void         SetExportSymbolNames(sal_Bool bVal)    { m_bExportSymNames = bVal; }
266*cdf0e10cSrcweir 
267*cdf0e10cSrcweir 	sal_uInt16		 AddError(SmParseError Type, SmNode *pNode);
268*cdf0e10cSrcweir 
269*cdf0e10cSrcweir 	const SmErrorDesc *	NextError();
270*cdf0e10cSrcweir 	const SmErrorDesc *	PrevError();
271*cdf0e10cSrcweir     const SmErrorDesc * GetError(sal_uInt16 i = 0xFFFF);
272*cdf0e10cSrcweir 
273*cdf0e10cSrcweir     bool    IsUsedSymbol( const String &rSymbolName ) const { return m_aUsedSymbols.find( rSymbolName ) != m_aUsedSymbols.end(); }
274*cdf0e10cSrcweir     std::set< rtl::OUString >   GetUsedSymbols() const      { return m_aUsedSymbols; }
275*cdf0e10cSrcweir };
276*cdf0e10cSrcweir 
277*cdf0e10cSrcweir 
278*cdf0e10cSrcweir inline sal_Bool SmParser::TokenInGroup(sal_uLong nGroup)
279*cdf0e10cSrcweir {
280*cdf0e10cSrcweir     return (m_aCurToken.nGroup & nGroup) ? sal_True : sal_False;
281*cdf0e10cSrcweir }
282*cdf0e10cSrcweir 
283*cdf0e10cSrcweir 
284*cdf0e10cSrcweir #endif
285*cdf0e10cSrcweir 
286