xref: /trunk/main/starmath/inc/parse.hxx (revision cdf0e10c4e3984b49a9502b011690b615761d4a3)
1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir #ifndef PARSE_HXX
28*cdf0e10cSrcweir #define PARSE_HXX
29*cdf0e10cSrcweir 
30*cdf0e10cSrcweir 
31*cdf0e10cSrcweir #include <vcl/svapp.hxx>
32*cdf0e10cSrcweir #include <tools/stack.hxx>
33*cdf0e10cSrcweir #include <tools/list.hxx>
34*cdf0e10cSrcweir #include <tools/string.hxx>
35*cdf0e10cSrcweir 
36*cdf0e10cSrcweir #include <set>
37*cdf0e10cSrcweir #include <stack>
38*cdf0e10cSrcweir #include <list>
39*cdf0e10cSrcweir 
40*cdf0e10cSrcweir #include "types.hxx"
41*cdf0e10cSrcweir 
42*cdf0e10cSrcweir class SmNode;
43*cdf0e10cSrcweir class SmDocShell;
44*cdf0e10cSrcweir 
45*cdf0e10cSrcweir //////////////////////////////////////////////////////////////////////
46*cdf0e10cSrcweir 
// Token group flags.
// Every TG* value below is a distinct single bit, so several groups can be
// OR-ed together into one mask and tested with a bitwise AND. That is the
// test SmParser::TokenInGroup() performs on SmToken::nGroup.
// NOTE(review): the meaning of each individual group is only suggested by
// its name here; confirm against the token table in the parser source.
47*cdf0e10cSrcweir // TokenGroups
48*cdf0e10cSrcweir #define TGOPER          0x00000001
49*cdf0e10cSrcweir #define TGRELATION      0x00000002
50*cdf0e10cSrcweir #define TGSUM           0x00000004
51*cdf0e10cSrcweir #define TGPRODUCT       0x00000008
52*cdf0e10cSrcweir #define TGUNOPER        0x00000010
53*cdf0e10cSrcweir #define TGPOWER         0x00000020
54*cdf0e10cSrcweir #define TGATTRIBUT      0x00000040
55*cdf0e10cSrcweir #define TGALIGN         0x00000080
56*cdf0e10cSrcweir #define TGFUNCTION      0x00000100
57*cdf0e10cSrcweir #define TGBLANK         0x00000200
58*cdf0e10cSrcweir #define TGLBRACES       0x00000400
59*cdf0e10cSrcweir #define TGRBRACES       0x00000800
60*cdf0e10cSrcweir #define TGCOLOR         0x00001000
61*cdf0e10cSrcweir #define TGFONT          0x00002000
62*cdf0e10cSrcweir #define TGSTANDALONE    0x00004000
63*cdf0e10cSrcweir #define TGDISCARDED     0x00008000
64*cdf0e10cSrcweir #define TGLIMIT         0x00010000
65*cdf0e10cSrcweir #define TGFONTATTR      0x00020000
66*cdf0e10cSrcweir 
67*cdf0e10cSrcweir 
// Kinds of token; this is the type of SmToken::eType.
// No enumerator has an explicit value, so each one's numeric value is its
// position in this list (TEND is 0). Inserting, removing or reordering
// entries therefore changes the values of everything that follows.
// NOTE(review): TNDIBVIDES (next to TDIVIDES) looks like a misspelling of
// TNDIVIDES, which is already declared earlier in the list - check for
// users of either name before renaming or removing one.
68*cdf0e10cSrcweir enum SmTokenType
69*cdf0e10cSrcweir {
70*cdf0e10cSrcweir     TEND,           TLGROUP,        TRGROUP,        TLPARENT,       TRPARENT,
71*cdf0e10cSrcweir     TLBRACKET,      TRBRACKET,      TPLUS,          TMINUS,         TMULTIPLY,
72*cdf0e10cSrcweir     TDIVIDEBY,      TASSIGN,        TPOUND,         TSPECIAL,       TSLASH,
73*cdf0e10cSrcweir     TBACKSLASH,     TBLANK,         TSBLANK,        TRSUB,          TRSUP,
74*cdf0e10cSrcweir     TCSUB,          TCSUP,          TLSUB,          TLSUP,          TGT,
75*cdf0e10cSrcweir     TLT,            TAND,           TOR,            TINTERSECT,     TUNION,
76*cdf0e10cSrcweir     TNEWLINE,       TBINOM,         TFROM,          TTO,            TINT,
77*cdf0e10cSrcweir     TSUM,           TOPER,          TABS,           TSQRT,          TFACT,
78*cdf0e10cSrcweir     TNROOT,         TOVER,          TTIMES,         TGE,            TLE,
79*cdf0e10cSrcweir     TGG,            TLL,            TDOTSAXIS,      TDOTSLOW,       TDOTSVERT,
80*cdf0e10cSrcweir     TDOTSDIAG,      TDOTSUP,        TDOTSDOWN,      TACUTE,         TBAR,
81*cdf0e10cSrcweir     TBREVE,         TCHECK,         TCIRCLE,        TDOT,           TDDOT,
82*cdf0e10cSrcweir     TDDDOT,         TGRAVE,         THAT,           TTILDE,         TVEC,
83*cdf0e10cSrcweir     TUNDERLINE,     TOVERLINE,      TOVERSTRIKE,    TITALIC,        TNITALIC,
84*cdf0e10cSrcweir     TBOLD,          TNBOLD,         TPHANTOM,       TFONT,          TSIZE,
85*cdf0e10cSrcweir     TCOLOR,         TALIGNL,        TALIGNC,        TALIGNR,        TLEFT,
86*cdf0e10cSrcweir     TRIGHT,         TLANGLE,        TLBRACE,        TLLINE,         TLDLINE,
87*cdf0e10cSrcweir     TLCEIL,         TLFLOOR,        TNONE,          TMLINE,         TRANGLE,
88*cdf0e10cSrcweir     TRBRACE,        TRLINE,         TRDLINE,        TRCEIL,         TRFLOOR,
89*cdf0e10cSrcweir     TSIN,           TCOS,           TTAN,           TCOT,           TFUNC,
90*cdf0e10cSrcweir     TSTACK,         TMATRIX,        TMATFORM,       TDPOUND,        TPLACE,
91*cdf0e10cSrcweir     TTEXT,          TNUMBER,        TCHARACTER,     TIDENT,         TNEQ,
92*cdf0e10cSrcweir     TEQUIV,         TDEF,           TPROP,          TSIM,           TSIMEQ,
93*cdf0e10cSrcweir     TAPPROX,        TPARALLEL,      TORTHO,         TIN,            TNOTIN,
94*cdf0e10cSrcweir     TSUBSET,        TSUBSETEQ,      TSUPSET,        TSUPSETEQ,      TPLUSMINUS,
95*cdf0e10cSrcweir     TMINUSPLUS,     TOPLUS,         TOMINUS,        TDIV,           TOTIMES,
96*cdf0e10cSrcweir     TODIVIDE,       TTRANSL,        TTRANSR,        TIINT,          TIIINT,
97*cdf0e10cSrcweir     TLINT,          TLLINT,         TLLLINT,        TPROD,          TCOPROD,
98*cdf0e10cSrcweir     TFORALL,        TEXISTS,        TLIM,           TNABLA,         TTOWARD,
99*cdf0e10cSrcweir     TSINH,          TCOSH,          TTANH,          TCOTH,          TASIN,
100*cdf0e10cSrcweir     TACOS,          TATAN,          TLN,            TLOG,           TUOPER,
101*cdf0e10cSrcweir     TBOPER,         TBLACK,         TWHITE,         TRED,           TGREEN,
102*cdf0e10cSrcweir     TBLUE,          TCYAN,          TMAGENTA,       TYELLOW,        TFIXED,
103*cdf0e10cSrcweir     TSANS,          TSERIF,         TPOINT,         TASINH,         TACOSH,
104*cdf0e10cSrcweir     TATANH,         TACOTH,         TACOT,          TEXP,           TCDOT,
105*cdf0e10cSrcweir     TODOT,          TLESLANT,       TGESLANT,       TNSUBSET,       TNSUPSET,
106*cdf0e10cSrcweir     TNSUBSETEQ,     TNSUPSETEQ,     TPARTIAL,       TNEG,           TNI,
107*cdf0e10cSrcweir     TBACKEPSILON,   TALEPH,         TIM,            TRE,            TWP,
108*cdf0e10cSrcweir     TEMPTYSET,      TINFINITY,      TESCAPE,        TLIMSUP,        TLIMINF,
109*cdf0e10cSrcweir     TNDIVIDES,      TDRARROW,       TDLARROW,       TDLRARROW,      TUNDERBRACE,
110*cdf0e10cSrcweir     TOVERBRACE,     TCIRC,          TTOP,           THBAR,          TLAMBDABAR,
111*cdf0e10cSrcweir     TLEFTARROW,     TRIGHTARROW,    TUPARROW,       TDOWNARROW,     TDIVIDES,
112*cdf0e10cSrcweir     TNDIBVIDES,     TSETN,          TSETZ,          TSETQ,          TSETR,
113*cdf0e10cSrcweir     TSETC,          TWIDEVEC,       TWIDETILDE,     TWIDEHAT,       TWIDESLASH,
114*cdf0e10cSrcweir     TWIDEBACKSLASH, TLDBRACKET,     TRDBRACKET,     TNOSPACE,
115*cdf0e10cSrcweir     TUNKNOWN,       TDEBUG
116*cdf0e10cSrcweir };
117*cdf0e10cSrcweir 
118*cdf0e10cSrcweir 
// One token of formula text: its text and kind, the group/level data the
// parser consults, and its position. SmParser keeps the token it is
// currently looking at in a member of this type (m_aCurToken).
// The members have no in-class initialisers; the default constructor is
// only declared here and defined elsewhere.
119*cdf0e10cSrcweir struct SmToken
120*cdf0e10cSrcweir {
121*cdf0e10cSrcweir 
122*cdf0e10cSrcweir     String          aText;      // token text
123*cdf0e10cSrcweir     SmTokenType     eType;      // token info: which kind of token this is
124*cdf0e10cSrcweir     sal_Unicode     cMathChar;  // presumably the character used to display the token - TODO confirm
125*cdf0e10cSrcweir 
126*cdf0e10cSrcweir     // parse-help info
127*cdf0e10cSrcweir     sal_uLong       nGroup;     // bit mask, tested bitwise by SmParser::TokenInGroup()
128*cdf0e10cSrcweir     sal_uInt16      nLevel;     // NOTE(review): meaning not visible in this header - confirm in the parser source
129*cdf0e10cSrcweir 
130*cdf0e10cSrcweir     // token position
131*cdf0e10cSrcweir     sal_uInt16      nRow;
132*cdf0e10cSrcweir     xub_StrLen      nCol;
133*cdf0e10cSrcweir 
134*cdf0e10cSrcweir     SmToken();
135*cdf0e10cSrcweir };
136*cdf0e10cSrcweir 
137*cdf0e10cSrcweir 
// Categories of parse error. This is the type taken by SmParser::Error()
// and SmParser::AddError() and stored in SmErrorDesc::Type.
// The enumerators carry no explicit values, so PE_NONE is 0 and the rest
// follow in list order.
138*cdf0e10cSrcweir enum SmParseError
139*cdf0e10cSrcweir {
140*cdf0e10cSrcweir     PE_NONE,                    PE_UNEXPECTED_END_OF_INPUT,
141*cdf0e10cSrcweir     PE_UNEXPECTED_CHAR,         PE_UNEXPECTED_TOKEN,
142*cdf0e10cSrcweir     PE_FUNC_EXPECTED,           PE_UNOPER_EXPECTED,
143*cdf0e10cSrcweir     PE_BINOPER_EXPECTED,        PE_SYMBOL_EXPECTED,
144*cdf0e10cSrcweir     PE_IDENTIFIER_EXPECTED,     PE_POUND_EXPECTED,
145*cdf0e10cSrcweir     PE_COLOR_EXPECTED,          PE_LGROUP_EXPECTED,
146*cdf0e10cSrcweir     PE_RGROUP_EXPECTED,         PE_LBRACE_EXPECTED,
147*cdf0e10cSrcweir     PE_RBRACE_EXPECTED,         PE_PARENT_MISMATCH,
148*cdf0e10cSrcweir     PE_RIGHT_EXPECTED,          PE_FONT_EXPECTED,
149*cdf0e10cSrcweir     PE_SIZE_EXPECTED,           PE_DOUBLE_ALIGN,
150*cdf0e10cSrcweir     PE_DOUBLE_SUBSUPSCRIPT
151*cdf0e10cSrcweir };
152*cdf0e10cSrcweir 
153*cdf0e10cSrcweir 
// Record describing one parse error. Pointers to these records are what
// SmErrDescList stores and what SmParser::NextError(), PrevError() and
// GetError() return.
154*cdf0e10cSrcweir struct SmErrorDesc
155*cdf0e10cSrcweir {
156*cdf0e10cSrcweir     SmParseError  Type;     // error category
157*cdf0e10cSrcweir     SmNode       *pNode;    // raw pointer to a node; presumably the node the error refers to, ownership not visible here - TODO confirm
158*cdf0e10cSrcweir     String        Text;     // presumably the error message text - TODO confirm
159*cdf0e10cSrcweir };
160*cdf0e10cSrcweir 
161*cdf0e10cSrcweir 
// Container types used as SmParser members: SmNodeStack (m_aNodeStack) with
// SmNode* elements and SmErrDescList (m_aErrDescList) with SmErrorDesc*
// elements. Both element types are raw pointers.
// NOTE(review): DECLARE_STACK / DECLARE_LIST are presumably the macros from
// <tools/stack.hxx> and <tools/list.hxx> included above; whether the
// generated containers delete their elements is not visible here - confirm.
162*cdf0e10cSrcweir DECLARE_STACK(SmNodeStack,  SmNode *)
163*cdf0e10cSrcweir DECLARE_LIST(SmErrDescList, SmErrorDesc *)
164*cdf0e10cSrcweir 
165*cdf0e10cSrcweir /**************************************************************************/
166*cdf0e10cSrcweir 
167*cdf0e10cSrcweir // Possible conversions of the formula text from the format of one
168*cdf0e10cSrcweir // release to that of another. CONVERT_NONE is the first enumerator (value 0).
// Read and written on the parser through SmParser::GetConversion() and
// SmParser::SetConversion().
169*cdf0e10cSrcweir enum SmConvert
170*cdf0e10cSrcweir {
171*cdf0e10cSrcweir     CONVERT_NONE,
172*cdf0e10cSrcweir     CONVERT_40_TO_50,
173*cdf0e10cSrcweir     CONVERT_50_TO_60,
174*cdf0e10cSrcweir     CONVERT_60_TO_50
175*cdf0e10cSrcweir };
176*cdf0e10cSrcweir 
177*cdf0e10cSrcweir 
// Parser for formula text. Parse() takes the text and returns an SmNode
// pointer; the protected "grammar" member functions are declared one per
// grammar construct. The class also keeps a list of error descriptions and
// the set of symbol names recorded through AddToUsedSymbols().
// Instances cannot be copied: the copy constructor and copy assignment
// operator are declared private.
// NOTE(review): no destructor is declared although m_aErrDescList and
// m_aNodeStack hold raw pointers - check in the implementation who frees
// the SmErrorDesc objects and nodes.
178*cdf0e10cSrcweir class SmParser
179*cdf0e10cSrcweir {
180*cdf0e10cSrcweir     String          m_aBufferString;    // returned by reference from GetText()
181*cdf0e10cSrcweir     SmToken         m_aCurToken;        // token examined by TokenInGroup()
182*cdf0e10cSrcweir     SmNodeStack     m_aNodeStack;
183*cdf0e10cSrcweir     SmErrDescList   m_aErrDescList;
184*cdf0e10cSrcweir     int             m_nCurError;
185*cdf0e10cSrcweir     LanguageType    m_nLang;            // accessed via GetLanguage() / SetLanguage()
186*cdf0e10cSrcweir     xub_StrLen      m_nBufferIndex,
187*cdf0e10cSrcweir                     m_nTokenIndex;      // returned by GetTokenIndex()
188*cdf0e10cSrcweir     sal_uInt16          m_Row,
189*cdf0e10cSrcweir                     m_nColOff;
190*cdf0e10cSrcweir     SmConvert       m_eConversion;      // accessed via GetConversion() / SetConversion()
191*cdf0e10cSrcweir     sal_Bool        m_bImportSymNames,
192*cdf0e10cSrcweir                     m_bExportSymNames;
193*cdf0e10cSrcweir 
194*cdf0e10cSrcweir     // set of used symbols (used to reduce file size by exporting only actually used symbols)
195*cdf0e10cSrcweir     std::set< rtl::OUString >   m_aUsedSymbols;
196*cdf0e10cSrcweir 
197*cdf0e10cSrcweir     // copy constructor and assignment operator are declared private so that
    // code outside the class cannot copy a parser
198*cdf0e10cSrcweir     SmParser(const SmParser &);
199*cdf0e10cSrcweir     SmParser & operator = (const SmParser &);
200*cdf0e10cSrcweir 
201*cdf0e10cSrcweir protected:
    // IsDelimiter() is only declared when OSL_DEBUG_LEVEL is non-zero.
202*cdf0e10cSrcweir #if OSL_DEBUG_LEVEL
203*cdf0e10cSrcweir     sal_Bool            IsDelimiter( const String &rTxt, xub_StrLen nPos );
204*cdf0e10cSrcweir #endif
205*cdf0e10cSrcweir     void            NextToken();
206*cdf0e10cSrcweir     xub_StrLen      GetTokenIndex() const   { return m_nTokenIndex; }
207*cdf0e10cSrcweir     void            Insert(const String &rText, sal_uInt16 nPos);
208*cdf0e10cSrcweir     void            Replace( sal_uInt16 nPos, sal_uInt16 nLen, const String &rText );
209*cdf0e10cSrcweir 
    // Defined inline at the end of this header.
210*cdf0e10cSrcweir     inline sal_Bool     TokenInGroup(sal_uLong nGroup);
211*cdf0e10cSrcweir 
212*cdf0e10cSrcweir     // grammar: one member function per construct. All return void;
    // presumably they communicate their results through m_aNodeStack - TODO confirm
213*cdf0e10cSrcweir     void    Table();
214*cdf0e10cSrcweir     void    Line();
215*cdf0e10cSrcweir     void    Expression();
216*cdf0e10cSrcweir     void    Relation();
217*cdf0e10cSrcweir     void    Sum();
218*cdf0e10cSrcweir     void    Product();
219*cdf0e10cSrcweir     void    SubSup(sal_uLong nActiveGroup);
220*cdf0e10cSrcweir     void    OpSubSup();
221*cdf0e10cSrcweir     void    Power();
222*cdf0e10cSrcweir     void    Blank();
223*cdf0e10cSrcweir     void    Term();
224*cdf0e10cSrcweir     void    Escape();
225*cdf0e10cSrcweir     void    Operator();
226*cdf0e10cSrcweir     void    Oper();
227*cdf0e10cSrcweir     void    UnOper();
228*cdf0e10cSrcweir     void    Align();
229*cdf0e10cSrcweir     void    FontAttribut();
230*cdf0e10cSrcweir     void    Attribut();
231*cdf0e10cSrcweir     void    Font();
232*cdf0e10cSrcweir     void    FontSize();
233*cdf0e10cSrcweir     void    Color();
234*cdf0e10cSrcweir     void    Brace();
235*cdf0e10cSrcweir     void    Bracebody(sal_Bool bIsLeftRight);
236*cdf0e10cSrcweir     void    Function();
237*cdf0e10cSrcweir     void    Binom();
238*cdf0e10cSrcweir     void    Stack();
239*cdf0e10cSrcweir     void    Matrix();
240*cdf0e10cSrcweir     void    Special();
241*cdf0e10cSrcweir     void    GlyphSpecial();
242*cdf0e10cSrcweir     // end of grammar
243*cdf0e10cSrcweir 
244*cdf0e10cSrcweir     LanguageType    GetLanguage() const { return m_nLang; }
245*cdf0e10cSrcweir     void            SetLanguage( LanguageType nNewLang ) { m_nLang = nNewLang; }
246*cdf0e10cSrcweir 
247*cdf0e10cSrcweir     void    Error(SmParseError Error);
248*cdf0e10cSrcweir 
    // Bookkeeping for m_aUsedSymbols. AddToUsedSymbols() takes a String while
    // the set stores rtl::OUString, so the name is converted on insertion.
249*cdf0e10cSrcweir     void    ClearUsedSymbols()                              { m_aUsedSymbols.clear(); }
250*cdf0e10cSrcweir     void    AddToUsedSymbols( const String &rSymbolName )   { m_aUsedSymbols.insert( rSymbolName ); }
251*cdf0e10cSrcweir 
252*cdf0e10cSrcweir public:
253*cdf0e10cSrcweir                  SmParser();
254*cdf0e10cSrcweir 
    // Parses rBuffer and returns a node pointer.
    // NOTE(review): ownership of the returned SmNode is not visible in this
    // header - confirm with the implementation and callers.
255*cdf0e10cSrcweir     SmNode      *Parse(const String &rBuffer);
256*cdf0e10cSrcweir 
    // Returns a reference to m_aBufferString (no copy). The ';' after the
    // function body is redundant.
257*cdf0e10cSrcweir     const String & GetText() const { return m_aBufferString; };
258*cdf0e10cSrcweir 
259*cdf0e10cSrcweir     SmConvert    GetConversion() const              { return m_eConversion; }
260*cdf0e10cSrcweir     void         SetConversion(SmConvert eConv)     { m_eConversion = eConv; }
261*cdf0e10cSrcweir 
262*cdf0e10cSrcweir     sal_Bool     IsImportSymbolNames() const        { return m_bImportSymNames; }
263*cdf0e10cSrcweir     void         SetImportSymbolNames(sal_Bool bVal)    { m_bImportSymNames = bVal; }
264*cdf0e10cSrcweir     sal_Bool     IsExportSymbolNames() const        { return m_bExportSymNames; }
265*cdf0e10cSrcweir     void         SetExportSymbolNames(sal_Bool bVal)    { m_bExportSymNames = bVal; }
266*cdf0e10cSrcweir 
    // Error list access.
    // NOTE(review): the meaning of AddError()'s return value and of the
    // GetError() default argument 0xFFFF (presumably "current error") is not
    // visible in this header - confirm in the implementation.
267*cdf0e10cSrcweir     sal_uInt16       AddError(SmParseError Type, SmNode *pNode);
268*cdf0e10cSrcweir 
269*cdf0e10cSrcweir     const SmErrorDesc * NextError();
270*cdf0e10cSrcweir     const SmErrorDesc * PrevError();
271*cdf0e10cSrcweir     const SmErrorDesc * GetError(sal_uInt16 i = 0xFFFF);
272*cdf0e10cSrcweir 
    // IsUsedSymbol(): true if rSymbolName is contained in m_aUsedSymbols.
    // GetUsedSymbols(): returns the set by value, i.e. a copy on each call.
273*cdf0e10cSrcweir     bool    IsUsedSymbol( const String &rSymbolName ) const { return m_aUsedSymbols.find( rSymbolName ) != m_aUsedSymbols.end(); }
274*cdf0e10cSrcweir     std::set< rtl::OUString >   GetUsedSymbols() const      { return m_aUsedSymbols; }
275*cdf0e10cSrcweir };
276*cdf0e10cSrcweir 
277*cdf0e10cSrcweir 
// Tests whether the current token belongs to any of the given groups.
// nGroup is a mask of token-group bits; it may combine several groups.
// Returns sal_True if m_aCurToken.nGroup and nGroup have at least one bit
// in common, sal_False otherwise (so a zero mask always yields sal_False).
278*cdf0e10cSrcweir inline sal_Bool SmParser::TokenInGroup(sal_uLong nGroup)
279*cdf0e10cSrcweir {
280*cdf0e10cSrcweir     return (m_aCurToken.nGroup & nGroup) ? sal_True : sal_False;
281*cdf0e10cSrcweir }
282*cdf0e10cSrcweir 
283*cdf0e10cSrcweir 
284*cdf0e10cSrcweir #endif
285*cdf0e10cSrcweir 
286