1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 package org.openoffice.xmerge.converter.xml.sxc.pexcel.records.formula;
25 
26 
27 import java.util.Vector;
28 
29 import org.openoffice.xmerge.converter.xml.sxc.pexcel.records.Workbook;
30 import org.openoffice.xmerge.util.Debug;
31 
32 /**
33  * This is the Formula Parser based on an article written by Jack Crenshaw. It is a
34  * top down parser with some basic error handling. It handles
35  * +,-,*,/,>,<,>=,<=,=,<>, unary + and - as well as functions.
36  * The BNF notation for this parser is
37  * <pre>
38  *  &lt;expression&gt; ::= &lt;unary op&gt; &lt;term&gt; [&lt;addop&gt;|&lt;logop&gt; &lt;term&gt;]
39  *  &lt;term&gt;       ::= &lt;factor&gt; [&lt;mulop&gt; &lt;factor&gt;]
40  *  &lt;factor&gt;     ::= &lt;number&gt;[%] | &lt;CellRef&gt; | &lt;QuoteString&gt; | &lt;expression&gt;
41  * </pre>
42  */
43 public class FormulaParser {
44 
45     private char look;
46     private String formulaStr;
47     private int index = 1;
48     private TokenFactory tokenFactory;
49     private Vector tokenVector;
50     private Workbook wb;
51 
52     /**
53      * Default constructor
54      */
FormulaParser()55     public FormulaParser() {
56 
57         Debug.log(Debug.TRACE,"Creating a Formula Parser");
58         tokenFactory = new TokenFactory();
59         tokenVector = new Vector();
60     }
61 
62     /**
63      *
64      */
setWorkbook(Workbook wb)65     public void setWorkbook(Workbook wb) {
66 
67         this.wb = wb;
68     }
69 
70     /**
71      * Parse method for parsing from a String to a byte[]
72      *
73      * @param formula A <code>String</code> representation of a formula
74      * starting with the '=' character
75      * @return A <code>Vector</code> containing the parsed <code>Token</code>s
76      */
parse(String formula)77     public Vector parse(String formula) throws FormulaParsingException {
78 
79         index = 1;
80         look = ' ';
81         tokenVector.clear();
82         if(formula.startsWith("=")) {
83             formulaStr = formula;
84             Debug.log(Debug.TRACE,"Creating a Formula Parser for " + formulaStr);
85             getChar();
86             expression();
87         } else {
88             throw new FormulaParsingException("No equals found!" + makeErrorString());
89         }
90         return tokenVector;
91     }
92 
93     /**
94      * Identify + and - operators
95      *
96      * @param  c The character which is to be identified
97      * @return A boolean returning the result of the comparison
98      */
isAddOp(char c)99     private boolean isAddOp(char c) {
100         return (c == '-') || (c == '+');
101     }
102 
103     /**
104      * Determine if the current character is a multiop
105      *
106      * @return A boolean returning the result of the comparison
107      */
isMultiOp()108     private boolean isMultiOp() {
109         return look=='*' || look =='/' || look == '^' || look == '&';
110     }
111 
112     /**
113      * Identify &lt;, &gt;, &lt;=, &gt;=, =, &lt;&gt; using the index to find the current character(s)
114      *
115      * @return A boolean returning the result of the comparison
116      */
isLogicalOp()117     private boolean isLogicalOp() {
118         if (!isLogicalOpChar(look)) {
119             return false;
120         } else if ((index+1) >= formulaStr.length()) {//logical operators in their own right : if at end then return true
121             return true;
122         } else if (!isLogicalOpChar(formulaStr.charAt(index))) { // we have >, < or = on their own
123             return true;
124         } else if ((look == '<') && ((formulaStr.charAt(index) == '>') || formulaStr.charAt(index) == '=')) { // <>, or <=
125             return true;
126         } else if ((look == '>')  && (formulaStr.charAt(index) == '=')) { // >=
127             return true;
128         }
129 
130         return false;
131     }
132 
133     /**
134      * Identify &lt;, &gt;, &lt;=, &gt;=, =, &lt;&gt;
135      *
136      * @param op The <code>String</code> which is to be identified
137      * @return A boolean returning the result of the comparison
138      */
isLogicalOp(String op)139     private boolean isLogicalOp(String op) {
140         return  ((op.compareTo(">") == 0) ||
141                  (op.compareTo("<") == 0) ||
142                  (op.compareTo(">=") == 0) ||
143                  (op.compareTo("<=") == 0) ||
144                  (op.compareTo("=") == 0) ||
145                  (op.compareTo("<>") == 0));
146     }
147 
148 
149     /**
150      * Identify characters that MAY be logical operator characters
151      *
152      * @param  c The character which is to be identified
153      * @return A boolean returning the result of the comparison
154      */
isLogicalOpChar(char c)155     private boolean isLogicalOpChar(char c) {
156         return (c == '>') || (c == '<') || (c == '=');
157     }
158 
159     /**
160      * Identify special Cell Reference charaters
161      *
162      * @param  c The character which is to be identified
163      * @return A boolean returning the result of the comparison
164      */
isCellRefSpecialChar(char c)165     private boolean isCellRefSpecialChar(char c) {
166         return (c == ':') || (c == '$') || (c == '.');
167     }
168 
169     /**
170      * Identify letters
171      *
172      * @param  c The character which is to be identified
173      * @return A boolean returning the result of the comparison
174      */
isAlpha(char c)175     private boolean isAlpha(char c) {
176         return(Character.isLetter(c));
177     }
178 
179     /**
180      * Identify numbers
181      *
182      * @param  c The character which is to be identified
183      * @return A boolean returning the result of the comparison
184      */
isDigit(char c)185     private boolean isDigit(char c) {
186         return(Character.isDigit(c));
187     }
188 
189     /**
190      * Identify numbers
191      *
192      * @param  c The character which is to be identified
193      * @return A boolean returning the result of the comparison
194      */
isPercent(char c)195     private boolean isPercent(char c) {
196         return (c == '%');
197     }
198 
199     /**
200      * Identify letters or numbers
201      *
202      * @param  c The character which is to be identified
203      * @return A boolean returning the result of the comparison
204      */
isAlphaNum(char c)205     private boolean isAlphaNum(char c) {
206         return(isAlpha(c) || isDigit(c));
207     }
208 
209     /**
210      * Identify valid Characters for cell references
211      *
212      * @param  c The character which is to be identified
213      * @return A boolean returning the result of the comparison
214      */
isCellRefChar(char c)215     private boolean isCellRefChar(char c) {
216         return(isAlpha(c) || isDigit(c) || isCellRefSpecialChar(c));
217     }
218 
219     /**
220      * Test if current character is a match and move to next character
221      *
222      * @param  c The character which is to be matched
223      */
match(char c)224     private void match(char c) throws FormulaParsingException {
225 
226         if(look==c) {
227             Debug.log(Debug.TRACE,"Operator Found : " + look);
228             getChar();
229             skipWhite();
230         }
231         else
232             throw new FormulaParsingException("Unexpected character '" + c + "'" + makeErrorString());
233     }
234 
235     /**
236      * Test if current character is a match and move to next character
237      *
238      * @param symbol The <code>String</code> to be matched.
239      */
match(String symbol)240     private void match(String symbol) throws FormulaParsingException {
241 
242         int numChars = symbol.length();
243         boolean bContinue = true;
244         for (int i=0;i<numChars && bContinue; i++) {
245             if (look == symbol.charAt(i)) {
246                 bContinue = getChar();
247                 skipWhite();
248             } else {
249                 throw new FormulaParsingException("Unexpected character '" + symbol + "'" + makeErrorString());
250             }
251         }
252     }
253 
254     /**
255      * Skip over whitespaces (ie. spaces and tabs)
256      */
skipWhite()257     private void skipWhite() throws FormulaParsingException {
258 
259         boolean success = true;
260 
261         while(Character.isWhitespace(look) && success) {
262             success = getChar();
263         }
264     }
265 
266     /**
267      * This is a factor for multiplication and division operators
268      */
factor()269     private void factor() throws FormulaParsingException {
270         if(isAddOp(look)) {         // handle unary addop
271             Character ch = new Character(look);
272             match(look);
273             tokenVector.add(tokenFactory.getOperatorToken(ch.toString(), 1));
274         }
275         if(look=='(') {
276             match('(');
277             tokenVector.add(tokenFactory.getOperatorToken("(", 1));
278             expression();
279             match(')');
280                         tokenVector.add(tokenFactory.getOperatorToken(")", 1));
281         } else if(isDigit(look)){
282             getNum();
283         } else {
284             ident();
285         }
286     }
287 
288     /**
289      * Pulls the next character from the <code>String</code>
290      *
291      * @return boolean  false if the end if the statement
292      *                  is reached otherwise true
293      */
getChar()294     private boolean getChar() throws FormulaParsingException {
295 
296             boolean success = true;
297 
298             if(index<formulaStr.length()) {
299                 look = formulaStr.charAt(index);
300                 index++;
301                                 if(look==',')
302                                     success = false;
303             } else {
304                 success = false;
305             }
306             return success;
307     }
308 
309     /**
310      * Parses the number of arguments in a function
311      *
312      * @return The number of arguments
313      */
arguments()314     private int arguments() throws FormulaParsingException {
315         int numArgs;
316 
317         skipWhite();
318         if(look==')')
319             numArgs = 0;
320         else
321             numArgs = 1;
322 
323         while(look!=')') {
324             expression();
325             if(look==',') {
326                 numArgs++;
327                 match(',');
328                 tokenVector.add(tokenFactory.getOperatorToken(",", 1));
329             }
330         }
331         return numArgs;
332     }
333 
334     /**
335      * Test to see if we have come across a cell reference or a Name
336      * Definition.
337      */
isCellRef(String s)338      private boolean isCellRef(String s) {
339         char c;
340         boolean result = false;
341 
342         for(int i = 0;i<s.length();i++) {
343             c = s.charAt(i);
344             if(isCellRefSpecialChar(c)) {
345                 result = true;
346                 break;
347             }
348         }
349 
350         // if it is a simple cell reference then there will not be a cell
351         // reference 'special char' so we should also look for a digit
352         if(!result) {
353             if(isDigit(s.charAt(1)) || isDigit(s.charAt(2))) {
354                 result = true;
355             }
356         }
357         return result;
358      }
359 
360     /**
361      * Test to see if we have come across a cell reference or a function and
362      * add the resulting toek nto the tokenVector.
363      */
ident()364     private void ident() throws FormulaParsingException {
365 
366         String cell = getTokenString();
367         if(look=='(') {
368             Debug.log(Debug.TRACE,"Found Function : " + cell);
369 
370             int index = tokenVector.size();
371             match('(');
372             tokenVector.add(tokenFactory.getOperatorToken("(", 1));
373             int numArgs = arguments();
374             match(')');
375             tokenVector.add(tokenFactory.getOperatorToken(")", 1));
376             tokenVector.insertElementAt(tokenFactory.getFunctionToken(cell, numArgs), index);
377         } else {
378 
379             if(cell.indexOf('.')!=-1) {
380                 String cellRef = cell.substring(cell.indexOf('.') + 1, cell.length());
381                 if(cellRef.indexOf(':')!=-1) {
382                     tokenVector.add(tokenFactory.getOperandToken(cell, "3D_CELL_AREA_REFERENCE"));
383                 } else {
384                     tokenVector.add(tokenFactory.getOperandToken(cell, "3D_CELL_REFERENCE"));
385                 }
386             } else if(cell.indexOf(':')!=-1) {
387                 tokenVector.add(tokenFactory.getOperandToken(cell, "CELL_AREA_REFERENCE"));
388             } else if(isCellRef(cell)) {
389                 tokenVector.add(tokenFactory.getOperandToken(cell, "CELL_REFERENCE"));
390             } else {
391                 tokenVector.add(tokenFactory.getOperandToken(cell, "NAME"));
392             }
393         }
394     }
395 
396     /**
397      * Will keep pulling valid logical operators from the formula and return
398      * the resultant <code>String</code>.
399      *
400      * @return a <code>String</code> representing a logical operator
401      */
getLogicalOperator()402     private String getLogicalOperator() throws FormulaParsingException {
403         String op = new String();
404         boolean status;
405 
406         do {
407             op += look;
408             status = getChar();
409         } while(isLogicalOpChar(look) && status);
410         skipWhite();
411         return op;
412     }
413 
414     /**
415      * Keeps pulling characters from the statement until we get an
416      * operator and returns the resulting string.
417      *
418      * @return A <code>String</code>representing the next token
419      */
getTokenString()420     private String getTokenString() throws FormulaParsingException {
421 
422         if(!isAlpha(look) && look!='$')
423             throw new FormulaParsingException("Expected Cell Reference" + makeErrorString());
424         else {
425             String cell = new String();
426                         boolean status;
427             do {
428                 cell += look;
429                 status = getChar();
430             } while(isCellRefChar(look) && status);
431             skipWhite();
432                         return cell;
433         }
434     }
435 
436     /**
437      * Keeps pulling numbers from the statement and add the resulting integer
438      * token to the tokenVector.
439      */
getNum()440     private void getNum() throws FormulaParsingException {
441 
442         Debug.log(Debug.TRACE,"getNum : ");
443         if(!isDigit(look))
444             throw new FormulaParsingException("Expected Integer" + makeErrorString());
445         else {
446             String num = new String();
447             boolean status;
448 
449             do {
450                 num += look;
451                 status = getChar();
452             } while((isDigit(look) || ((look == '.') && isDigit(formulaStr.charAt(index)))) && status);
453             skipWhite();
454             tokenVector.add(tokenFactory.getOperandToken(num, "INTEGER"));
455             if(isPercent(look)) {
456                 match(look);
457                 tokenVector.add(tokenFactory.getOperatorToken("%", 1));
458                 Debug.log(Debug.TRACE,"Added Percent token to Vector: ");
459             }
460             Debug.log(Debug.TRACE,"Number parsed : " + num);
461         }
462     }
463 
464 
465     /**
466      * Term will parse multiplication/division expressions
467      */
term()468     private void term() throws FormulaParsingException {
469         factor();
470         while(isMultiOp()) {
471             multiOp(Character.toString(look));
472         }
473     }
474 
475     /**
476      * Expression is the entry point for the parser. It is the code
477      * that parses addition/subtraction expressions.
478      */
expression()479     private void expression() throws FormulaParsingException {
480 
481         if (look == '"') { //Extract a quoted string...
482             StringBuffer buff = new StringBuffer();
483             boolean success = true;
484             success = getChar();
485             while (look != '"' && success) {
486                 buff.append(look);
487                 success = getChar();
488             }
489 
490             if (look != '"') { //We've reached the end of the string without getting a closing quote
491                 throw new FormulaParsingException("Expected closing quote." + makeErrorString());
492             } else {
493                 tokenVector.add(tokenFactory.getOperandToken(buff.toString(), "STRING"));
494                 getChar();      //Move on to the next character
495             }
496         } else {
497             term();
498         }
499         while(isAddOp(look) || isLogicalOp()) {
500             if (isAddOp(look)) {
501                 addOp(Character.toString(look));
502             } else if (isLogicalOp()) {
503                 logicalOp();
504             }
505         }
506     }
507 
508     /**
509      * Test to see if the next token (represented as a <code>String</code>) is
510      * the same as the String passed in. Move the index along to the end of
511      * that String and add that <code>Token</code> to the tokenVector. Then
512      * call <code>term</code> to parse the right hand side of the operator.
513      *
514      * @param op A <code>String</code> representing the operator
515      */
addOp(String op)516     private void addOp(String op) throws FormulaParsingException {
517         match(op);
518         tokenVector.add(tokenFactory.getOperatorToken(op, 2));
519         term();
520     }
521 
522     /**
523      * Test to see if the next token (represented as a <code>String</code>) is
524      * the same as the String passed in. Move the index along to the end of
525      * that String and add that <code>Token</code> to the tokenVector. Then
526      * call <code>factor</code> to parse the right hand side of the operator.
527      *
528      * @param op A <code>String</code> representing the operator
529      */
multiOp(String op)530     private void multiOp(String op) throws FormulaParsingException {
531         match(op);
532         tokenVector.add(tokenFactory.getOperatorToken(op, 2));
533         factor();
534     }
535 
536     /**
537      * Pull a logical operator starting at the current index, add a token for
538      * that operator to the tokenVector and call <code>term</code> to parse the
539      * right hand side of the operator
540      */
logicalOp()541     private void logicalOp()  throws FormulaParsingException {
542         String op = getLogicalOperator();
543         tokenVector.add(tokenFactory.getOperatorToken(op, 2));
544         term();
545     }
546 
makeErrorString()547     private String makeErrorString() {
548         StringBuffer buff = new StringBuffer();
549         for (int i=0; i<index-1; i++) {
550             buff.append(' ');
551         }
552 
553         buff.append('^');
554         return "\n\t" + formulaStr + "\n\t" + buff.toString();
555     }
556  }
557