1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 package org.openoffice.xmerge.converter.xml.sxc.pexcel.records.formula;
25 
26 
27 import java.util.Vector;
28 
29 import org.openoffice.xmerge.converter.xml.sxc.pexcel.records.Workbook;
30 import org.openoffice.xmerge.util.Debug;
31 
32 /**
33  * This is the Formula Parser based on an article written by Jack Crenshaw. It is a
34  * top down parser with some basic error handling. It handles
35  * +,-,*,/,>,<,>=,<=,=,<>, unary + and - as well as functions.
36  * The BNF notation for this parser is
37  * <pre>
38  *	&lt;expression&gt; ::= &lt;unary op&gt; &lt;term&gt; [&lt;addop&gt;|&lt;logop&gt; &lt;term&gt;]
39  *	&lt;term&gt;       ::= &lt;factor&gt; [&lt;mulop&gt; &lt;factor&gt;]
40  *	&lt;factor&gt;     ::= &lt;number&gt;[%] | &lt;CellRef&gt; | &lt;QuoteString&gt; | &lt;expression&gt;
41  * </pre>
42  */
43 public class FormulaParser {
44 
45 	private char look;
46 	private String formulaStr;
47 	private int index = 1;
48 	private TokenFactory tokenFactory;
49 	private Vector tokenVector;
50 	private Workbook wb;
51 
52  	/**
53  	 * Default constructor
54  	 */
FormulaParser()55  	public FormulaParser() {
56 
57 		Debug.log(Debug.TRACE,"Creating a Formula Parser");
58 		tokenFactory = new TokenFactory();
59 		tokenVector = new Vector();
60 	}
61 
62 	/**
63 	 *
64 	 */
setWorkbook(Workbook wb)65 	public void setWorkbook(Workbook wb) {
66 
67 		this.wb = wb;
68 	}
69 
70 	/**
71 	 * Parse method for parsing from a String to a byte[]
72 	 *
73 	 * @param formula A <code>String</code> representation of a formula
74 	 * starting with the '=' character
75 	 * @return A <code>Vector</code> containing the parsed <code>Token</code>s
76 	 */
parse(String formula)77 	public Vector parse(String formula) throws FormulaParsingException {
78 
79 		index = 1;
80 		look = ' ';
81 		tokenVector.clear();
82 		if(formula.startsWith("=")) {
83 			formulaStr = formula;
84 			Debug.log(Debug.TRACE,"Creating a Formula Parser for " + formulaStr);
85 			getChar();
86 			expression();
87 		} else {
88 			throw new FormulaParsingException("No equals found!" + makeErrorString());
89 		}
90 		return tokenVector;
91 	}
92 
93 	/**
94 	 * Identify + and - operators
95 	 *
96 	 * @param  c The character which is to be identified
97 	 * @return A boolean returning the result of the comparison
98 	 */
isAddOp(char c)99 	private boolean isAddOp(char c) {
100  		return (c == '-') || (c == '+');
101  	}
102 
103 	/**
104 	 * Determine if the current character is a multiop
105 	 *
106 	 * @return A boolean returning the result of the comparison
107 	 */
isMultiOp()108 	private boolean isMultiOp() {
109 		return look=='*' || look =='/' || look == '^' || look == '&';
110 	}
111 
112 	/**
113 	 * Identify &lt;, &gt;, &lt;=, &gt;=, =, &lt;&gt; using the index to find the current character(s)
114 	 *
115 	 * @return A boolean returning the result of the comparison
116 	 */
isLogicalOp()117 	private boolean isLogicalOp() {
118 		if (!isLogicalOpChar(look)) {
119 			return false;
120 		} else if ((index+1) >= formulaStr.length()) {//logical operators in their own right : if at end then return true
121 			return true;
122 		} else if (!isLogicalOpChar(formulaStr.charAt(index))) { // we have >, < or = on their own
123 			return true;
124 		} else if ((look == '<') && ((formulaStr.charAt(index) == '>') || formulaStr.charAt(index) == '=')) { // <>, or <=
125 			return true;
126 		} else if ((look == '>')  && (formulaStr.charAt(index) == '=')) { // >=
127 			return true;
128 		}
129 
130 		return false;
131  	}
132 
133 	/**
134 	 * Identify &lt;, &gt;, &lt;=, &gt;=, =, &lt;&gt;
135 	 *
136 	 * @param op The <code>String</code> which is to be identified
137 	 * @return A boolean returning the result of the comparison
138 	 */
isLogicalOp(String op)139 	private boolean isLogicalOp(String op) {
140 		return	((op.compareTo(">") == 0) ||
141 				 (op.compareTo("<") == 0) ||
142 				 (op.compareTo(">=") == 0) ||
143 				 (op.compareTo("<=") == 0) ||
144 				 (op.compareTo("=") == 0) ||
145 				 (op.compareTo("<>") == 0));
146 	}
147 
148 
149 	/**
150 	 * Identify characters that MAY be logical operator characters
151 	 *
152 	 * @param  c The character which is to be identified
153 	 * @return A boolean returning the result of the comparison
154 	 */
isLogicalOpChar(char c)155 	private boolean isLogicalOpChar(char c) {
156 		return (c == '>') || (c == '<') || (c == '=');
157 	}
158 
159  	/**
160  	 * Identify special Cell Reference charaters
161 	 *
162 	 * @param  c The character which is to be identified
163 	 * @return A boolean returning the result of the comparison
164  	 */
isCellRefSpecialChar(char c)165  	private boolean isCellRefSpecialChar(char c) {
166  		return (c == ':') || (c == '$') || (c == '.');
167  	}
168 
169  	/**
170  	 * Identify letters
171 	 *
172 	 * @param  c The character which is to be identified
173 	 * @return A boolean returning the result of the comparison
174  	 */
isAlpha(char c)175  	private boolean isAlpha(char c) {
176  		return(Character.isLetter(c));
177  	}
178 
179  	/**
180  	 * Identify numbers
181 	 *
182 	 * @param  c The character which is to be identified
183 	 * @return A boolean returning the result of the comparison
184  	 */
isDigit(char c)185  	private boolean isDigit(char c) {
186  		return(Character.isDigit(c));
187  	}
188 
189  	/**
190  	 * Identify numbers
191 	 *
192 	 * @param  c The character which is to be identified
193 	 * @return A boolean returning the result of the comparison
194  	 */
isPercent(char c)195  	private boolean isPercent(char c) {
196  		return (c == '%');
197  	}
198 
199  	/**
200  	 * Identify letters or numbers
201 	 *
202 	 * @param  c The character which is to be identified
203 	 * @return A boolean returning the result of the comparison
204  	 */
isAlphaNum(char c)205  	private boolean isAlphaNum(char c) {
206  		return(isAlpha(c) || isDigit(c));
207  	}
208 
209     /**
210  	 * Identify valid Characters for cell references
211 	 *
212 	 * @param  c The character which is to be identified
213 	 * @return A boolean returning the result of the comparison
214  	 */
isCellRefChar(char c)215  	private boolean isCellRefChar(char c) {
216  		return(isAlpha(c) || isDigit(c) || isCellRefSpecialChar(c));
217  	}
218 
219 	/**
220 	 * Test if current character is a match and move to next character
221 	 *
222 	 * @param  c The character which is to be matched
223 	 */
match(char c)224  	private void match(char c) throws FormulaParsingException {
225 
226  		if(look==c) {
227 			Debug.log(Debug.TRACE,"Operator Found : " + look);
228  			getChar();
229  			skipWhite();
230  		}
231  		else
232  			throw new FormulaParsingException("Unexpected character '" + c + "'" + makeErrorString());
233  	}
234 
235 	/**
236 	 * Test if current character is a match and move to next character
237 	 *
238 	 * @param symbol The <code>String</code> to be matched.
239 	 */
match(String symbol)240  	private void match(String symbol) throws FormulaParsingException {
241 
242  		int numChars = symbol.length();
243 		boolean bContinue = true;
244 		for (int i=0;i<numChars && bContinue; i++) {
245 			if (look == symbol.charAt(i)) {
246 				bContinue = getChar();
247 				skipWhite();
248 			} else {
249 				throw new FormulaParsingException("Unexpected character '" + symbol + "'" + makeErrorString());
250 			}
251 		}
252  	}
253 
254  	/**
255  	 * Skip over whitespaces (ie. spaces and tabs)
256  	 */
skipWhite()257  	private void skipWhite() throws FormulaParsingException {
258 
259  		boolean success = true;
260 
261  		while(Character.isWhitespace(look) && success) {
262  			success = getChar();
263  		}
264  	}
265 
266  	/**
267  	 * This is a factor for multiplication and division operators
268  	 */
factor()269  	private void factor() throws FormulaParsingException {
270 		if(isAddOp(look)) {			// handle unary addop
271 			Character ch = new Character(look);
272 			match(look);
273 			tokenVector.add(tokenFactory.getOperatorToken(ch.toString(), 1));
274 		}
275  		if(look=='(') {
276  			match('(');
277 			tokenVector.add(tokenFactory.getOperatorToken("(", 1));
278  			expression();
279  			match(')');
280                         tokenVector.add(tokenFactory.getOperatorToken(")", 1));
281  		} else if(isDigit(look)){
282  			getNum();
283  		} else {
284 			ident();
285  		}
286  	}
287 
288  	/**
289  	 * Pulls the next character from the <code>String</code>
290  	 *
291  	 * @return boolean	false if the end if the statement
292  	 * 					is reached otherwise true
293  	 */
getChar()294  	private boolean getChar() throws FormulaParsingException {
295 
296  			boolean success = true;
297 
298  			if(index<formulaStr.length()) {
299  				look = formulaStr.charAt(index);
300  				index++;
301                                 if(look==',')
302                                     success = false;
303  			} else {
304  				success = false;
305  			}
306  			return success;
307  	}
308 
309 	/**
310 	 * Parses the number of arguments in a function
311 	 *
312 	 * @return The number of arguments
313 	 */
arguments()314 	private int arguments() throws FormulaParsingException {
315 		int numArgs;
316 
317 		skipWhite();
318 		if(look==')')
319 			numArgs = 0;
320 		else
321 			numArgs = 1;
322 
323 		while(look!=')') {
324 			expression();
325 			if(look==',') {
326 				numArgs++;
327 				match(',');
328 				tokenVector.add(tokenFactory.getOperatorToken(",", 1));
329 			}
330 		}
331 		return numArgs;
332 	}
333 
334  	/**
335  	 * Test to see if we have come across a cell reference or a Name
336 	 * Definition.
337 	 */
isCellRef(String s)338 	 private boolean isCellRef(String s) {
339 	 	char c;
340 		boolean result = false;
341 
342 		for(int i = 0;i<s.length();i++) {
343 			c = s.charAt(i);
344 			if(isCellRefSpecialChar(c)) {
345 				result = true;
346 				break;
347 			}
348 		}
349 
350 		// if it is a simple cell reference then there will not be a cell
351 		// reference 'special char' so we should also look for a digit
352 		if(!result) {
353 			if(isDigit(s.charAt(1)) || isDigit(s.charAt(2))) {
354 				result = true;
355 			}
356 		}
357 		return result;
358 	 }
359 
360  	/**
361  	 * Test to see if we have come across a cell reference or a function and
362 	 * add the resulting toek nto the tokenVector.
363 	 */
ident()364  	private void ident() throws FormulaParsingException {
365 
366  		String cell = getTokenString();
367 		if(look=='(') {
368 			Debug.log(Debug.TRACE,"Found Function : " + cell);
369 
370 			int index = tokenVector.size();
371  			match('(');
372 			tokenVector.add(tokenFactory.getOperatorToken("(", 1));
373 			int numArgs = arguments();
374 			match(')');
375 			tokenVector.add(tokenFactory.getOperatorToken(")", 1));
376 			tokenVector.insertElementAt(tokenFactory.getFunctionToken(cell, numArgs), index);
377  		} else {
378 
379 			if(cell.indexOf('.')!=-1) {
380 				String cellRef = cell.substring(cell.indexOf('.') + 1, cell.length());
381 				if(cellRef.indexOf(':')!=-1) {
382 					tokenVector.add(tokenFactory.getOperandToken(cell, "3D_CELL_AREA_REFERENCE"));
383 				} else {
384 					tokenVector.add(tokenFactory.getOperandToken(cell, "3D_CELL_REFERENCE"));
385 				}
386 			} else if(cell.indexOf(':')!=-1) {
387 				tokenVector.add(tokenFactory.getOperandToken(cell, "CELL_AREA_REFERENCE"));
388 			} else if(isCellRef(cell)) {
389 				tokenVector.add(tokenFactory.getOperandToken(cell, "CELL_REFERENCE"));
390 			} else {
391 				tokenVector.add(tokenFactory.getOperandToken(cell, "NAME"));
392 			}
393  		}
394  	}
395 
396  	/**
397 	 * Will keep pulling valid logical operators from the formula and return
398 	 * the resultant <code>String</code>.
399 	 *
400 	 * @return a <code>String</code> representing a logical operator
401 	 */
getLogicalOperator()402 	private String getLogicalOperator() throws FormulaParsingException {
403 		String op = new String();
404 		boolean status;
405 
406 		do {
407 			op += look;
408             status = getChar();
409 		} while(isLogicalOpChar(look) && status);
410  		skipWhite();
411         return op;
412 	}
413 
414  	/**
415  	 * Keeps pulling characters from the statement until we get an
416      * operator and returns the resulting string.
417 	 *
418 	 * @return A <code>String</code>representing the next token
419  	 */
getTokenString()420   	private String getTokenString() throws FormulaParsingException {
421 
422  		if(!isAlpha(look) && look!='$')
423  			throw new FormulaParsingException("Expected Cell Reference" + makeErrorString());
424  		else {
425  			String cell = new String();
426                         boolean status;
427  			do {
428  				cell += look;
429                 status = getChar();
430  			} while(isCellRefChar(look) && status);
431  			skipWhite();
432                         return cell;
433    		}
434  	}
435 
436  	/**
437  	 * Keeps pulling numbers from the statement and add the resulting integer
438 	 * token to the tokenVector.
439  	 */
getNum()440  	private void getNum() throws FormulaParsingException {
441 
442 		Debug.log(Debug.TRACE,"getNum : ");
443  		if(!isDigit(look))
444  			throw new FormulaParsingException("Expected Integer" + makeErrorString());
445  		else {
446  			String num = new String();
447  			boolean status;
448 
449  			do {
450  				num += look;
451 				status = getChar();
452  			} while((isDigit(look) || ((look == '.') && isDigit(formulaStr.charAt(index)))) && status);
453  			skipWhite();
454 			tokenVector.add(tokenFactory.getOperandToken(num, "INTEGER"));
455 			if(isPercent(look)) {
456 				match(look);
457 				tokenVector.add(tokenFactory.getOperatorToken("%", 1));
458 				Debug.log(Debug.TRACE,"Added Percent token to Vector: ");
459 			}
460 			Debug.log(Debug.TRACE,"Number parsed : " + num);
461  		}
462  	}
463 
464 
465 	/**
466      * Term will parse multiplication/division expressions
467      */
term()468  	private void term() throws FormulaParsingException {
469 		factor();
470 		while(isMultiOp()) {
471 			multiOp(Character.toString(look));
472 		}
473  	}
474 
475  	/**
476  	 * Expression is the entry point for the parser. It is the code
477      * that parses addition/subtraction expressions.
478  	 */
expression()479  	private void expression() throws FormulaParsingException {
480 
481 		if (look == '"') { //Extract a quoted string...
482 			StringBuffer buff = new StringBuffer();
483 			boolean success = true;
484 			success = getChar();
485 			while (look != '"' && success) {
486 				buff.append(look);
487 				success = getChar();
488 			}
489 
490 			if (look != '"') { //We've reached the end of the string without getting a closing quote
491 				throw new FormulaParsingException("Expected closing quote." + makeErrorString());
492 			} else {
493 				tokenVector.add(tokenFactory.getOperandToken(buff.toString(), "STRING"));
494 				getChar();		//Move on to the next character
495 			}
496 		} else {
497 			term();
498 		}
499 		while(isAddOp(look) || isLogicalOp()) {
500 			if (isAddOp(look)) {
501 				addOp(Character.toString(look));
502 			} else if (isLogicalOp()) {
503 				logicalOp();
504 			}
505 		}
506  	}
507 
508 	/**
509 	 * Test to see if the next token (represented as a <code>String</code>) is
510 	 * the same as the String passed in. Move the index along to the end of
511 	 * that String and add that <code>Token</code> to the tokenVector. Then
512 	 * call <code>term</code> to parse the right hand side of the operator.
513 	 *
514 	 * @param op A <code>String</code> representing the operator
515 	 */
addOp(String op)516 	private void addOp(String op) throws FormulaParsingException {
517  		match(op);
518 		tokenVector.add(tokenFactory.getOperatorToken(op, 2));
519  		term();
520 	}
521 
522 	/**
523 	 * Test to see if the next token (represented as a <code>String</code>) is
524 	 * the same as the String passed in. Move the index along to the end of
525 	 * that String and add that <code>Token</code> to the tokenVector. Then
526 	 * call <code>factor</code> to parse the right hand side of the operator.
527 	 *
528 	 * @param op A <code>String</code> representing the operator
529 	 */
multiOp(String op)530 	private void multiOp(String op) throws FormulaParsingException {
531 		match(op);
532 		tokenVector.add(tokenFactory.getOperatorToken(op, 2));
533  		factor();
534 	}
535 
536 	/**
537 	 * Pull a logical operator starting at the current index, add a token for
538 	 * that operator to the tokenVector and call <code>term</code> to parse the
539 	 * right hand side of the operator
540 	 */
logicalOp()541 	private void logicalOp()  throws FormulaParsingException {
542 		String op = getLogicalOperator();
543         tokenVector.add(tokenFactory.getOperatorToken(op, 2));
544         term();
545 	}
546 
makeErrorString()547 	private String makeErrorString() {
548 		StringBuffer buff = new StringBuffer();
549 		for (int i=0; i<index-1; i++) {
550 			buff.append(' ');
551 		}
552 
553 		buff.append('^');
554 		return "\n\t" + formulaStr + "\n\t" + buff.toString();
555 	}
556  }
557 
558