1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 package org.openoffice.xmerge.converter.xml.sxc.pexcel.records.formula; 25 26 27 import java.util.Vector; 28 29 import org.openoffice.xmerge.converter.xml.sxc.pexcel.records.Workbook; 30 import org.openoffice.xmerge.util.Debug; 31 32 /** 33 * This is the Formula Parser based on an article written by Jack Crenshaw. It is a 34 * top down parser with some basic error handling. It handles 35 * +,-,*,/,>,<,>=,<=,=,<>, unary + and - as well as functions. 36 * The BNF notation for this parser is 37 * <pre> 38 * <expression> ::= <unary op> <term> [<addop>|<logop> <term>] 39 * <term> ::= <factor> [<mulop> <factor>] 40 * <factor> ::= <number>[%] | <CellRef> | <QuoteString> | <expression> 41 * </pre> 42 */ 43 public class FormulaParser { 44 45 private char look; 46 private String formulaStr; 47 private int index = 1; 48 private TokenFactory tokenFactory; 49 private Vector tokenVector; 50 private Workbook wb; 51 52 /** 53 * Default constructor 54 */ FormulaParser()55 public FormulaParser() { 56 57 Debug.log(Debug.TRACE,"Creating a Formula Parser"); 58 tokenFactory = new TokenFactory(); 59 tokenVector = new Vector(); 60 } 61 62 /** 63 * 64 */ setWorkbook(Workbook wb)65 public void setWorkbook(Workbook wb) { 66 67 this.wb = wb; 68 } 69 70 /** 71 * Parse method for parsing from a String to a byte[] 72 * 73 * @param formula A <code>String</code> representation of a formula 74 * starting with the '=' character 75 * @return A <code>Vector</code> containing the parsed <code>Token</code>s 76 */ parse(String formula)77 public Vector parse(String formula) throws FormulaParsingException { 78 79 index = 1; 80 look = ' '; 81 tokenVector.clear(); 82 if(formula.startsWith("=")) { 83 formulaStr = formula; 84 Debug.log(Debug.TRACE,"Creating a Formula Parser for " + formulaStr); 85 getChar(); 86 expression(); 87 } else { 88 throw new FormulaParsingException("No equals found!" + makeErrorString()); 89 } 90 return tokenVector; 91 } 92 93 /** 94 * Identify + and - operators 95 * 96 * @param c The character which is to be identified 97 * @return A boolean returning the result of the comparison 98 */ isAddOp(char c)99 private boolean isAddOp(char c) { 100 return (c == '-') || (c == '+'); 101 } 102 103 /** 104 * Determine if the current character is a multiop 105 * 106 * @return A boolean returning the result of the comparison 107 */ isMultiOp()108 private boolean isMultiOp() { 109 return look=='*' || look =='/' || look == '^' || look == '&'; 110 } 111 112 /** 113 * Identify <, >, <=, >=, =, <> using the index to find the current character(s) 114 * 115 * @return A boolean returning the result of the comparison 116 */ isLogicalOp()117 private boolean isLogicalOp() { 118 if (!isLogicalOpChar(look)) { 119 return false; 120 } else if ((index+1) >= formulaStr.length()) {//logical operators in their own right : if at end then return true 121 return true; 122 } else if (!isLogicalOpChar(formulaStr.charAt(index))) { // we have >, < or = on their own 123 return true; 124 } else if ((look == '<') && ((formulaStr.charAt(index) == '>') || formulaStr.charAt(index) == '=')) { // <>, or <= 125 return true; 126 } else if ((look == '>') && (formulaStr.charAt(index) == '=')) { // >= 127 return true; 128 } 129 130 return false; 131 } 132 133 /** 134 * Identify <, >, <=, >=, =, <> 135 * 136 * @param op The <code>String</code> which is to be identified 137 * @return A boolean returning the result of the comparison 138 */ isLogicalOp(String op)139 private boolean isLogicalOp(String op) { 140 return ((op.compareTo(">") == 0) || 141 (op.compareTo("<") == 0) || 142 (op.compareTo(">=") == 0) || 143 (op.compareTo("<=") == 0) || 144 (op.compareTo("=") == 0) || 145 (op.compareTo("<>") == 0)); 146 } 147 148 149 /** 150 * Identify characters that MAY be logical operator characters 151 * 152 * @param c The character which is to be identified 153 * @return A boolean returning the result of the comparison 154 */ isLogicalOpChar(char c)155 private boolean isLogicalOpChar(char c) { 156 return (c == '>') || (c == '<') || (c == '='); 157 } 158 159 /** 160 * Identify special Cell Reference charaters 161 * 162 * @param c The character which is to be identified 163 * @return A boolean returning the result of the comparison 164 */ isCellRefSpecialChar(char c)165 private boolean isCellRefSpecialChar(char c) { 166 return (c == ':') || (c == '$') || (c == '.'); 167 } 168 169 /** 170 * Identify letters 171 * 172 * @param c The character which is to be identified 173 * @return A boolean returning the result of the comparison 174 */ isAlpha(char c)175 private boolean isAlpha(char c) { 176 return(Character.isLetter(c)); 177 } 178 179 /** 180 * Identify numbers 181 * 182 * @param c The character which is to be identified 183 * @return A boolean returning the result of the comparison 184 */ isDigit(char c)185 private boolean isDigit(char c) { 186 return(Character.isDigit(c)); 187 } 188 189 /** 190 * Identify numbers 191 * 192 * @param c The character which is to be identified 193 * @return A boolean returning the result of the comparison 194 */ isPercent(char c)195 private boolean isPercent(char c) { 196 return (c == '%'); 197 } 198 199 /** 200 * Identify letters or numbers 201 * 202 * @param c The character which is to be identified 203 * @return A boolean returning the result of the comparison 204 */ isAlphaNum(char c)205 private boolean isAlphaNum(char c) { 206 return(isAlpha(c) || isDigit(c)); 207 } 208 209 /** 210 * Identify valid Characters for cell references 211 * 212 * @param c The character which is to be identified 213 * @return A boolean returning the result of the comparison 214 */ isCellRefChar(char c)215 private boolean isCellRefChar(char c) { 216 return(isAlpha(c) || isDigit(c) || isCellRefSpecialChar(c)); 217 } 218 219 /** 220 * Test if current character is a match and move to next character 221 * 222 * @param c The character which is to be matched 223 */ match(char c)224 private void match(char c) throws FormulaParsingException { 225 226 if(look==c) { 227 Debug.log(Debug.TRACE,"Operator Found : " + look); 228 getChar(); 229 skipWhite(); 230 } 231 else 232 throw new FormulaParsingException("Unexpected character '" + c + "'" + makeErrorString()); 233 } 234 235 /** 236 * Test if current character is a match and move to next character 237 * 238 * @param symbol The <code>String</code> to be matched. 239 */ match(String symbol)240 private void match(String symbol) throws FormulaParsingException { 241 242 int numChars = symbol.length(); 243 boolean bContinue = true; 244 for (int i=0;i<numChars && bContinue; i++) { 245 if (look == symbol.charAt(i)) { 246 bContinue = getChar(); 247 skipWhite(); 248 } else { 249 throw new FormulaParsingException("Unexpected character '" + symbol + "'" + makeErrorString()); 250 } 251 } 252 } 253 254 /** 255 * Skip over whitespaces (ie. spaces and tabs) 256 */ skipWhite()257 private void skipWhite() throws FormulaParsingException { 258 259 boolean success = true; 260 261 while(Character.isWhitespace(look) && success) { 262 success = getChar(); 263 } 264 } 265 266 /** 267 * This is a factor for multiplication and division operators 268 */ factor()269 private void factor() throws FormulaParsingException { 270 if(isAddOp(look)) { // handle unary addop 271 Character ch = new Character(look); 272 match(look); 273 tokenVector.add(tokenFactory.getOperatorToken(ch.toString(), 1)); 274 } 275 if(look=='(') { 276 match('('); 277 tokenVector.add(tokenFactory.getOperatorToken("(", 1)); 278 expression(); 279 match(')'); 280 tokenVector.add(tokenFactory.getOperatorToken(")", 1)); 281 } else if(isDigit(look)){ 282 getNum(); 283 } else { 284 ident(); 285 } 286 } 287 288 /** 289 * Pulls the next character from the <code>String</code> 290 * 291 * @return boolean false if the end if the statement 292 * is reached otherwise true 293 */ getChar()294 private boolean getChar() throws FormulaParsingException { 295 296 boolean success = true; 297 298 if(index<formulaStr.length()) { 299 look = formulaStr.charAt(index); 300 index++; 301 if(look==',') 302 success = false; 303 } else { 304 success = false; 305 } 306 return success; 307 } 308 309 /** 310 * Parses the number of arguments in a function 311 * 312 * @return The number of arguments 313 */ arguments()314 private int arguments() throws FormulaParsingException { 315 int numArgs; 316 317 skipWhite(); 318 if(look==')') 319 numArgs = 0; 320 else 321 numArgs = 1; 322 323 while(look!=')') { 324 expression(); 325 if(look==',') { 326 numArgs++; 327 match(','); 328 tokenVector.add(tokenFactory.getOperatorToken(",", 1)); 329 } 330 } 331 return numArgs; 332 } 333 334 /** 335 * Test to see if we have come across a cell reference or a Name 336 * Definition. 337 */ isCellRef(String s)338 private boolean isCellRef(String s) { 339 char c; 340 boolean result = false; 341 342 for(int i = 0;i<s.length();i++) { 343 c = s.charAt(i); 344 if(isCellRefSpecialChar(c)) { 345 result = true; 346 break; 347 } 348 } 349 350 // if it is a simple cell reference then there will not be a cell 351 // reference 'special char' so we should also look for a digit 352 if(!result) { 353 if(isDigit(s.charAt(1)) || isDigit(s.charAt(2))) { 354 result = true; 355 } 356 } 357 return result; 358 } 359 360 /** 361 * Test to see if we have come across a cell reference or a function and 362 * add the resulting toek nto the tokenVector. 363 */ ident()364 private void ident() throws FormulaParsingException { 365 366 String cell = getTokenString(); 367 if(look=='(') { 368 Debug.log(Debug.TRACE,"Found Function : " + cell); 369 370 int index = tokenVector.size(); 371 match('('); 372 tokenVector.add(tokenFactory.getOperatorToken("(", 1)); 373 int numArgs = arguments(); 374 match(')'); 375 tokenVector.add(tokenFactory.getOperatorToken(")", 1)); 376 tokenVector.insertElementAt(tokenFactory.getFunctionToken(cell, numArgs), index); 377 } else { 378 379 if(cell.indexOf('.')!=-1) { 380 String cellRef = cell.substring(cell.indexOf('.') + 1, cell.length()); 381 if(cellRef.indexOf(':')!=-1) { 382 tokenVector.add(tokenFactory.getOperandToken(cell, "3D_CELL_AREA_REFERENCE")); 383 } else { 384 tokenVector.add(tokenFactory.getOperandToken(cell, "3D_CELL_REFERENCE")); 385 } 386 } else if(cell.indexOf(':')!=-1) { 387 tokenVector.add(tokenFactory.getOperandToken(cell, "CELL_AREA_REFERENCE")); 388 } else if(isCellRef(cell)) { 389 tokenVector.add(tokenFactory.getOperandToken(cell, "CELL_REFERENCE")); 390 } else { 391 tokenVector.add(tokenFactory.getOperandToken(cell, "NAME")); 392 } 393 } 394 } 395 396 /** 397 * Will keep pulling valid logical operators from the formula and return 398 * the resultant <code>String</code>. 399 * 400 * @return a <code>String</code> representing a logical operator 401 */ getLogicalOperator()402 private String getLogicalOperator() throws FormulaParsingException { 403 String op = new String(); 404 boolean status; 405 406 do { 407 op += look; 408 status = getChar(); 409 } while(isLogicalOpChar(look) && status); 410 skipWhite(); 411 return op; 412 } 413 414 /** 415 * Keeps pulling characters from the statement until we get an 416 * operator and returns the resulting string. 417 * 418 * @return A <code>String</code>representing the next token 419 */ getTokenString()420 private String getTokenString() throws FormulaParsingException { 421 422 if(!isAlpha(look) && look!='$') 423 throw new FormulaParsingException("Expected Cell Reference" + makeErrorString()); 424 else { 425 String cell = new String(); 426 boolean status; 427 do { 428 cell += look; 429 status = getChar(); 430 } while(isCellRefChar(look) && status); 431 skipWhite(); 432 return cell; 433 } 434 } 435 436 /** 437 * Keeps pulling numbers from the statement and add the resulting integer 438 * token to the tokenVector. 439 */ getNum()440 private void getNum() throws FormulaParsingException { 441 442 Debug.log(Debug.TRACE,"getNum : "); 443 if(!isDigit(look)) 444 throw new FormulaParsingException("Expected Integer" + makeErrorString()); 445 else { 446 String num = new String(); 447 boolean status; 448 449 do { 450 num += look; 451 status = getChar(); 452 } while((isDigit(look) || ((look == '.') && isDigit(formulaStr.charAt(index)))) && status); 453 skipWhite(); 454 tokenVector.add(tokenFactory.getOperandToken(num, "INTEGER")); 455 if(isPercent(look)) { 456 match(look); 457 tokenVector.add(tokenFactory.getOperatorToken("%", 1)); 458 Debug.log(Debug.TRACE,"Added Percent token to Vector: "); 459 } 460 Debug.log(Debug.TRACE,"Number parsed : " + num); 461 } 462 } 463 464 465 /** 466 * Term will parse multiplication/division expressions 467 */ term()468 private void term() throws FormulaParsingException { 469 factor(); 470 while(isMultiOp()) { 471 multiOp(Character.toString(look)); 472 } 473 } 474 475 /** 476 * Expression is the entry point for the parser. It is the code 477 * that parses addition/subtraction expressions. 478 */ expression()479 private void expression() throws FormulaParsingException { 480 481 if (look == '"') { //Extract a quoted string... 482 StringBuffer buff = new StringBuffer(); 483 boolean success = true; 484 success = getChar(); 485 while (look != '"' && success) { 486 buff.append(look); 487 success = getChar(); 488 } 489 490 if (look != '"') { //We've reached the end of the string without getting a closing quote 491 throw new FormulaParsingException("Expected closing quote." + makeErrorString()); 492 } else { 493 tokenVector.add(tokenFactory.getOperandToken(buff.toString(), "STRING")); 494 getChar(); //Move on to the next character 495 } 496 } else { 497 term(); 498 } 499 while(isAddOp(look) || isLogicalOp()) { 500 if (isAddOp(look)) { 501 addOp(Character.toString(look)); 502 } else if (isLogicalOp()) { 503 logicalOp(); 504 } 505 } 506 } 507 508 /** 509 * Test to see if the next token (represented as a <code>String</code>) is 510 * the same as the String passed in. Move the index along to the end of 511 * that String and add that <code>Token</code> to the tokenVector. Then 512 * call <code>term</code> to parse the right hand side of the operator. 513 * 514 * @param op A <code>String</code> representing the operator 515 */ addOp(String op)516 private void addOp(String op) throws FormulaParsingException { 517 match(op); 518 tokenVector.add(tokenFactory.getOperatorToken(op, 2)); 519 term(); 520 } 521 522 /** 523 * Test to see if the next token (represented as a <code>String</code>) is 524 * the same as the String passed in. Move the index along to the end of 525 * that String and add that <code>Token</code> to the tokenVector. Then 526 * call <code>factor</code> to parse the right hand side of the operator. 527 * 528 * @param op A <code>String</code> representing the operator 529 */ multiOp(String op)530 private void multiOp(String op) throws FormulaParsingException { 531 match(op); 532 tokenVector.add(tokenFactory.getOperatorToken(op, 2)); 533 factor(); 534 } 535 536 /** 537 * Pull a logical operator starting at the current index, add a token for 538 * that operator to the tokenVector and call <code>term</code> to parse the 539 * right hand side of the operator 540 */ logicalOp()541 private void logicalOp() throws FormulaParsingException { 542 String op = getLogicalOperator(); 543 tokenVector.add(tokenFactory.getOperatorToken(op, 2)); 544 term(); 545 } 546 makeErrorString()547 private String makeErrorString() { 548 StringBuffer buff = new StringBuffer(); 549 for (int i=0; i<index-1; i++) { 550 buff.append(' '); 551 } 552 553 buff.append('^'); 554 return "\n\t" + formulaStr + "\n\t" + buff.toString(); 555 } 556 } 557 558