1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 package org.openoffice.xmerge.converter.xml.sxc.pexcel.records.formula;
25 
26 import java.io.*;
27 import java.util.Vector;
28 import java.util.Enumeration;
29 
30 import org.openoffice.xmerge.util.Debug;
31 import org.openoffice.xmerge.util.EndianConverter;
32 import org.openoffice.xmerge.converter.xml.sxc.pexcel.records.DefinedName;
33 import org.openoffice.xmerge.converter.xml.sxc.pexcel.records.Workbook;
34 
35 /**
36  * The TokenDecoder decodes a byte[] to an equivalent <code>String</code>. The only
37  * public method apart from the default constructor is the getTokenVector method.
38  * This method takes an entire formula as a pexcel byte[] and decodes it into
39  * a series of <code>Token</code>s. It adds these to a <code>Vector</code> which
40  * is returned once all the tokens have been decoded. The Decoder supports
41  * the following tokens.<br><br>
42  *
43  * Operands		Floating point's, Cell references (absolute and relative),
44  *				cell ranges<br>
45  * Operators	+,-,*,/,&lt;,&gt;.&lt;=,&gt;=,&lt;&gt;<br>
46  * Functions	All pexcel fixed and varaible argument functions
47  *
48  */
49 public class TokenDecoder {
50 
51     private TokenFactory tf;
52     private FunctionLookup fl;
53     private OperatorLookup operatorLookup;
54     private OperandLookup operandLookup;
55 	private Workbook wb;
56 
57     /**
58      * Default Constructor initializes the <code>TokenFactory</code> for generating
59      * <code>Token</code> and the <code>SymbolLookup</code> for generating
60      * Strings from hex values.
61      */
TokenDecoder()62     public TokenDecoder() {
63         tf = new TokenFactory();
64         fl = new FunctionLookup();
65         operatorLookup = new OperatorLookup();
66         operandLookup = new OperandLookup();
67     }
68 
69   	/**
70 	 * Sets global workbook data needed for defined names
71 	 */
setWorkbook(Workbook wb)72    	public void setWorkbook(Workbook wb) {
73 
74 		Debug.log(Debug.TRACE, "TokenDecoder : setWorkbook");
75 		this.wb = wb;
76 	}
77 
78     /**
79      * Returns a <code>Vector</code> of <code>Token</code> decoded from a
80 	 * byte[]. The byte[] is first converted to a
81 	 * <code>ByteArrayInputStream</code> as this is the easiest way of reading
82 	 * bytes.
83 	 *
84 	 * @param formula A Pocket Excel Formula byte[]
85 	 * @return A <code>Vector</code> of deoded <code>Token</code>
86      */
getTokenVector(byte[] formula)87     public Vector getTokenVector(byte[] formula) {
88 
89 		Vector v = new Vector();
90 
91         ByteArrayInputStream bis = new ByteArrayInputStream(formula);
92         int b = 0 ;
93         Token t;
94 
95         while ((b = bis.read())!=-1)
96         {
97 
98 
99             switch (b) {
100 
101             	case TokenConstants.TAREA3D:
102 								Debug.log(Debug.TRACE, "Decoded 3D Area Cell Reference: ");
103                 				v.add(read3DCellAreaRefToken(bis));
104 								Debug.log(Debug.TRACE, "Decoded 3D Area Cell Reference: " + v.lastElement());
105                                 break;
106             	case TokenConstants.TREF3D:
107 								Debug.log(Debug.TRACE, "Decoded 3D Cell Reference: ");
108                 				v.add(read3DCellRefToken(bis));
109 								Debug.log(Debug.TRACE, "Decoded 3D Cell Reference: " + v.lastElement());
110                                 break;
111             	case TokenConstants.TREF :
112                 				v.add(readCellRefToken(bis));
113 								Debug.log(Debug.TRACE, "Decoded Cell Reference: " + v.lastElement());
114                                 break;
115                 case TokenConstants.TAREA :
116                                 v.add(readCellAreaRefToken(bis));
117 								Debug.log(Debug.TRACE, "Decoded Cell Area Reference: " + v.lastElement());
118                                 break;
119                 case TokenConstants.TNUM :
120                                 v.add(readNumToken(bis));
121 								Debug.log(Debug.TRACE, "Decoded number : " + v.lastElement());
122                                 break;
123                 case TokenConstants.TFUNCVAR :
124                                 v.add(readFunctionVarToken(bis));
125 								Debug.log(Debug.TRACE, "Decoded variable argument function: " + v.lastElement());
126                                 break;
127 				case TokenConstants.TFUNC :
128                                 v.add(readFunctionToken(bis));
129 								Debug.log(Debug.TRACE, "Decoded function: " + v.lastElement());
130                                 break;
131 				case TokenConstants.TSTRING :
132 								v.add(readStringToken(bis));
133 								Debug.log(Debug.TRACE, "Decoded string: " + v.lastElement());
134                                 break;
135 				case TokenConstants.TNAME :
136 								v.add(readNameToken(bis));
137 								Debug.log(Debug.TRACE, "Decoded defined name: " + v.lastElement());
138                                 break;
139                 case TokenConstants.TUPLUS:
140                 case TokenConstants.TUMINUS:
141                 case TokenConstants.TPERCENT:
142                                 v.add(readOperatorToken(b, 1));
143 								Debug.log(Debug.TRACE, "Decoded Unary operator : " + v.lastElement());
144                                 break;
145                 case TokenConstants.TADD :
146                 case TokenConstants.TSUB :
147                 case TokenConstants.TMUL :
148                 case TokenConstants.TDIV :
149                 case TokenConstants.TLESS :
150                 case TokenConstants.TLESSEQUALS :
151                 case TokenConstants.TEQUALS :
152                 case TokenConstants.TGTEQUALS :
153                 case TokenConstants.TGREATER :
154                 case TokenConstants.TNEQUALS :
155                                 v.add(readOperatorToken(b, 2));
156 								Debug.log(Debug.TRACE, "Decoded Binary operator : " + v.lastElement());
157                                 break;
158 
159                 default :
160 								Debug.log(Debug.TRACE, "Unrecognized byte : " + b);
161             }
162         }
163         return v;
164     }
165 
166     /**
167      * Converts a zero based integer to a char (eg. a=0, b=1).
168      * It assumes the integer is less than 26.
169 	 *
170 	 * @param i A 0 based index
171 	 * @return The equivalent character
172      */
int2Char(int i)173     private char int2Char(int i) {
174         return (char) ('A' + i);
175     }
176 
177     /**
178      * Reads a Cell Reference token from the <code>ByteArrayInputStream</code>
179 	 *
180 	 * @param bis The <code>ByteArrayInputStream</code> from which we read the
181 	 * bytes.
182 	 * @return The decoded String <code>Token</code>
183      */
readStringToken(ByteArrayInputStream bis)184     private Token readStringToken(ByteArrayInputStream bis) {
185 
186 		int len = ((int)bis.read())*2;
187 		int options = (int)bis.read();
188 		Debug.log(Debug.TRACE,"String length is " + len + " and Options Flag is " + options);
189 		byte [] stringBytes = new byte[len];
190 		int numRead =0;
191 		if ((numRead = bis.read(stringBytes, 0, len)) != len) {
192 			Debug.log(Debug.TRACE,"Expected " + len + " bytes. Could only read " + numRead + " bytes.");
193 			//throw new IOException("Expected " + len + " bytes. Could only read " + numRead + " bytes.");
194 		}
195         StringBuffer outputString = new StringBuffer();
196 		outputString.append('"');
197 		try {
198 			Debug.log(Debug.TRACE,"Using LE encoding");
199 			outputString.append(new String(stringBytes, "UTF-16LE"));
200 		} catch (IOException eIO) {
201 			outputString.append(new String(stringBytes)); //fall back to default encoding
202 		}
203 		outputString.append('"');
204 
205         return (tf.getOperandToken(outputString.toString(), "STRING"));
206     }
207 
208     /**
209      * Reads a Defined Name  token from the <code>ByteArrayInputStream</code>
210 	 *
211 	 * @param bis The <code>ByteArrayInputStream</code> from which we read the
212 	 * bytes.
213 	 * @return The decoded Name <code>Token</code>
214      */
readNameToken(ByteArrayInputStream bis)215     private Token readNameToken(ByteArrayInputStream bis) {
216 		byte buffer[] = new byte[2];
217 		buffer[0] = (byte) bis.read();
218         buffer[1] = (byte) bis.read();
219         int nameIndex = EndianConverter.readShort(buffer);
220 		bis.skip(12);		// the next 12 bytes are unused
221 		Enumeration e = wb.getDefinedNames();
222 		int i = 1;
223 		while(i<nameIndex) {
224 			e.nextElement();
225 			i++;
226 		}
227 		Debug.log(Debug.TRACE,"Name index is " + nameIndex);
228 		DefinedName dn = (DefinedName)e.nextElement();
229 		Debug.log(Debug.TRACE,"DefinedName is " + dn.getName());
230         return (tf.getOperandToken(dn.getName(), "NAME"));
231     }
232 
233     /**
234      * Reads a Cell Reference token from the <code>ByteArrayInputStream</code>
235 	 *
236 	 * @param bis The <code>ByteArrayInputStream</code> from which we read the
237 	 * bytes.
238 	 * @return The decoded Cell Reference <code>Token</code>
239      */
readCellRefToken(ByteArrayInputStream bis)240     private Token readCellRefToken(ByteArrayInputStream bis) {
241 
242         byte buffer[] = new byte[2];
243         String outputString = new String();
244 
245         buffer[0] = (byte) bis.read();
246         buffer[1] = (byte) bis.read();
247         int formulaRow = EndianConverter.readShort(buffer);
248         int relativeFlags = (formulaRow & 0xC000)>>14;
249         formulaRow &= 0x3FFF;
250         int formulaCol = (byte) bis.read();
251 
252         outputString = int2CellStr(formulaRow, formulaCol, relativeFlags);
253 
254         return (tf.getOperandToken(outputString,"CELL_REFERENCE"));
255     }
256 
257     /**
258      * Reads a Cell Reference token from the <code>ByteArrayInputStream</code>
259 	 *
260 	 * @param bis The <code>ByteArrayInputStream</code> from which we read the
261 	 * bytes.
262 	 * @return The decoded Cell Reference <code>Token</code>
263      */
read3DCellRefToken(ByteArrayInputStream bis)264     private Token read3DCellRefToken(ByteArrayInputStream bis) {
265 
266         byte buffer[] = new byte[2];
267         String outputString = new String();
268 
269 		bis.skip(10);
270 
271 	   	buffer[0] = (byte) bis.read();
272  		buffer[1] = (byte) bis.read();
273 		int Sheet1 = EndianConverter.readShort(buffer);
274 	   	buffer[0] = (byte) bis.read();
275  		buffer[1] = (byte) bis.read();
276 		int Sheet2 = EndianConverter.readShort(buffer);
277 
278         buffer[0] = (byte) bis.read();
279         buffer[1] = (byte) bis.read();
280         int formulaRow = EndianConverter.readShort(buffer);
281         int relativeFlags = (formulaRow & 0xC000)>>14;
282         formulaRow &= 0x3FFF;
283         int formulaCol = (byte) bis.read();
284         String cellRef = "." + int2CellStr(formulaRow, formulaCol, relativeFlags);
285 		if(Sheet1 == Sheet2) {
286 			outputString = "$" + wb.getSheetName(Sheet1) + cellRef;
287 		} else {
288 			outputString = "$" + wb.getSheetName(Sheet1) + cellRef + ":$" + wb.getSheetName(Sheet2) + cellRef;
289 		}
290 
291         return (tf.getOperandToken(outputString,"3D_CELL_REFERENCE"));
292     }
293 
294     /**
295      * Reads a Cell Reference token from the <code>ByteArrayInputStream</code>
296 	 *
297 	 * @param bis The <code>ByteArrayInputStream</code> from which we read the
298 	 * bytes.
299 	 * @return The decoded Cell Reference <code>Token</code>
300      */
read3DCellAreaRefToken(ByteArrayInputStream bis)301     private Token read3DCellAreaRefToken(ByteArrayInputStream bis) {
302 
303         byte buffer[] = new byte[2];
304         String outputString = new String();
305 
306 		bis.skip(10);
307 
308 	   	buffer[0] = (byte) bis.read();
309  		buffer[1] = (byte) bis.read();
310 		int Sheet1 = EndianConverter.readShort(buffer);
311 	   	buffer[0] = (byte) bis.read();
312  		buffer[1] = (byte) bis.read();
313 		int Sheet2 = EndianConverter.readShort(buffer);
314 
315         buffer[0] = (byte) bis.read();
316         buffer[1] = (byte) bis.read();
317         int formulaRow1 = EndianConverter.readShort(buffer);
318         int relativeFlags1 = (formulaRow1 & 0xC000)>>14;
319         formulaRow1 &= 0x3FFF;
320 
321         buffer[0] = (byte) bis.read();
322         buffer[1] = (byte) bis.read();
323         int formulaRow2 = EndianConverter.readShort(buffer);
324         int relativeFlags2 = (formulaRow2 & 0xC000)>>14;
325         formulaRow2 &= 0x3FFF;
326 
327         int formulaCol1 = (byte) bis.read();
328         int formulaCol2 = (byte) bis.read();
329 
330         String cellRef1 = "." + int2CellStr(formulaRow1, formulaCol1, relativeFlags1);
331 		String cellRef2 = int2CellStr(formulaRow2, formulaCol2, relativeFlags2);
332 
333 		if(Sheet1 == Sheet2) {
334 			outputString = "$" + wb.getSheetName(Sheet1) + cellRef1 + ":" + cellRef2;
335 		} else {
336 			outputString = "$" + wb.getSheetName(Sheet1) + cellRef1 + ":$" + wb.getSheetName(Sheet2) + "." + cellRef2;
337 		}
338 
339         return (tf.getOperandToken(outputString,"3D_CELL_AREA_REFERENCE"));
340     }
341 
342     /**
343      * Converts a row and col 0 based index to a spreadsheet cell reference.
344      * It also has a relativeFlags which indicates whether or not the
345      * Cell Reference is relative or absolute (Absolute is denoted with '$')
346      *
347      * 00 = absolute row, absolute col
348      * 01 = absolute row, relative col
349      * 10 = relative row, absolute col
350      * 11 = relative row, relative col
351 	 *
352 	 * @param row The cell reference 0 based index to the row
353 	 * @param col The cell reference 0 based index to the row
354 	 * @param relativeFlags Flags indicating addressing of row and column
355 	 * @return A <code>String</code> representing a cell reference
356      */
int2CellStr(int row, int col, int relativeFlags)357     private String int2CellStr(int row, int col, int relativeFlags) {
358        String outputString = "";
359        int firstChar = (col + 1) / 26;
360 
361        if((relativeFlags & 1) == 0) {
362            outputString += "$";
363        }
364 
365         if(firstChar>0) {
366                 int secondChar = (col + 1) % 26;
367                 outputString += Character.toString(int2Char(firstChar - 1)) + Character.toString(int2Char(secondChar - 1));
368         } else {
369                 outputString += Character.toString(int2Char(col));
370         }
371        if((relativeFlags & 2) == 0) {
372            outputString += "$";
373        }
374        outputString += Integer.toString(row+1);
375        return outputString;
376     }
377 
378     /**
379      * Reads a Cell Area Reference (cell range) <code>Token</code> from
380 	 * the <code>ByteArrayInputStream</code>
381 	 *
382 	 * @param bis The <code>ByteArrayInputStream</code> from which we read the
383 	 * bytes.
384 	 * @return The equivalent Cell Area Reference (cell range)
385 	 * <code>Token</code>
386      */
readCellAreaRefToken(ByteArrayInputStream bis)387     private Token readCellAreaRefToken(ByteArrayInputStream bis) {
388         byte buffer[] = new byte[2];
389         int formulaRow1, formulaRow2;
390         int formulaCol1, formulaCol2;
391 
392         String outputString = new String();
393 
394         buffer[0] = (byte) bis.read();
395         buffer[1] = (byte) bis.read();
396         formulaRow1 = EndianConverter.readShort(buffer);
397         int relativeFlags1 = (formulaRow1 & 0xC000)>>14;
398         formulaRow1 &= 0x3FFF;
399         buffer[0] = (byte) bis.read();
400         buffer[1] = (byte) bis.read();
401         formulaRow2 = EndianConverter.readShort(buffer);
402         int relativeFlags2 = (formulaRow2 & 0xC000)>>14;
403         formulaRow2 &= 0x3FFF;
404 
405         formulaCol1 = (byte) bis.read();
406         formulaCol2 = (byte) bis.read();
407 
408         outputString = int2CellStr(formulaRow1, formulaCol1, relativeFlags1);
409         outputString += (":" + int2CellStr(formulaRow2, formulaCol2, relativeFlags2));
410 
411         return (tf.getOperandToken(outputString,"CELL_AREA_REFERENCE"));
412     }
413 
414 
415     /**
416      * Reads a Number (floating point) token from the <code>ByteArrayInputStream</code>
417 	 *
418 	 * @param bis The <code>ByteArrayInputStream</code> from which we read the
419 	 * bytes.
420 	 * @return The decoded Integer <code>Token</code>
421      */
readNumToken(ByteArrayInputStream bis)422     private Token readNumToken(ByteArrayInputStream bis) {
423 
424         byte numBuffer[] = new byte[8];
425 
426         for(int j=0;j<8;j++) {
427                 numBuffer[j]=(byte) bis.read();
428         }
429 
430         return (tf.getOperandToken(Double.toString(EndianConverter.readDouble(numBuffer)),"NUMBER"));
431     }
432 
433     /**
434      * Read an Operator token from the <code>ByteArrayInputStream</code>
435 	 *
436 	 * @param b A Pocket Excel number representing an operator.
437 	 * @param args The number of arguments this operator takes.
438 	 * @return The decoded Operator <code>Token</code>
439      */
readOperatorToken(int b, int args)440     private Token readOperatorToken(int b, int args) {
441 
442 		Token t;
443 
444 		if(b==TokenConstants.TUPLUS) {
445 			t = tf.getOperatorToken("+", args);
446 		} else if(b==TokenConstants.TUMINUS) {
447 			t = tf.getOperatorToken("-", args);
448 		} else {
449 			t = tf.getOperatorToken(operatorLookup.getStringFromID(b), args);
450 		}
451         return t;
452      }
453 
454     /**
455      * Read a Function token from the <code>ByteArrayInputStream</code>
456      * This function can have any number of arguments and this number is read
457      * in with the record
458 	 *
459 	 * @param bis The <code>ByteArrayInputStream</code> from which we read the
460 	 * bytes.
461 	 * @return The decoded variable argument Function <code>Token</code>
462      */
readFunctionVarToken(ByteArrayInputStream bis)463     private Token readFunctionVarToken(ByteArrayInputStream bis) {
464 
465         int numArgs = 0;
466         numArgs = bis.read();
467         byte buffer[] = new byte[2];
468         buffer[0] = (byte) bis.read();
469         buffer[1] = (byte) bis.read();
470         int functionID = EndianConverter.readShort(buffer);
471         return (tf.getFunctionToken(fl.getStringFromID(functionID),numArgs));
472     }
473 
474     /**
475      * Read a Function token from the <code>ByteArrayInputStream</code>
476      * This function has a fixed number of arguments which it will get
477      * from <code>FunctionLookup</code>.
478 	 *
479 	 * @param bis The <code>ByteArrayInputStream</code> from which we read the
480 	 * bytes.
481 	 * @return The decoded fixed argument Function <code>Token</code>
482      */
readFunctionToken(ByteArrayInputStream bis)483     private Token readFunctionToken(ByteArrayInputStream bis) {
484 
485         byte buffer[] = new byte[2];
486         buffer[0] = (byte) bis.read();
487         buffer[1] = (byte) bis.read();
488         int functionID = EndianConverter.readShort(buffer);
489         String functionName = fl.getStringFromID(functionID);
490         return (tf.getFunctionToken(functionName,fl.getArgCountFromString(functionName)));
491     }
492 
493 }
494