1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 package org.openoffice.xmerge.converter.xml.sxw.aportisdoc; 25 26 import java.io.ByteArrayInputStream; 27 import java.io.DataInputStream; 28 import java.io.IOException; 29 30 import org.openoffice.xmerge.converter.palm.Record; 31 import org.openoffice.xmerge.util.Resources; 32 import org.openoffice.xmerge.util.Debug; 33 34 /** 35 * This class is used by {@link DocumentDeserializerImpl} 36 * to decode the AportisDoc format. It currently decodes 37 * the text content into a single <code>String</code> object. 38 * 39 * @author Herbie Ong 40 */ 41 final class DocDecoder implements DocConstants { 42 43 /** For decoding purposes. */ 44 private final static int COUNT_BITS = 3; 45 46 /** Resources object for I18N. */ 47 private Resources res = null; 48 49 50 /** 51 * Default constructor creates a header and a text buffer 52 * for holding all the text in the AportisDoc database. 53 */ DocDecoder()54 DocDecoder() { 55 res = Resources.getInstance(); 56 } 57 58 59 /** 60 * Decode the text records into a single <code>String</code> 61 * of text content. 62 * 63 * @param recs <code>Record</code> array holding AportisDoc 64 * contents. 65 * 66 * @throws IOException If any I/O error occurs. 67 */ parseRecords(Record[] recs)68 String parseRecords(Record[] recs) throws IOException { 69 70 // read the header record 71 HeaderInfo header = readHeader(recs[0].getBytes()); 72 73 dumpHeader(header); 74 75 // store all the characters in textBuffer 76 StringBuffer textBuffer = new StringBuffer(header.textLen); 77 78 switch (header.version) { 79 80 case COMPRESSED: 81 for (int i = 1; i <= header.textRecordCount; i++) { 82 83 byte[] bytes = decompress(recs[i].getBytes(), 84 header.textRecordSize); 85 log("processing " + bytes.length + " bytes"); 86 String str = new String(bytes, ENCODING); 87 textBuffer.append(str); 88 } 89 90 break; 91 92 case UNCOMPRESSED: 93 for (int i = 1; i <= header.textRecordCount; i++) { 94 95 byte[] bytes = recs[i].getBytes(); 96 log("processing " + bytes.length + " bytes"); 97 String str = new String(bytes, ENCODING); 98 textBuffer.append(str); 99 } 100 101 break; 102 103 default: 104 throw new IOException(res.getString("UNKNOWN_DOC_VERSION")); 105 106 } 107 108 return textBuffer.toString(); 109 } 110 111 112 /** 113 * <p>Decompress the <code>byte</code> array.</p> 114 * 115 * <p>The resulting uncompressed <code>byte</code> array should 116 * be within <code>textRecordSize</code> length, definitely 117 * within twice the size it claims, else treat it as a problem 118 * with the encoding of that PDB and throw 119 * <code>IOException</code>.</p> 120 * 121 * @param cBytes Compressed <code>byte</code> array. 122 * @param textRecordSize Size of uncompressed 123 * <code>byte</code> array. 124 * 125 * @throws IOException If <code>textRecordSize</code> < 126 * <code>cBytes.length</code>. 127 */ decompress(byte[] cBytes, int textRecordSize)128 private byte[] decompress(byte[] cBytes, int textRecordSize) 129 throws IOException { 130 131 // create byte array for storing uncompressed bytes 132 // it should be within textRecordSize range, definitely 133 // within twice of textRecordSize! if not, then 134 // an ArrayIndexOutOfBoundsException will get thrown, 135 // and it should be converted into an IOException, and 136 // treat it as a conversion error. 137 byte[] uBytes = new byte[textRecordSize*2]; 138 139 int up = 0; 140 int cp = 0; 141 142 try { 143 144 while (cp < cBytes.length) { 145 146 int c = cBytes[cp++] & 0xff; 147 148 // codes 1...8 mean copy that many bytes 149 if (c > 0 && c < 9) { 150 151 while (c-- > 0) 152 uBytes[up++] = cBytes[cp++]; 153 } 154 155 // codes 0, 9...0x7F represent themselves 156 else if (c < 0x80) { 157 uBytes[up++] = (byte) c; 158 } 159 160 // codes 0xC0...0xFF represent "space + ascii char" 161 else if (c >= 0xC0) { 162 uBytes[up++] = (byte) ' '; 163 uBytes[up++] = (byte) (c ^ 0x80); 164 } 165 166 // codes 0x80...0xBf represent sequences 167 else { 168 c <<= 8; 169 c += cBytes[cp++] & 0xff; 170 int m = (c & 0x3fff) >> COUNT_BITS; 171 int n = c & ((1 << COUNT_BITS) - 1); 172 n += COUNT_BITS; 173 while (n-- > 0) { 174 uBytes[up] = uBytes[up - m]; 175 up++; 176 } 177 } 178 } 179 180 } catch (ArrayIndexOutOfBoundsException e) { 181 182 throw new IOException( 183 res.getString("DOC_TEXT_RECORD_SIZE_EXCEEDED")); 184 } 185 186 // note that ubytes may be larger that the amount of 187 // uncompressed bytes, so trim it to another byte array 188 // with the exact size. 189 byte[] textBytes = new byte[up]; 190 System.arraycopy(uBytes, 0, textBytes, 0, up); 191 192 return textBytes; 193 } 194 195 196 /** 197 * Read the header <code>byte</code> array. 198 * 199 * @param bytes <code>byte</code> array containing header 200 * record data. 201 * 202 * @return <code>HeaderInfo</code> object. 203 * 204 * @throws IOException If any I/O error occurs. 205 */ readHeader(byte[] bytes)206 private HeaderInfo readHeader(byte[] bytes) throws IOException { 207 208 HeaderInfo header = new HeaderInfo(); 209 210 ByteArrayInputStream bis = new ByteArrayInputStream(bytes); 211 DataInputStream dis = new DataInputStream(bis); 212 213 // Normally the first 2 bytes comprised of the version 214 // which should either be COMPRESSED or UNCOMPRESSED 215 // SmartDoc/Quickword would add a 0x01 to the first 216 // byte, thus their version would be 0x0101 for UNCOMPRESSED 217 // instead of 0x0001 and 0x0102 for UNCOMPRESSED instead of 218 // 0x0002. 219 220 dis.readByte(); 221 header.version = dis.readByte(); 222 223 // read extra 2 unused bytes 224 dis.readShort(); 225 226 // Read the text length, this should be unsigned 4 bytes. 227 // We could store the read value into a long, but then 228 // our current buffer limit is the max positive of an int. 229 // That is a large enough limit, thus we shall stay with 230 // storing the value in an int. If it exceeds, then 231 // an IOException should be thrown. 232 header.textLen = dis.readInt(); 233 if (header.textLen < 0) { 234 throw new IOException(res.getString("DOC_TEXT_LENGTH_EXCEEDED")); 235 } 236 237 // read the number of records - unsigned 2 bytes 238 header.textRecordCount = ((int) dis.readShort()) & 0x0000ffff; 239 240 // read the record size - unsigned 2 bytes 241 header.textRecordSize = ((int) dis.readShort()) & 0x0000ffff; 242 243 // read extra 4 unused bytes 244 dis.readInt(); 245 246 return header; 247 } 248 249 250 /** 251 * Prints out header info into log. Used for debugging purposes only. 252 * 253 * @param header <code>HeaderInfo</code> structure. 254 */ dumpHeader(HeaderInfo header)255 private void dumpHeader(HeaderInfo header) { 256 257 log("<DOC_INFO "); 258 log("version=\"" + header.version + "\" "); 259 log("text-length=\"" + header.textLen + "\" "); 260 log("number-of-records=\"" + header.textRecordCount + "\" "); 261 log("record-size=\"" + header.textRecordSize + "\" />"); 262 } 263 264 265 /** 266 * Sends message to the log object. 267 * 268 * @param str Debug string message. 269 */ log(String str)270 private void log(String str) { 271 Debug.log(Debug.TRACE, str); 272 } 273 274 275 /** 276 * Inner class to store AportisDoc header information. 277 */ 278 private class HeaderInfo { 279 280 /** length of text section */ 281 int textLen = 0; 282 283 /** number of text records */ 284 int textRecordCount = 0; 285 286 /** 287 * size of a text record. This is normally the same as 288 * TEXT_RECORD_SIZE, but some applications may modify this. 289 */ 290 int textRecordSize = 0; 291 292 /** compression type */ 293 int version = 0; 294 } 295 } 296 297