1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 package com.sun.star.help;
29 
30 import java.io.FileInputStream;
31 import java.io.FileOutputStream;
32 import java.util.Arrays;
33 import java.util.HashSet;
34 import java.util.List;
35 import java.util.zip.ZipEntry;
36 import java.util.zip.ZipOutputStream;
37 import java.util.zip.CRC32;
38 import org.apache.lucene.analysis.standard.StandardAnalyzer;
39 import org.apache.lucene.analysis.cjk.CJKAnalyzer;
40 import org.apache.lucene.analysis.Analyzer;
41 import org.apache.lucene.index.IndexWriter;
42 
43 import java.io.File;
44 import java.io.FileNotFoundException;
45 import java.io.IOException;
46 import java.util.Date;
47 
48 
49 /**
50    When this tool is used with long path names on Windows, that is paths which start
51    with \\?\, then the caller must make sure that the path is unique. This is achieved
52    by removing '.' and '..' from the path. Paths which are created by
53    osl_getSystemPathFromFileURL fulfill this requirement. This is necessary because
54    lucene is patched to not use File.getCanonicalPath. See long_path.patch in the lucene
55    module.
56  */
57 public class HelpIndexerTool
58 {
59     public HelpIndexerTool()
60 	{
61     }
62 
63 
64     /**
65      * @param args the command line arguments
66      */
67     public static void main( String[] args )
68 	{
69 		boolean bExtensionMode = false;
70 		mainImpl( args, bExtensionMode );
71 	}
72 
73     public static void mainImpl( String[] args, boolean bExtensionMode )
74 	{
75         String aDirToZipStr = "";
76         String aSrcDirStr = "";
77         String aLanguageStr = "";
78         String aModule = "";
79         String aTargetZipFileStr = "";
80         String aCfsName = "";
81         String aSegmentName = "";
82 
83         // Scan arguments
84         //If this tool is invoked in the build process for extensions help,
85         //then -extension must be set.
86         boolean bExtension = false;
87         boolean bLang = false;
88         boolean bMod = false;
89         boolean bZipDir = false;
90         boolean bSrcDir = false;
91         boolean bOutput = false;
92         boolean bCfsName = false;
93         boolean bSegmentName = false;
94 
95         int nArgCount = args.length;
96         for( int i = 0 ; i < nArgCount ; i++ )
97 		{
98             if( "-extension".equals(args[i]) )
99 			{
100                 bExtension = true;
101             }
102             else if( "-lang".equals(args[i]) )
103 			{
104                 if( i + 1 < nArgCount )
105 				{
106                     aLanguageStr = args[i + 1];
107                     bLang = true;
108                 }
109                 i++;
110             }
111 			else if( "-mod".equals(args[i]) )
112 			{
113                 if( i + 1 < nArgCount )
114 				{
115                     aModule = args[i + 1];
116                     bMod = true;
117                 }
118                 i++;
119             }
120 			else if( "-zipdir".equals(args[i]) )
121 			{
122                 if( i + 1 < nArgCount )
123 				{
124                     aDirToZipStr = args[i + 1];
125                     bZipDir = true;
126                 }
127                 i++;
128             }
129 			else if( "-srcdir".equals(args[i]) )
130 			{
131                 if( i + 1 < nArgCount )
132 				{
133                     aSrcDirStr = args[i + 1];
134                     bSrcDir = true;
135                 }
136                 i++;
137             }
138 			else if( "-o".equals(args[i]) )
139 			{
140                 if( i + 1 < nArgCount )
141 				{
142                     aTargetZipFileStr = args[i + 1];
143                     bOutput = true;
144                 }
145                 i++;
146             }
147 			else if( "-checkcfsandsegname".equals(args[i]) )
148 			{
149                 if( i + 1 < nArgCount )
150 				{
151                     aCfsName = args[i + 1] + ".cfs";
152                     bCfsName = true;
153                 }
154                 i++;
155                 if( i + 1 < nArgCount )
156 				{
157                     aSegmentName = "segments" + args[i + 1];
158                     bSegmentName = true;
159                 }
160                 i++;
161                 if (!(bCfsName && bSegmentName))
162                 {
163                     System.out.println("Usage: HelpIndexer -checkcfsandsegname _0 _3 (2 arguments needed)");
164                     System.exit( -1 );
165                 }
166             }
167         }
168 
169         if( !bLang || !bMod || !bZipDir || (!bOutput && !bExtensionMode && !bExtension) )
170 		{
171 			if( bExtensionMode )
172 				return;
173 
174 			System.out.println("Usage: HelpIndexer -lang ISOLangCode -mod HelpModule -zipdir TempZipDir -o OutputZipFile");
175             System.out.println("Usage: HelpIndexer -extension -lang ISOLangCode -mod HelpModule -zipdir PathToLangDir");
176 			System.exit( -1 );
177         }
178 
179         String aIndexDirName = aModule + ".idxl";
180         File aIndexDir = new File( aDirToZipStr + File.separator + aIndexDirName );
181 		if( !bSrcDir )
182 			aSrcDirStr = aDirToZipStr;
183         File aCaptionFilesDir = new File( aSrcDirStr + File.separator + "caption" );
184         File aContentFilesDir = new File( aSrcDirStr + File.separator + "content" );
185 
186         try
187 		{
188             Date start = new Date();
189             Analyzer analyzer = aLanguageStr.equals("ja") ? (Analyzer)new CJKAnalyzer() : (Analyzer)new StandardAnalyzer();
190             IndexWriter writer = new IndexWriter( aIndexDir, analyzer, true );
191 			if( !bExtensionMode )
192 	            System.out.println( "Lucene: Indexing to directory '" + aIndexDir + "'..." );
193             int nRet = indexDocs( writer, aModule, bExtensionMode, aCaptionFilesDir, aContentFilesDir );
194             if( nRet != -1 )
195 			{
196 				if( !bExtensionMode )
197 				{
198 					System.out.println();
199 					System.out.println( "Optimizing ..." );
200 				}
201                 writer.optimize();
202             }
203             writer.close();
204 
205 			boolean bCfsFileOk = true;
206 			boolean bSegmentFileOk = true;
207 			if( bCfsName && bSegmentName && !bExtensionMode && nRet != -1 )
208 			{
209 				String aCompleteCfsFileName = aDirToZipStr + File.separator + aIndexDirName + File.separator + aCfsName;
210 				String aCompleteSegmentFileName = aDirToZipStr + File.separator + aIndexDirName + File.separator + aSegmentName;
211 				File aCfsFile = new File( aCompleteCfsFileName );
212 				File aSegmentFile = new File( aCompleteSegmentFileName );
213 				bCfsFileOk = aCfsFile.exists();
214 				bSegmentFileOk = aSegmentFile.exists();
215 				System.out.println( "Checking cfs file " + aCfsName+ ": " + (bCfsFileOk ? "Found" : "Not found") );
216 				System.out.println( "Checking segment file " + aSegmentName+ ": " + (bSegmentFileOk ? "Found" : "Not found") );
217 			}
218 
219 			if( bExtensionMode || bExtension)
220 			{
221 				if( !bSrcDir )
222 				{
223 					deleteRecursively( aCaptionFilesDir );
224 					deleteRecursively( aContentFilesDir );
225 				}
226 			}
227 			else
228 			{
229 				if( nRet == -1 )
230 					deleteRecursively( aIndexDir );
231 
232 				if( bCfsFileOk && bSegmentFileOk )
233 					System.out.println( "Zipping ..." );
234 				File aDirToZipFile = new File( aDirToZipStr );
235 				createZipFile( aDirToZipFile, aTargetZipFileStr );
236 				deleteRecursively( aDirToZipFile );
237 			}
238 
239 			if( !bCfsFileOk )
240 			{
241 				System.out.println( "cfs file check failed, terminating..." );
242 				System.exit( -1 );
243 			}
244 
245 			if( !bSegmentFileOk )
246 			{
247 				System.out.println( "segment file check failed, terminating..." );
248 				System.exit( -1 );
249 			}
250 
251 			Date end = new Date();
252 			if( !bExtensionMode )
253 				System.out.println(end.getTime() - start.getTime() + " total milliseconds");
254         }
255 		catch (IOException e)
256 		{
257 			if( bExtensionMode )
258 				return;
259 
260 			System.out.println(" caught a " + e.getClass() +
261 				"\n with message: " + e.getMessage());
262 			System.exit( -1 );
263         }
264     }
265 
266 	private static int indexDocs(IndexWriter writer, String aModule, boolean bExtensionMode,
267 		File aCaptionFilesDir, File aContentFilesDir) throws IOException
268 	{
269         if( !aCaptionFilesDir.canRead() || !aCaptionFilesDir.isDirectory() )
270 		{
271 			if( !bExtensionMode )
272 	            System.out.println( "Not found: " + aCaptionFilesDir );
273             return -1;
274         }
275         if( !aContentFilesDir.canRead() || !aContentFilesDir.isDirectory() )
276 		{
277 			if( !bExtensionMode )
278 	            System.out.println( "Not found: " + aContentFilesDir );
279             return -1;
280         }
281 
282         String[] aCaptionFiles = aCaptionFilesDir.list();
283         List aCaptionFilesList = Arrays.asList( aCaptionFiles );
284         HashSet aCaptionFilesHashSet = new HashSet( aCaptionFilesList );
285 
286         String[] aContentFiles = aContentFilesDir.list();
287         List aContentFilesList = Arrays.asList( aContentFiles );
288         HashSet aContentFilesHashSet = new HashSet( aContentFilesList );
289 
290         // Loop over caption files and find corresponding content file
291 		if( !bExtensionMode )
292 	        System.out.println( "Indexing, adding files" );
293         int nCaptionFilesLen = aCaptionFiles.length;
294         for( int i = 0 ; i < nCaptionFilesLen ; i++ )
295 		{
296             String aCaptionFileStr = aCaptionFiles[i];
297             File aCaptionFile = new File( aCaptionFilesDir, aCaptionFileStr );
298             File aContentFile = null;
299             if( aContentFilesHashSet.contains( aCaptionFileStr ) )
300                 aContentFile = new File( aContentFilesDir, aCaptionFileStr );
301 
302 			if( !bExtensionMode )
303 				System.out.print( "." );
304             writer.addDocument( HelpFileDocument.Document( aModule, aCaptionFile, aContentFile ) );
305         }
306 
307         // Loop over content files to find remaining files not mapped to caption files
308         int nContentFilesLen = aContentFiles.length;
309         for( int i = 0 ; i < nContentFilesLen ; i++ )
310 		{
311             String aContentFileStr = aContentFiles[i];
312             if( !aCaptionFilesHashSet.contains( aContentFileStr ) )
313 			{
314                 // Not already handled in caption files loop
315                 File aCaptionFile = null;
316                 File aContentFile = new File( aContentFilesDir, aContentFileStr );
317 				if( !bExtensionMode )
318 					System.out.print( "." );
319                 writer.addDocument( HelpFileDocument.Document( aModule, aCaptionFile, aContentFile ) );
320             }
321         }
322         return 0;
323     }
324 
325     public static void createZipFile( File aDirToZip, String aTargetZipFileStr )
326             throws FileNotFoundException, IOException
327 	{
328         FileOutputStream fos = new FileOutputStream( aTargetZipFileStr );
329         ZipOutputStream zos = new ZipOutputStream( fos );
330 
331         File[] aChildrenFiles = aDirToZip.listFiles();
332         int nFileCount = aChildrenFiles.length;
333         for( int i = 0 ; i < nFileCount ; i++ )
334             addToZipRecursively( zos, aChildrenFiles[i], null );
335 
336         zos.close();
337     }
338 
339     public static void addToZipRecursively( ZipOutputStream zos, File aFile, String aBasePath )
340             throws FileNotFoundException, IOException
341 	{
342         if( aFile.isDirectory() )
343 		{
344             String aDirName = aFile.getName();
345             if( aDirName.equalsIgnoreCase( "caption" ) || aDirName.equalsIgnoreCase( "content" ) )
346                 return;
347 
348             File[] aChildrenFiles = aFile.listFiles();
349             String aNewBasePath = "";
350             if( aBasePath != null )
351                 aNewBasePath += aBasePath + File.separator;
352             aNewBasePath += aDirName;
353 
354             int nFileCount = aChildrenFiles.length;
355             for( int i = 0 ; i < nFileCount ; i++ )
356                 addToZipRecursively( zos, aChildrenFiles[i], aNewBasePath );
357 
358             return;
359         }
360 
361         // No directory
362         // read contents of file we are going to put in the zip
363         int fileLength = (int) aFile.length();
364         FileInputStream fis = new FileInputStream( aFile );
365         byte[] wholeFile = new byte[fileLength];
366         int bytesRead = fis.read( wholeFile, 0, fileLength );
367         fis.close();
368 
369         String aFileName = aFile.getName();
370         String aEntryName = "";
371         if( aBasePath != null )
372             aEntryName += aBasePath + "/";
373         aEntryName += aFileName;
374         ZipEntry aZipEntry = new ZipEntry( aEntryName );
375         aZipEntry.setTime( aFile.lastModified() );
376         aZipEntry.setSize( fileLength );
377 
378         int nMethod = ( aFileName.toLowerCase().endsWith( ".jar" ) )
379                 ? ZipEntry.STORED : ZipEntry.DEFLATED;
380         aZipEntry.setMethod( nMethod );
381 
382         CRC32 tempCRC = new CRC32();
383         tempCRC.update( wholeFile, 0, wholeFile.length );
384         aZipEntry.setCrc( tempCRC.getValue() );
385 
386         // write the contents into the zip element
387         zos.putNextEntry( aZipEntry );
388         zos.write( wholeFile, 0, fileLength );
389         zos.closeEntry();
390     }
391 
392     static public boolean deleteRecursively( File aFile )
393 	{
394         if( aFile.isDirectory() )
395 		{
396             File[] aChildrenFiles = aFile.listFiles();
397             int nFileCount = aChildrenFiles.length;
398             for( int i = 0 ; i < nFileCount ; i++ )
399 			{
400                 File aChildrenFile = aChildrenFiles[i];
401                 boolean bSuccess = deleteRecursively( aChildrenFile );
402                 if( !bSuccess )
403                     return false;
404             }
405         }
406 
407         return aFile.delete();
408     }
409 }
410 
411