xref: /AOO42X/main/l10ntools/source/help/HelpIndexerTool.java (revision cdf0e10c4e3984b49a9502b011690b615761d4a3)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 package com.sun.star.help;
29 
30 import java.io.FileInputStream;
31 import java.io.FileOutputStream;
32 import java.util.Arrays;
33 import java.util.HashSet;
34 import java.util.List;
35 import java.util.zip.ZipEntry;
36 import java.util.zip.ZipOutputStream;
37 import java.util.zip.CRC32;
38 import org.apache.lucene.analysis.standard.StandardAnalyzer;
39 import org.apache.lucene.analysis.cjk.CJKAnalyzer;
40 import org.apache.lucene.analysis.Analyzer;
41 import org.apache.lucene.index.IndexWriter;
42 
43 import java.io.File;
44 import java.io.FileNotFoundException;
45 import java.io.IOException;
46 import java.util.Date;
47 
48 
49 /**
50    When this tool is used with long path names on Windows, that is paths which start
51    with \\?\, then the caller must make sure that the path is unique. This is achieved
52    by removing '.' and '..' from the path. Paths which are created by
53    osl_getSystemPathFromFileURL fulfill this requirement. This is necessary because
54    lucene is patched to not use File.getCanonicalPath. See long_path.patch in the lucene
55    module.
56  */
57 public class HelpIndexerTool
58 {
59     public HelpIndexerTool()
60     {
61     }
62 
63 
64     /**
65      * @param args the command line arguments
66      */
67     public static void main( String[] args )
68     {
69         boolean bExtensionMode = false;
70         mainImpl( args, bExtensionMode );
71     }
72 
73     public static void mainImpl( String[] args, boolean bExtensionMode )
74     {
75         String aDirToZipStr = "";
76         String aSrcDirStr = "";
77         String aLanguageStr = "";
78         String aModule = "";
79         String aTargetZipFileStr = "";
80         String aCfsName = "";
81         String aSegmentName = "";
82 
83         // Scan arguments
84         //If this tool is invoked in the build process for extensions help,
85         //then -extension must be set.
86         boolean bExtension = false;
87         boolean bLang = false;
88         boolean bMod = false;
89         boolean bZipDir = false;
90         boolean bSrcDir = false;
91         boolean bOutput = false;
92         boolean bCfsName = false;
93         boolean bSegmentName = false;
94 
95         int nArgCount = args.length;
96         for( int i = 0 ; i < nArgCount ; i++ )
97         {
98             if( "-extension".equals(args[i]) )
99             {
100                 bExtension = true;
101             }
102             else if( "-lang".equals(args[i]) )
103             {
104                 if( i + 1 < nArgCount )
105                 {
106                     aLanguageStr = args[i + 1];
107                     bLang = true;
108                 }
109                 i++;
110             }
111             else if( "-mod".equals(args[i]) )
112             {
113                 if( i + 1 < nArgCount )
114                 {
115                     aModule = args[i + 1];
116                     bMod = true;
117                 }
118                 i++;
119             }
120             else if( "-zipdir".equals(args[i]) )
121             {
122                 if( i + 1 < nArgCount )
123                 {
124                     aDirToZipStr = args[i + 1];
125                     bZipDir = true;
126                 }
127                 i++;
128             }
129             else if( "-srcdir".equals(args[i]) )
130             {
131                 if( i + 1 < nArgCount )
132                 {
133                     aSrcDirStr = args[i + 1];
134                     bSrcDir = true;
135                 }
136                 i++;
137             }
138             else if( "-o".equals(args[i]) )
139             {
140                 if( i + 1 < nArgCount )
141                 {
142                     aTargetZipFileStr = args[i + 1];
143                     bOutput = true;
144                 }
145                 i++;
146             }
147             else if( "-checkcfsandsegname".equals(args[i]) )
148             {
149                 if( i + 1 < nArgCount )
150                 {
151                     aCfsName = args[i + 1] + ".cfs";
152                     bCfsName = true;
153                 }
154                 i++;
155                 if( i + 1 < nArgCount )
156                 {
157                     aSegmentName = "segments" + args[i + 1];
158                     bSegmentName = true;
159                 }
160                 i++;
161                 if (!(bCfsName && bSegmentName))
162                 {
163                     System.out.println("Usage: HelpIndexer -checkcfsandsegname _0 _3 (2 arguments needed)");
164                     System.exit( -1 );
165                 }
166             }
167         }
168 
169         if( !bLang || !bMod || !bZipDir || (!bOutput && !bExtensionMode && !bExtension) )
170         {
171             if( bExtensionMode )
172                 return;
173 
174             System.out.println("Usage: HelpIndexer -lang ISOLangCode -mod HelpModule -zipdir TempZipDir -o OutputZipFile");
175             System.out.println("Usage: HelpIndexer -extension -lang ISOLangCode -mod HelpModule -zipdir PathToLangDir");
176             System.exit( -1 );
177         }
178 
179         String aIndexDirName = aModule + ".idxl";
180         File aIndexDir = new File( aDirToZipStr + File.separator + aIndexDirName );
181         if( !bSrcDir )
182             aSrcDirStr = aDirToZipStr;
183         File aCaptionFilesDir = new File( aSrcDirStr + File.separator + "caption" );
184         File aContentFilesDir = new File( aSrcDirStr + File.separator + "content" );
185 
186         try
187         {
188             Date start = new Date();
189             Analyzer analyzer = aLanguageStr.equals("ja") ? (Analyzer)new CJKAnalyzer() : (Analyzer)new StandardAnalyzer();
190             IndexWriter writer = new IndexWriter( aIndexDir, analyzer, true );
191             if( !bExtensionMode )
192                 System.out.println( "Lucene: Indexing to directory '" + aIndexDir + "'..." );
193             int nRet = indexDocs( writer, aModule, bExtensionMode, aCaptionFilesDir, aContentFilesDir );
194             if( nRet != -1 )
195             {
196                 if( !bExtensionMode )
197                 {
198                     System.out.println();
199                     System.out.println( "Optimizing ..." );
200                 }
201                 writer.optimize();
202             }
203             writer.close();
204 
205             boolean bCfsFileOk = true;
206             boolean bSegmentFileOk = true;
207             if( bCfsName && bSegmentName && !bExtensionMode && nRet != -1 )
208             {
209                 String aCompleteCfsFileName = aDirToZipStr + File.separator + aIndexDirName + File.separator + aCfsName;
210                 String aCompleteSegmentFileName = aDirToZipStr + File.separator + aIndexDirName + File.separator + aSegmentName;
211                 File aCfsFile = new File( aCompleteCfsFileName );
212                 File aSegmentFile = new File( aCompleteSegmentFileName );
213                 bCfsFileOk = aCfsFile.exists();
214                 bSegmentFileOk = aSegmentFile.exists();
215                 System.out.println( "Checking cfs file " + aCfsName+ ": " + (bCfsFileOk ? "Found" : "Not found") );
216                 System.out.println( "Checking segment file " + aSegmentName+ ": " + (bSegmentFileOk ? "Found" : "Not found") );
217             }
218 
219             if( bExtensionMode || bExtension)
220             {
221                 if( !bSrcDir )
222                 {
223                     deleteRecursively( aCaptionFilesDir );
224                     deleteRecursively( aContentFilesDir );
225                 }
226             }
227             else
228             {
229                 if( nRet == -1 )
230                     deleteRecursively( aIndexDir );
231 
232                 if( bCfsFileOk && bSegmentFileOk )
233                     System.out.println( "Zipping ..." );
234                 File aDirToZipFile = new File( aDirToZipStr );
235                 createZipFile( aDirToZipFile, aTargetZipFileStr );
236                 deleteRecursively( aDirToZipFile );
237             }
238 
239             if( !bCfsFileOk )
240             {
241                 System.out.println( "cfs file check failed, terminating..." );
242                 System.exit( -1 );
243             }
244 
245             if( !bSegmentFileOk )
246             {
247                 System.out.println( "segment file check failed, terminating..." );
248                 System.exit( -1 );
249             }
250 
251             Date end = new Date();
252             if( !bExtensionMode )
253                 System.out.println(end.getTime() - start.getTime() + " total milliseconds");
254         }
255         catch (IOException e)
256         {
257             if( bExtensionMode )
258                 return;
259 
260             System.out.println(" caught a " + e.getClass() +
261                 "\n with message: " + e.getMessage());
262             System.exit( -1 );
263         }
264     }
265 
266     private static int indexDocs(IndexWriter writer, String aModule, boolean bExtensionMode,
267         File aCaptionFilesDir, File aContentFilesDir) throws IOException
268     {
269         if( !aCaptionFilesDir.canRead() || !aCaptionFilesDir.isDirectory() )
270         {
271             if( !bExtensionMode )
272                 System.out.println( "Not found: " + aCaptionFilesDir );
273             return -1;
274         }
275         if( !aContentFilesDir.canRead() || !aContentFilesDir.isDirectory() )
276         {
277             if( !bExtensionMode )
278                 System.out.println( "Not found: " + aContentFilesDir );
279             return -1;
280         }
281 
282         String[] aCaptionFiles = aCaptionFilesDir.list();
283         List aCaptionFilesList = Arrays.asList( aCaptionFiles );
284         HashSet aCaptionFilesHashSet = new HashSet( aCaptionFilesList );
285 
286         String[] aContentFiles = aContentFilesDir.list();
287         List aContentFilesList = Arrays.asList( aContentFiles );
288         HashSet aContentFilesHashSet = new HashSet( aContentFilesList );
289 
290         // Loop over caption files and find corresponding content file
291         if( !bExtensionMode )
292             System.out.println( "Indexing, adding files" );
293         int nCaptionFilesLen = aCaptionFiles.length;
294         for( int i = 0 ; i < nCaptionFilesLen ; i++ )
295         {
296             String aCaptionFileStr = aCaptionFiles[i];
297             File aCaptionFile = new File( aCaptionFilesDir, aCaptionFileStr );
298             File aContentFile = null;
299             if( aContentFilesHashSet.contains( aCaptionFileStr ) )
300                 aContentFile = new File( aContentFilesDir, aCaptionFileStr );
301 
302             if( !bExtensionMode )
303                 System.out.print( "." );
304             writer.addDocument( HelpFileDocument.Document( aModule, aCaptionFile, aContentFile ) );
305         }
306 
307         // Loop over content files to find remaining files not mapped to caption files
308         int nContentFilesLen = aContentFiles.length;
309         for( int i = 0 ; i < nContentFilesLen ; i++ )
310         {
311             String aContentFileStr = aContentFiles[i];
312             if( !aCaptionFilesHashSet.contains( aContentFileStr ) )
313             {
314                 // Not already handled in caption files loop
315                 File aCaptionFile = null;
316                 File aContentFile = new File( aContentFilesDir, aContentFileStr );
317                 if( !bExtensionMode )
318                     System.out.print( "." );
319                 writer.addDocument( HelpFileDocument.Document( aModule, aCaptionFile, aContentFile ) );
320             }
321         }
322         return 0;
323     }
324 
325     public static void createZipFile( File aDirToZip, String aTargetZipFileStr )
326             throws FileNotFoundException, IOException
327     {
328         FileOutputStream fos = new FileOutputStream( aTargetZipFileStr );
329         ZipOutputStream zos = new ZipOutputStream( fos );
330 
331         File[] aChildrenFiles = aDirToZip.listFiles();
332         int nFileCount = aChildrenFiles.length;
333         for( int i = 0 ; i < nFileCount ; i++ )
334             addToZipRecursively( zos, aChildrenFiles[i], null );
335 
336         zos.close();
337     }
338 
339     public static void addToZipRecursively( ZipOutputStream zos, File aFile, String aBasePath )
340             throws FileNotFoundException, IOException
341     {
342         if( aFile.isDirectory() )
343         {
344             String aDirName = aFile.getName();
345             if( aDirName.equalsIgnoreCase( "caption" ) || aDirName.equalsIgnoreCase( "content" ) )
346                 return;
347 
348             File[] aChildrenFiles = aFile.listFiles();
349             String aNewBasePath = "";
350             if( aBasePath != null )
351                 aNewBasePath += aBasePath + File.separator;
352             aNewBasePath += aDirName;
353 
354             int nFileCount = aChildrenFiles.length;
355             for( int i = 0 ; i < nFileCount ; i++ )
356                 addToZipRecursively( zos, aChildrenFiles[i], aNewBasePath );
357 
358             return;
359         }
360 
361         // No directory
362         // read contents of file we are going to put in the zip
363         int fileLength = (int) aFile.length();
364         FileInputStream fis = new FileInputStream( aFile );
365         byte[] wholeFile = new byte[fileLength];
366         int bytesRead = fis.read( wholeFile, 0, fileLength );
367         fis.close();
368 
369         String aFileName = aFile.getName();
370         String aEntryName = "";
371         if( aBasePath != null )
372             aEntryName += aBasePath + "/";
373         aEntryName += aFileName;
374         ZipEntry aZipEntry = new ZipEntry( aEntryName );
375         aZipEntry.setTime( aFile.lastModified() );
376         aZipEntry.setSize( fileLength );
377 
378         int nMethod = ( aFileName.toLowerCase().endsWith( ".jar" ) )
379                 ? ZipEntry.STORED : ZipEntry.DEFLATED;
380         aZipEntry.setMethod( nMethod );
381 
382         CRC32 tempCRC = new CRC32();
383         tempCRC.update( wholeFile, 0, wholeFile.length );
384         aZipEntry.setCrc( tempCRC.getValue() );
385 
386         // write the contents into the zip element
387         zos.putNextEntry( aZipEntry );
388         zos.write( wholeFile, 0, fileLength );
389         zos.closeEntry();
390     }
391 
392     static public boolean deleteRecursively( File aFile )
393     {
394         if( aFile.isDirectory() )
395         {
396             File[] aChildrenFiles = aFile.listFiles();
397             int nFileCount = aChildrenFiles.length;
398             for( int i = 0 ; i < nFileCount ; i++ )
399             {
400                 File aChildrenFile = aChildrenFiles[i];
401                 boolean bSuccess = deleteRecursively( aChildrenFile );
402                 if( !bSuccess )
403                     return false;
404             }
405         }
406 
407         return aFile.delete();
408     }
409 }
410 
411