1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 #include "HelpCompiler.hxx"
29 
30 #include <map>
31 
32 #include <string.h>
33 #include <limits.h>
34 
35 #include <libxslt/xslt.h>
36 #include <libxslt/transform.h>
37 #include <libxslt/xsltutils.h>
38 #include <libxslt/functions.h>
39 #include <libxslt/extensions.h>
40 
41 #include <sal/types.h>
42 #include <osl/time.h>
43 #include <rtl/bootstrap.hxx>
44 
45 #include <expat.h>
46 
47 #define DBHELP_ONLY
48 
49 class IndexerPreProcessor
50 {
51 private:
52     std::string       m_aModuleName;
53     fs::path          m_fsIndexBaseDir;
54     fs::path          m_fsCaptionFilesDirName;
55     fs::path          m_fsContentFilesDirName;
56 
57     xsltStylesheetPtr m_xsltStylesheetPtrCaption;
58     xsltStylesheetPtr m_xsltStylesheetPtrContent;
59 
60 public:
61     IndexerPreProcessor( const std::string& aModuleName, const fs::path& fsIndexBaseDir,
62          const fs::path& idxCaptionStylesheet, const fs::path& idxContentStylesheet );
63     ~IndexerPreProcessor();
64 
65     void processDocument( xmlDocPtr doc, const std::string& EncodedDocPath );
66 };
67 
68 IndexerPreProcessor::IndexerPreProcessor
69     ( const std::string& aModuleName, const fs::path& fsIndexBaseDir,
70       const fs::path& idxCaptionStylesheet, const fs::path& idxContentStylesheet )
71         : m_aModuleName( aModuleName )
72         , m_fsIndexBaseDir( fsIndexBaseDir )
73 {
74     m_fsCaptionFilesDirName = fsIndexBaseDir / "caption";
75     fs::create_directory( m_fsCaptionFilesDirName );
76 
77     m_fsContentFilesDirName = fsIndexBaseDir / "content";
78     fs::create_directory( m_fsContentFilesDirName );
79 
80     m_xsltStylesheetPtrCaption = xsltParseStylesheetFile
81         ((const xmlChar *)idxCaptionStylesheet.native_file_string().c_str());
82     m_xsltStylesheetPtrContent = xsltParseStylesheetFile
83         ((const xmlChar *)idxContentStylesheet.native_file_string().c_str());
84 }
85 
86 IndexerPreProcessor::~IndexerPreProcessor()
87 {
88     if( m_xsltStylesheetPtrCaption )
89         xsltFreeStylesheet( m_xsltStylesheetPtrCaption );
90     if( m_xsltStylesheetPtrContent )
91         xsltFreeStylesheet( m_xsltStylesheetPtrContent );
92 }
93 
94 
95 std::string getEncodedPath( const std::string& Path )
96 {
97     rtl::OString aOStr_Path( Path.c_str() );
98     rtl::OUString aOUStr_Path( rtl::OStringToOUString
99         ( aOStr_Path, fs::getThreadTextEncoding() ) );
100     rtl::OUString aPathURL;
101     osl::File::getFileURLFromSystemPath( aOUStr_Path, aPathURL );
102     rtl::OString aOStr_PathURL( rtl::OUStringToOString
103         ( aPathURL, fs::getThreadTextEncoding() ) );
104     std::string aStdStr_PathURL( aOStr_PathURL.getStr() );
105     return aStdStr_PathURL;
106 }
107 
108 void IndexerPreProcessor::processDocument
109     ( xmlDocPtr doc, const std::string &EncodedDocPath )
110 {
111     std::string aStdStr_EncodedDocPathURL = getEncodedPath( EncodedDocPath );
112 
113     if( m_xsltStylesheetPtrCaption )
114     {
115         xmlDocPtr resCaption = xsltApplyStylesheet( m_xsltStylesheetPtrCaption, doc, NULL );
116         xmlNodePtr pResNodeCaption = resCaption->xmlChildrenNode;
117         if( pResNodeCaption )
118         {
119             fs::path fsCaptionPureTextFile_docURL = m_fsCaptionFilesDirName / aStdStr_EncodedDocPathURL;
120             std::string aCaptionPureTextFileStr_docURL = fsCaptionPureTextFile_docURL.native_file_string();
121 #ifdef WNT     //We need _wfopen to support long file paths on Windows XP
122             FILE* pFile_docURL = _wfopen(
123                 fsCaptionPureTextFile_docURL.native_file_string_w(), L"w" );
124 #else
125             FILE* pFile_docURL = fopen(
126                 fsCaptionPureTextFile_docURL.native_file_string().c_str(), "w" );
127 #endif
128             if( pFile_docURL )
129             {
130                 fprintf( pFile_docURL, "%s\n", pResNodeCaption->content );
131                 fclose( pFile_docURL );
132             }
133         }
134         xmlFreeDoc(resCaption);
135     }
136 
137     if( m_xsltStylesheetPtrContent )
138     {
139         xmlDocPtr resContent = xsltApplyStylesheet( m_xsltStylesheetPtrContent, doc, NULL );
140         xmlNodePtr pResNodeContent = resContent->xmlChildrenNode;
141         if( pResNodeContent )
142         {
143             fs::path fsContentPureTextFile_docURL = m_fsContentFilesDirName / aStdStr_EncodedDocPathURL;
144 #ifdef WNT     //We need _wfopen to support long file paths on Windows XP
145             FILE* pFile_docURL = _wfopen(
146                 fsContentPureTextFile_docURL.native_file_string_w(), L"w" );
147 #else
148             FILE* pFile_docURL = fopen(
149                 fsContentPureTextFile_docURL.native_file_string().c_str(), "w" );
150 #endif
151             if( pFile_docURL )
152             {
153                 fprintf( pFile_docURL, "%s\n", pResNodeContent->content );
154                 fclose( pFile_docURL );
155             }
156         }
157         xmlFreeDoc(resContent);
158     }
159 }
160 
/**
 * Ordered list of ids that belong to one keyword.
 */
struct Data
{
    std::vector<std::string> _idList;
    typedef std::vector<std::string>::const_iterator cIter;

    /** Adds id at the end of the list (duplicates are kept). */
    void append(const std::string &id)
    {
        _idList.insert(_idList.end(), id);
    }

    /**
     * @return all ids in insertion order, each one followed by ';'
     *         (an empty string for an empty list)
     */
    std::string getString() const
    {
        std::string aJoined;
        cIter aPos = _idList.begin();
        while (aPos != _idList.end())
        {
            aJoined.append(*aPos);
            aJoined.push_back(';');
            ++aPos;
        }
        return aJoined;
    }
};
180 
/**
 * Appends one record in the DBHelp text format to pFile:
 *
 *   <key length, hex> ' ' <key bytes> ' ' <value length, hex> ' ' <value bytes> LF
 *
 * Key and value are written as raw bytes, so they may contain any
 * character including NUL.  A failure of any of the individual writes is
 * reported on stderr; the remaining parts are still attempted.
 *
 * @param pFile     open output stream; a NULL pointer makes the call a no-op
 * @param aKeyStr   record key
 * @param aValueStr record value
 */
void writeKeyValue_DBHelp( FILE* pFile, const std::string& aKeyStr, const std::string& aValueStr )
{
    if( pFile == NULL )
        return;

    const char cLF = 10;
    const unsigned int nKeyLen = static_cast<unsigned int>( aKeyStr.length() );
    const unsigned int nValueLen = static_cast<unsigned int>( aValueStr.length() );

    // The length prefix is checked like every other part of the record.
    if (fprintf( pFile, "%x ", nKeyLen ) < 0)
        fprintf(stderr, "fwrite to db failed\n");
    if( nKeyLen > 0 )
    {
        if (fwrite( aKeyStr.c_str(), 1, nKeyLen, pFile ) != nKeyLen)
            fprintf(stderr, "fwrite to db failed\n");
    }
    if (fprintf( pFile, " %x ", nValueLen ) < 0)
        fprintf(stderr, "fwrite to db failed\n");
    if( nValueLen > 0 )
    {
        if (fwrite( aValueStr.c_str(), 1, nValueLen, pFile ) != nValueLen)
            fprintf(stderr, "fwrite to db failed\n");
    }
    if (fprintf( pFile, "%c", cLF ) < 0)
        fprintf(stderr, "fwrite to db failed\n");
}
204 
205 class HelpKeyword
206 {
207 private:
208     typedef std::hash_map<std::string, Data, pref_hash> DataHashtable;
209     DataHashtable _hash;
210 
211 public:
212     void insert(const std::string &key, const std::string &id)
213     {
214         Data &data = _hash[key];
215         data.append(id);
216     }
217 
218     void dump(DB* table)
219     {
220         DataHashtable::const_iterator aEnd = _hash.end();
221         for (DataHashtable::const_iterator aIter = _hash.begin(); aIter != aEnd; ++aIter)
222         {
223             const std::string &keystr = aIter->first;
224             DBT key;
225             memset(&key, 0, sizeof(key));
226             key.data = const_cast<char*>(keystr.c_str());
227             key.size = keystr.length();
228 
229             const Data &data = aIter->second;
230             std::string str = data.getString();
231             DBT value;
232             memset(&value, 0, sizeof(value));
233             value.data = const_cast<char*>(str.c_str());
234             value.size = str.length();
235 
236             table->put(table, NULL, &key, &value, 0);
237         }
238     }
239 
240     void dump_DBHelp( const fs::path& rFileName )
241     {
242 #ifdef WNT     //We need _wfopen to support long file paths on Windows XP
243         FILE* pFile = _wfopen( rFileName.native_file_string_w(), L"wb" );
244 #else
245         FILE* pFile = fopen( rFileName.native_file_string().c_str(), "wb" );
246 #endif
247         if( pFile == NULL )
248             return;
249 
250         DataHashtable::const_iterator aEnd = _hash.end();
251         for (DataHashtable::const_iterator aIter = _hash.begin(); aIter != aEnd; ++aIter)
252             writeKeyValue_DBHelp( pFile, aIter->first, aIter->second.getString() );
253 
254         fclose( pFile );
255     }
256 };
257 
258 class HelpLinker
259 {
260 public:
261     void main(std::vector<std::string> &args,
262               std::string* pExtensionPath = NULL,
263               std::string* pDestination = NULL,
264               const rtl::OUString* pOfficeHelpPath = NULL )
265 
266             throw( HelpProcessingException );
267 
268     HelpLinker()
269         : init(true)
270         , m_pIndexerPreProcessor(NULL)
271     {}
272     ~HelpLinker()
273         { delete m_pIndexerPreProcessor; }
274 
275 private:
276     int locCount, totCount;
277     Stringtable additionalFiles;
278     HashSet helpFiles;
279     fs::path sourceRoot;
280     fs::path embeddStylesheet;
281     fs::path idxCaptionStylesheet;
282     fs::path idxContentStylesheet;
283     fs::path zipdir;
284     fs::path outputFile;
285     std::string extsource;
286     std::string extdestination;
287     std::string module;
288     std::string lang;
289     std::string extensionPath;
290     std::string extensionDestination;
291     bool bExtensionMode;
292     fs::path indexDirName;
293     fs::path indexDirParentName;
294     bool init;
295     IndexerPreProcessor* m_pIndexerPreProcessor;
296     void initIndexerPreProcessor();
297     void link() throw( HelpProcessingException );
298     void addBookmark( DB* dbBase, FILE* pFile_DBHelp, std::string thishid,
299         const std::string& fileB, const std::string& anchorB,
300         const std::string& jarfileB, const std::string& titleB );
301 #if 0
302     /**
303      * @param outputFile
304      * @param module
305      * @param lang
306      * @param hid
307      * @param helpFiles
308      * @param additionalFiles
309      */
310 
311     private HelpURLStreamHandlerFactory urlHandler = null;
312 #endif
313 };
314 
namespace URLEncoder
{
    /**
     * Percent-encodes a byte string.
     *
     * Alphanumeric characters (as classified by isalnum) and the characters
     * !$&'()*+,-.=@_ are copied unchanged; every other byte, including an
     * embedded NUL, is replaced by '%' followed by its two-digit upper-case
     * hexadecimal value.
     *
     * @param rIn string to encode
     * @return the encoded string
     */
    static std::string encode(const std::string &rIn)
    {
        static const char good[] = "!$&'()*+,-.=@_";
        static const char hex[17] = "0123456789ABCDEF";

        std::string result;
        result.reserve(rIn.length());
        for (size_t i=0; i < rIn.length(); ++i)
        {
            const unsigned char c = rIn[i];
            // strchr() also finds the terminating NUL of 'good', so a NUL
            // byte has to be excluded explicitly or it would pass unencoded.
            const bool bUnreserved =
                c != '\0' && (isalnum (c) || strchr (good, c) != NULL);
            if (bUnreserved)
                result += static_cast<char>(c);
            else {
                result += '%';
                result += hex[c >> 4];
                result += hex[c & 0xf];
            }
        }
        return result;
    }
}
337 
338 void HelpLinker::addBookmark( DB* dbBase, FILE* pFile_DBHelp, std::string thishid,
339         const std::string& fileB, const std::string& anchorB,
340         const std::string& jarfileB, const std::string& titleB)
341 {
342     HCDBG(std::cerr << "HelpLinker::addBookmark " << thishid << " " <<
343         fileB << " " << anchorB << " " << jarfileB << " " << titleB << std::endl);
344 
345     thishid = URLEncoder::encode(thishid);
346 
347     DBT key;
348     memset(&key, 0, sizeof(key));
349     key.data = const_cast<char*>(thishid.c_str());
350     key.size = thishid.length();
351 
352     int fileLen = fileB.length();
353     if (!anchorB.empty())
354         fileLen += (1 + anchorB.length());
355     int dataLen = 1 + fileLen + 1 + jarfileB.length() + 1 + titleB.length();
356 
357     std::vector<unsigned char> dataB(dataLen);
358     size_t i = 0;
359     dataB[i++] = static_cast<unsigned char>(fileLen);
360     for (size_t j = 0; j < fileB.length(); ++j)
361         dataB[i++] = fileB[j];
362     if (!anchorB.empty())
363     {
364         dataB[i++] = '#';
365         for (size_t j = 0; j < anchorB.length(); ++j)
366             dataB[i++] = anchorB[j];
367     }
368     dataB[i++] = static_cast<unsigned char>(jarfileB.length());
369     for (size_t j = 0; j < jarfileB.length(); ++j)
370         dataB[i++] = jarfileB[j];
371 
372     dataB[i++] = static_cast<unsigned char>(titleB.length());
373     for (size_t j = 0; j < titleB.length(); ++j)
374         dataB[i++] = titleB[j];
375 
376     DBT data;
377     memset(&data, 0, sizeof(data));
378     data.data = &dataB[0];
379     data.size = dataB.size();
380 
381     if( dbBase != NULL )
382         dbBase->put(dbBase, NULL, &key, &data, 0);
383 
384     if( pFile_DBHelp != NULL )
385     {
386         std::string aValueStr( dataB.begin(), dataB.end() );
387         writeKeyValue_DBHelp( pFile_DBHelp, thishid, aValueStr );
388     }
389 }
390 
391 void HelpLinker::initIndexerPreProcessor()
392 {
393     if( m_pIndexerPreProcessor )
394         delete m_pIndexerPreProcessor;
395     std::string mod = module;
396     std::transform (mod.begin(), mod.end(), mod.begin(), tolower);
397     m_pIndexerPreProcessor = new IndexerPreProcessor( mod, indexDirParentName,
398          idxCaptionStylesheet, idxContentStylesheet );
399 }
400 
401 /**
402 *
403 */
404 void HelpLinker::link() throw( HelpProcessingException )
405 {
406     bool bIndexForExtension = true;
407 
408     if( bExtensionMode )
409     {
410         //indexDirParentName = sourceRoot;
411         indexDirParentName = extensionDestination;
412     }
413     else
414     {
415         indexDirParentName = zipdir;
416         fs::create_directory(indexDirParentName);
417     }
418 
419 #ifdef CMC_DEBUG
420     std::cerr << "will not delete tmpdir of " << indexDirParentName.native_file_string().c_str() << std::endl;
421 #endif
422 
423     std::string mod = module;
424     std::transform (mod.begin(), mod.end(), mod.begin(), tolower);
425 
426     // do the work here
427     // continue with introduction of the overall process thing into the
428     // here all hzip files will be worked on
429     std::string appl = mod;
430     if (appl[0] == 's')
431         appl = appl.substr(1);
432 
433     bool bUse_ = true;
434 #ifdef DBHELP_ONLY
435     if( !bExtensionMode )
436         bUse_ = false;
437 #endif
438 
439     DB* helpText(0);
440 #ifndef DBHELP_ONLY
441     fs::path helpTextFileName(indexDirParentName / (mod + ".ht"));
442     db_create(&helpText,0,0);
443     helpText->open(helpText, NULL, helpTextFileName.native_file_string().c_str(), NULL, DB_BTREE,
444         DB_CREATE | DB_TRUNCATE, 0644);
445 #endif
446 
447     fs::path helpTextFileName_DBHelp(indexDirParentName / (mod + (bUse_ ? ".ht_" : ".ht")));
448 #ifdef WNT
449     //We need _wfopen to support long file paths on Windows XP
450     FILE* pFileHelpText_DBHelp = _wfopen
451         ( helpTextFileName_DBHelp.native_file_string_w(), L"wb" );
452 #else
453 
454     FILE* pFileHelpText_DBHelp = fopen
455         ( helpTextFileName_DBHelp.native_file_string().c_str(), "wb" );
456 #endif
457     DB* dbBase(0);
458 #ifndef DBHELP_ONLY
459     fs::path dbBaseFileName(indexDirParentName / (mod + ".db"));
460     db_create(&dbBase,0,0);
461     dbBase->open(dbBase, NULL, dbBaseFileName.native_file_string().c_str(), NULL, DB_BTREE,
462         DB_CREATE | DB_TRUNCATE, 0644);
463 #endif
464 
465     fs::path dbBaseFileName_DBHelp(indexDirParentName / (mod + (bUse_ ? ".db_" : ".db")));
466 #ifdef WNT
467     //We need _wfopen to support long file paths on Windows XP
468     FILE* pFileDbBase_DBHelp = _wfopen
469         ( dbBaseFileName_DBHelp.native_file_string_w(), L"wb" );
470 #else
471     FILE* pFileDbBase_DBHelp = fopen
472         ( dbBaseFileName_DBHelp.native_file_string().c_str(), "wb" );
473 #endif
474 
475 #ifndef DBHELP_ONLY
476     DB* keyWord(0);
477     fs::path keyWordFileName(indexDirParentName / (mod + ".key"));
478     db_create(&keyWord,0,0);
479     keyWord->open(keyWord, NULL, keyWordFileName.native_file_string().c_str(), NULL, DB_BTREE,
480         DB_CREATE | DB_TRUNCATE, 0644);
481 #endif
482 
483     fs::path keyWordFileName_DBHelp(indexDirParentName / (mod + (bUse_ ? ".key_" : ".key")));
484 
485     HelpKeyword helpKeyword;
486 
487     // catch HelpProcessingException to avoid locking data bases
488     try
489     {
490 
491     // lastly, initialize the indexBuilder
492     if ( (!bExtensionMode || bIndexForExtension) && !helpFiles.empty())
493         initIndexerPreProcessor();
494 
495     if( !bExtensionMode )
496     {
497 #ifndef OS2 // YD @TODO@ crashes libc runtime :-(
498         std::cout << "Making " << outputFile.native_file_string() <<
499             " from " << helpFiles.size() << " input files" << std::endl;
500 #endif
501     }
502 
503     // here we start our loop over the hzip files.
504     HashSet::iterator end = helpFiles.end();
505     for (HashSet::iterator iter = helpFiles.begin(); iter != end; ++iter)
506     {
507         if( !bExtensionMode )
508         {
509             std::cout << ".";
510             std::cout.flush();
511         }
512 
513         // process one file
514         // streamTable contains the streams in the hzip file
515         StreamTable streamTable;
516         const std::string &xhpFileName = *iter;
517 
518         if (!bExtensionMode && xhpFileName.rfind(".xhp") != xhpFileName.length()-4)
519         {
520             // only work on .xhp - files
521             std::cerr <<
522                 "ERROR: input list entry '"
523                     << xhpFileName
524                     << "' has the wrong extension (only files with extension .xhp "
525                     << "are accepted)";
526             continue;
527         }
528 
529         fs::path langsourceRoot(sourceRoot);
530         fs::path xhpFile;
531 
532         if( bExtensionMode )
533         {
534             // langsourceRoot == sourceRoot for extensions
535             std::string xhpFileNameComplete( extensionPath );
536             xhpFileNameComplete.append( '/' + xhpFileName );
537             xhpFile = fs::path( xhpFileNameComplete );
538         }
539         else
540         {
541             langsourceRoot.append('/' + lang + '/');
542             xhpFile = fs::path(xhpFileName, fs::native);
543         }
544 
545         HelpCompiler hc( streamTable, xhpFile, langsourceRoot,
546             embeddStylesheet, module, lang, bExtensionMode );
547 
548         HCDBG(std::cerr << "before compile of " << xhpFileName << std::endl);
549         bool success = hc.compile();
550         HCDBG(std::cerr << "after compile of " << xhpFileName << std::endl);
551 
552         if (!success && !bExtensionMode)
553         {
554             std::stringstream aStrStream;
555             aStrStream <<
556                 "\nERROR: compiling help particle '"
557                     << xhpFileName
558                     << "' for language '"
559                     << lang
560                     << "' failed!";
561             throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() );
562         }
563 
564         const std::string documentBaseId = streamTable.document_id;
565         std::string documentPath = streamTable.document_path;
566         if (documentPath.find("/") == 0)
567             documentPath = documentPath.substr(1);
568 
569         std::string documentJarfile = streamTable.document_module + ".jar";
570 
571         std::string documentTitle = streamTable.document_title;
572         if (documentTitle.empty())
573             documentTitle = "<notitle>";
574 
575 #if 0
576         std::cout << "for " << xhpFileName << " documentBaseId is " << documentBaseId << "\n";
577         std::cout << "for " << xhpFileName << " documentPath is " << documentPath << "\n";
578         std::cout << "for " << xhpFileName << " documentJarfile is " << documentJarfile << "\n";
579         std::cout << "for " << xhpFileName << " documentPath is " << documentTitle << "\n";
580 #endif
581 
582         const std::string& fileB = documentPath;
583         const std::string& jarfileB = documentJarfile;
584         std::string& titleB = documentTitle;
585 
586         // add once this as its own id.
587         addBookmark(dbBase, pFileDbBase_DBHelp, documentPath, fileB, std::string(), jarfileB, titleB);
588 
589         // first the database *.db
590         // ByteArrayInputStream bais = null;
591         // ObjectInputStream ois = null;
592 
593         const HashSet *hidlist = streamTable.appl_hidlist;
594         if (!hidlist)
595             hidlist = streamTable.default_hidlist;
596         if (hidlist && !hidlist->empty())
597         {
598             // now iterate over all elements of the hidlist
599             HashSet::const_iterator aEnd = hidlist->end();
600             for (HashSet::const_iterator hidListIter = hidlist->begin();
601                 hidListIter != aEnd; ++hidListIter)
602             {
603                 std::string thishid = *hidListIter;
604 
605                 std::string anchorB;
606                 size_t index = thishid.rfind('#');
607                 if (index != std::string::npos)
608                 {
609                     anchorB = thishid.substr(1 + index);
610                     thishid = thishid.substr(0, index);
611                 }
612                 addBookmark(dbBase, pFileDbBase_DBHelp, thishid, fileB, anchorB, jarfileB, titleB);
613             }
614         }
615 
616         // now the keywords
617         const Hashtable *anchorToLL = streamTable.appl_keywords;
618         if (!anchorToLL)
619             anchorToLL = streamTable.default_keywords;
620         if (anchorToLL && !anchorToLL->empty())
621         {
622             std::string fakedHid = URLEncoder::encode(documentPath);
623             Hashtable::const_iterator aEnd = anchorToLL->end();
624             for (Hashtable::const_iterator enumer = anchorToLL->begin();
625                 enumer != aEnd; ++enumer)
626             {
627                 const std::string &anchor = enumer->first;
628                 addBookmark(dbBase, pFileDbBase_DBHelp, documentPath, fileB,
629                     anchor, jarfileB, titleB);
630                 std::string totalId = fakedHid + "#" + anchor;
631                 // std::cerr << hzipFileName << std::endl;
632                 const LinkedList& ll = enumer->second;
633                 LinkedList::const_iterator aOtherEnd = ll.end();
634                 for (LinkedList::const_iterator llIter = ll.begin();
635                     llIter != aOtherEnd; ++llIter)
636                 {
637                         helpKeyword.insert(*llIter, totalId);
638                 }
639             }
640 
641         }
642 
643         // and last the helptexts
644         const Stringtable *helpTextHash = streamTable.appl_helptexts;
645         if (!helpTextHash)
646             helpTextHash = streamTable.default_helptexts;
647         if (helpTextHash && !helpTextHash->empty())
648         {
649             Stringtable::const_iterator aEnd = helpTextHash->end();
650             for (Stringtable::const_iterator helpTextIter = helpTextHash->begin();
651                 helpTextIter != aEnd; ++helpTextIter)
652             {
653                 std::string helpTextId = helpTextIter->first;
654                 const std::string& helpTextText = helpTextIter->second;
655 
656                 helpTextId = URLEncoder::encode(helpTextId);
657 
658                 DBT keyDbt;
659                 memset(&keyDbt, 0, sizeof(keyDbt));
660                 keyDbt.data = const_cast<char*>(helpTextId.c_str());
661                 keyDbt.size = helpTextId.length();
662 
663                 DBT textDbt;
664                 memset(&textDbt, 0, sizeof(textDbt));
665                 textDbt.data = const_cast<char*>(helpTextText.c_str());
666                 textDbt.size = helpTextText.length();
667 
668                 if( helpText != NULL )
669                     helpText->put(helpText, NULL, &keyDbt, &textDbt, 0);
670 
671                 if( pFileHelpText_DBHelp != NULL )
672                     writeKeyValue_DBHelp( pFileHelpText_DBHelp, helpTextId, helpTextText );
673             }
674         }
675 
676         //IndexerPreProcessor
677         if( !bExtensionMode || bIndexForExtension )
678         {
679             // now the indexing
680             xmlDocPtr document = streamTable.appl_doc;
681             if (!document)
682                 document = streamTable.default_doc;
683             if (document)
684             {
685                 std::string temp = module;
686                 std::transform (temp.begin(), temp.end(), temp.begin(), tolower);
687                 m_pIndexerPreProcessor->processDocument(document, URLEncoder::encode(documentPath) );
688             }
689         }
690 
691     } // while loop over hzip files ending
692     if( !bExtensionMode )
693         std::cout << std::endl;
694 
695     } // try
696     catch( const HelpProcessingException& )
697     {
698         // catch HelpProcessingException to avoid locking data bases
699 #ifndef DBHELP_ONLY
700         helpText->close(helpText, 0);
701         dbBase->close(dbBase, 0);
702         keyWord->close(keyWord, 0);
703 #endif
704         if( pFileHelpText_DBHelp != NULL )
705             fclose( pFileHelpText_DBHelp );
706         if( pFileDbBase_DBHelp != NULL )
707             fclose( pFileDbBase_DBHelp );
708         throw;
709     }
710 
711 #ifndef DBHELP_ONLY
712     helpText->close(helpText, 0);
713     dbBase->close(dbBase, 0);
714     helpKeyword.dump(keyWord);
715     keyWord->close(keyWord, 0);
716 #endif
717     if( pFileHelpText_DBHelp != NULL )
718         fclose( pFileHelpText_DBHelp );
719     if( pFileDbBase_DBHelp != NULL )
720         fclose( pFileDbBase_DBHelp );
721 
722     helpKeyword.dump_DBHelp( keyWordFileName_DBHelp);
723 
724     if( !bExtensionMode )
725     {
726         // New index
727         Stringtable::iterator aEnd = additionalFiles.end();
728         for (Stringtable::iterator enumer = additionalFiles.begin(); enumer != aEnd;
729             ++enumer)
730         {
731             const std::string &additionalFileName = enumer->second;
732             const std::string &additionalFileKey = enumer->first;
733 
734             fs::path fsAdditionalFileName( additionalFileName, fs::native );
735                 std::string aNativeStr = fsAdditionalFileName.native_file_string();
736                 const char* pStr = aNativeStr.c_str();
737                 std::cerr << pStr;
738 
739             fs::path fsTargetName( indexDirParentName / additionalFileKey );
740 
741             fs::copy( fsAdditionalFileName, fsTargetName );
742         }
743     }
744 
745 /*
746     /////////////////////////////////////////////////////////////////////////
747     /// remove temprary directory for index creation
748     /////////////////////////////////////////////////////////////////////////
749 #ifndef CMC_DEBUG
750     if( !bExtensionMode )
751         fs::remove_all( indexDirParentName );
752 #endif
753 */
754 }
755 
756 
757 void HelpLinker::main( std::vector<std::string> &args,
758                        std::string* pExtensionPath, std::string* pDestination,
759                        const rtl::OUString* pOfficeHelpPath )
760     throw( HelpProcessingException )
761 {
762     bExtensionMode = false;
763     helpFiles.clear();
764 
765     if (args.size() > 0 && args[0][0] == '@')
766     {
767         std::vector<std::string> stringList;
768         std::string strBuf;
769         std::ifstream fileReader(args[0].substr(1).c_str());
770 
771         while (fileReader)
772         {
773             std::string token;
774             fileReader >> token;
775             if (!token.empty())
776                 stringList.push_back(token);
777         }
778         fileReader.close();
779 
780         args = stringList;
781     }
782 
783     size_t i = 0;
784     bool bSrcOption = false;
785     while (i < args.size())
786     {
787         if (args[i].compare("-extlangsrc") == 0)
788         {
789             ++i;
790             if (i >= args.size())
791             {
792                 std::stringstream aStrStream;
793                 aStrStream << "extension source missing" << std::endl;
794                 throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() );
795             }
796             extsource = args[i];
797         }
798         else if (args[i].compare("-extlangdest") == 0)
799         {
800             //If this argument is not provided then the location provided in -extsource will
801             //also be the destination
802             ++i;
803             if (i >= args.size())
804             {
805                 std::stringstream aStrStream;
806                 aStrStream << "extension destination missing" << std::endl;
807                 throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() );
808             }
809             extdestination = args[i];
810         }
811         else if (args[i].compare("-src") == 0)
812         {
813             ++i;
814             if (i >= args.size())
815             {
816                 std::stringstream aStrStream;
817                 aStrStream << "sourceroot missing" << std::endl;
818                 throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() );
819             }
820             bSrcOption = true;
821             sourceRoot = fs::path(args[i], fs::native);
822         }
823         else if (args[i].compare("-sty") == 0)
824         {
825             ++i;
826             if (i >= args.size())
827             {
828                 std::stringstream aStrStream;
829                 aStrStream << "embeddingStylesheet missing" << std::endl;
830                 throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() );
831             }
832 
833             embeddStylesheet = fs::path(args[i], fs::native);
834         }
835         else if (args[i].compare("-zipdir") == 0)
836         {
837             ++i;
838             if (i >= args.size())
839             {
840                 std::stringstream aStrStream;
841                 aStrStream << "idxtemp missing" << std::endl;
842                 throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() );
843             }
844 
845             zipdir = fs::path(args[i], fs::native);
846         }
847         else if (args[i].compare("-idxcaption") == 0)
848         {
849             ++i;
850             if (i >= args.size())
851             {
852                 std::stringstream aStrStream;
853                 aStrStream << "idxcaption stylesheet missing" << std::endl;
854                 throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() );
855             }
856 
857             idxCaptionStylesheet = fs::path(args[i], fs::native);
858         }
859         else if (args[i].compare("-idxcontent") == 0)
860         {
861             ++i;
862             if (i >= args.size())
863             {
864                 std::stringstream aStrStream;
865                 aStrStream << "idxcontent stylesheet missing" << std::endl;
866                 throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() );
867             }
868 
869             idxContentStylesheet = fs::path(args[i], fs::native);
870         }
871         else if (args[i].compare("-o") == 0)
872         {
873             ++i;
874             if (i >= args.size())
875             {
876                 std::stringstream aStrStream;
877                 aStrStream << "outputfilename missing" << std::endl;
878                 throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() );
879             }
880 
881             outputFile = fs::path(args[i], fs::native);
882         }
883         else if (args[i].compare("-mod") == 0)
884         {
885             ++i;
886             if (i >= args.size())
887             {
888                 std::stringstream aStrStream;
889                 aStrStream << "module name missing" << std::endl;
890                 throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() );
891             }
892 
893             module = args[i];
894         }
895         else if (args[i].compare("-lang") == 0)
896         {
897             ++i;
898             if (i >= args.size())
899             {
900                 std::stringstream aStrStream;
901                 aStrStream << "language name missing" << std::endl;
902                 throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() );
903             }
904 
905             lang = args[i];
906         }
907         else if (args[i].compare("-hid") == 0)
908         {
909             ++i;
910             throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, "obsolete -hid argument used" );
911         }
912         else if (args[i].compare("-add") == 0)
913         {
914             std::string addFile, addFileUnderPath;
915             ++i;
916             if (i >= args.size())
917             {
918                 std::stringstream aStrStream;
919                 aStrStream << "pathname missing" << std::endl;
920                 throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() );
921             }
922 
923             addFileUnderPath = args[i];
924             ++i;
925             if (i >= args.size())
926             {
927                 std::stringstream aStrStream;
928                 aStrStream << "pathname missing" << std::endl;
929                 throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() );
930             }
931             addFile = args[i];
932             if (!addFileUnderPath.empty() && !addFile.empty())
933                 additionalFiles[addFileUnderPath] = addFile;
934         }
935         else
936             helpFiles.push_back(args[i]);
937         ++i;
938     }
939 
940     //We can be called from the helplinker executable or the extension manager
941     //In the latter case extsource is not used.
942     if( (pExtensionPath && pExtensionPath->length() > 0 && pOfficeHelpPath)
943         || !extsource.empty())
944     {
945         bExtensionMode = true;
946         if (!extsource.empty())
947         {
948             //called from helplinker.exe, pExtensionPath and pOfficeHelpPath
949             //should be NULL
950             sourceRoot = fs::path(extsource, fs::native);
951             extensionPath = sourceRoot.toUTF8();
952 
953             if (extdestination.empty())
954             {
955                 std::stringstream aStrStream;
956                 aStrStream << "-extlangdest is missing" << std::endl;
957                 throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() );
958             }
959             else
960             {
961                 //Convert from system path to file URL!!!
962                 fs::path p(extdestination, fs::native);
963                 extensionDestination = p.toUTF8();
964             }
965         }
966         else
967         { //called from extension manager
968             extensionPath = *pExtensionPath;
969             sourceRoot = fs::path(extensionPath);
970             extensionDestination = *pDestination;
971         }
972         //check if -src option was used. This option must not be used
973         //when extension help is compiled.
974         if (bSrcOption)
975         {
976             std::stringstream aStrStream;
977             aStrStream << "-src must not be used together with -extsource missing" << std::endl;
978             throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() );
979         }
980     }
981 
982     if (!bExtensionMode && zipdir.empty())
983     {
984         std::stringstream aStrStream;
985         aStrStream << "no index dir given" << std::endl;
986         throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() );
987     }
988 
989     if (!bExtensionMode && idxCaptionStylesheet.empty()
990         || !extsource.empty() && idxCaptionStylesheet.empty())
991     {
992         //No extension mode and extension mode using commandline
993         //!extsource.empty indicates extension mode using commandline
994         // -idxcaption paramter is required
995         std::stringstream aStrStream;
996         aStrStream << "no index caption stylesheet given" << std::endl;
997         throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() );
998     }
999     else if ( bExtensionMode &&  extsource.empty())
1000     {
1001         //This part is used when compileExtensionHelp is called from the extensions manager.
1002         //If extension help is compiled using helplinker in the build process
1003         rtl::OUString aIdxCaptionPathFileURL( *pOfficeHelpPath );
1004         aIdxCaptionPathFileURL += rtl::OUString::createFromAscii( "/idxcaption.xsl" );
1005 
1006         rtl::OString aOStr_IdxCaptionPathFileURL( rtl::OUStringToOString
1007             ( aIdxCaptionPathFileURL, fs::getThreadTextEncoding() ) );
1008         std::string aStdStr_IdxCaptionPathFileURL( aOStr_IdxCaptionPathFileURL.getStr() );
1009 
1010         idxCaptionStylesheet = fs::path( aStdStr_IdxCaptionPathFileURL );
1011     }
1012 
1013     if (!bExtensionMode && idxContentStylesheet.empty()
1014         || !extsource.empty() && idxContentStylesheet.empty())
1015     {
1016         //No extension mode and extension mode using commandline
1017         //!extsource.empty indicates extension mode using commandline
1018         // -idxcontent paramter is required
1019         std::stringstream aStrStream;
1020         aStrStream << "no index content stylesheet given" << std::endl;
1021         throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() );
1022     }
1023     else if ( bExtensionMode && extsource.empty())
1024     {
1025         //If extension help is compiled using helplinker in the build process
1026         //then  -idxcontent must be supplied
1027         //This part is used when compileExtensionHelp is called from the extensions manager.
1028         rtl::OUString aIdxContentPathFileURL( *pOfficeHelpPath );
1029         aIdxContentPathFileURL += rtl::OUString::createFromAscii( "/idxcontent.xsl" );
1030 
1031         rtl::OString aOStr_IdxContentPathFileURL( rtl::OUStringToOString
1032             ( aIdxContentPathFileURL, fs::getThreadTextEncoding() ) );
1033         std::string aStdStr_IdxContentPathFileURL( aOStr_IdxContentPathFileURL.getStr() );
1034 
1035         idxContentStylesheet = fs::path( aStdStr_IdxContentPathFileURL );
1036     }
1037     if (!bExtensionMode && embeddStylesheet.empty())
1038     {
1039         std::stringstream aStrStream;
1040         aStrStream << "no embedding resolving file given" << std::endl;
1041         throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() );
1042     }
1043     if (sourceRoot.empty())
1044     {
1045         std::stringstream aStrStream;
1046         aStrStream << "no sourceroot given" << std::endl;
1047         throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() );
1048     }
1049     if (!bExtensionMode && outputFile.empty())
1050     {
1051         std::stringstream aStrStream;
1052         aStrStream << "no output file given" << std::endl;
1053         throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() );
1054     }
1055     if (module.empty())
1056     {
1057         std::stringstream aStrStream;
1058         aStrStream << "module missing" << std::endl;
1059         throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() );
1060     }
1061     if (!bExtensionMode && lang.empty())
1062     {
1063         std::stringstream aStrStream;
1064         aStrStream << "language missing" << std::endl;
1065         throw HelpProcessingException( HELPPROCESSING_GENERAL_ERROR, aStrStream.str() );
1066     }
1067     link();
1068 }
1069 
1070 int main(int argc, char**argv)
1071 {
1072     sal_uInt32 starttime = osl_getGlobalTimer();
1073     std::vector<std::string> args;
1074     for (int i = 1; i < argc; ++i)
1075         args.push_back(std::string(argv[i]));
1076     try
1077     {
1078         HelpLinker* pHelpLinker = new HelpLinker();
1079         pHelpLinker->main( args );
1080         delete pHelpLinker;
1081     }
1082     catch( const HelpProcessingException& e )
1083     {
1084         std::cerr << e.m_aErrorMsg;
1085         exit(1);
1086     }
1087     sal_uInt32 endtime = osl_getGlobalTimer();
1088 #ifndef OS2 // YD @TODO@ crashes libc runtime :-(
1089     std::cout << "time taken was " << (endtime-starttime)/1000.0 << " seconds" << std::endl;
1090 #endif
1091     return 0;
1092 }
1093 
1094 // Variable to set an exception in "C" StructuredXMLErrorFunction
1095 static const HelpProcessingException* GpXMLParsingException = NULL;
1096 
1097 extern "C" void StructuredXMLErrorFunction(void *userData, xmlErrorPtr error)
1098 {
1099     (void)userData;
1100     (void)error;
1101 
1102     std::string aErrorMsg = error->message;
1103     std::string aXMLParsingFile;
1104     if( error->file != NULL )
1105         aXMLParsingFile = error->file;
1106     int nXMLParsingLine = error->line;
1107     HelpProcessingException* pException = new HelpProcessingException( aErrorMsg, aXMLParsingFile, nXMLParsingLine );
1108     GpXMLParsingException = pException;
1109 
1110     // Reset error handler
1111     xmlSetStructuredErrorFunc( NULL, NULL );
1112 }
1113 
1114 HelpProcessingErrorInfo& HelpProcessingErrorInfo::operator=( const struct HelpProcessingException& e )
1115 {
1116     m_eErrorClass = e.m_eErrorClass;
1117     rtl::OString tmpErrorMsg( e.m_aErrorMsg.c_str() );
1118     m_aErrorMsg = rtl::OStringToOUString( tmpErrorMsg, fs::getThreadTextEncoding() );
1119     rtl::OString tmpXMLParsingFile( e.m_aXMLParsingFile.c_str() );
1120     m_aXMLParsingFile = rtl::OStringToOUString( tmpXMLParsingFile, fs::getThreadTextEncoding() );
1121     m_nXMLParsingLine = e.m_nXMLParsingLine;
1122     return *this;
1123 }
1124 
1125 
1126 // Returns true in case of success, false in case of error
1127 HELPLINKER_DLLPUBLIC bool compileExtensionHelp
1128 (
1129     const rtl::OUString& aOfficeHelpPath,
1130     const rtl::OUString& aExtensionName,
1131     const rtl::OUString& aExtensionLanguageRoot,
1132     sal_Int32 nXhpFileCount, const rtl::OUString* pXhpFiles,
1133     const rtl::OUString& aDestination,
1134     HelpProcessingErrorInfo& o_rHelpProcessingErrorInfo
1135 )
1136 {
1137     bool bSuccess = true;
1138 
1139     std::vector<std::string> args;
1140     args.reserve(nXhpFileCount + 2);
1141     args.push_back(std::string("-mod"));
1142     rtl::OString aOExtensionName = rtl::OUStringToOString( aExtensionName, fs::getThreadTextEncoding() );
1143     args.push_back(std::string(aOExtensionName.getStr()));
1144 
1145     for( sal_Int32 iXhp = 0 ; iXhp < nXhpFileCount ; ++iXhp )
1146     {
1147         rtl::OUString aXhpFile = pXhpFiles[iXhp];
1148 
1149         rtl::OString aOXhpFile = rtl::OUStringToOString( aXhpFile, fs::getThreadTextEncoding() );
1150         args.push_back(std::string(aOXhpFile.getStr()));
1151     }
1152 
1153     rtl::OString aOExtensionLanguageRoot = rtl::OUStringToOString( aExtensionLanguageRoot, fs::getThreadTextEncoding() );
1154     const char* pExtensionPath = aOExtensionLanguageRoot.getStr();
1155     std::string aStdStrExtensionPath = pExtensionPath;
1156     rtl::OString aODestination = rtl::OUStringToOString(aDestination, fs::getThreadTextEncoding());
1157     const char* pDestination = aODestination.getStr();
1158     std::string aStdStrDestination = pDestination;
1159 
1160     // Set error handler
1161     xmlSetStructuredErrorFunc( NULL, (xmlStructuredErrorFunc)StructuredXMLErrorFunction );
1162     try
1163     {
1164         HelpLinker* pHelpLinker = new HelpLinker();
1165         pHelpLinker->main( args, &aStdStrExtensionPath, &aStdStrDestination, &aOfficeHelpPath );
1166         delete pHelpLinker;
1167     }
1168     catch( const HelpProcessingException& e )
1169     {
1170         if( GpXMLParsingException != NULL )
1171         {
1172             o_rHelpProcessingErrorInfo = *GpXMLParsingException;
1173             delete GpXMLParsingException;
1174             GpXMLParsingException = NULL;
1175         }
1176         else
1177         {
1178             o_rHelpProcessingErrorInfo = e;
1179         }
1180         bSuccess = false;
1181     }
1182     // Reset error handler
1183     xmlSetStructuredErrorFunc( NULL, NULL );
1184 
1185     // i83624: Tree files
1186     ::rtl::OUString aTreeFileURL = aExtensionLanguageRoot;
1187     aTreeFileURL += rtl::OUString::createFromAscii( "/help.tree" );
1188     osl::DirectoryItem aTreeFileItem;
1189     osl::FileBase::RC rcGet = osl::DirectoryItem::get( aTreeFileURL, aTreeFileItem );
1190     osl::FileStatus aFileStatus( FileStatusMask_FileSize );
1191     if( rcGet == osl::FileBase::E_None &&
1192         aTreeFileItem.getFileStatus( aFileStatus ) == osl::FileBase::E_None &&
1193         aFileStatus.isValid( FileStatusMask_FileSize ) )
1194     {
1195         sal_uInt64 ret, len = aFileStatus.getFileSize();
1196         char* s = new char[ int(len) ];  // the buffer to hold the installed files
1197         osl::File aFile( aTreeFileURL );
1198         aFile.open( OpenFlag_Read );
1199         aFile.read( s, len, ret );
1200         aFile.close();
1201 
1202         XML_Parser parser = XML_ParserCreate( 0 );
1203         int parsed = XML_Parse( parser, s, int( len ), true );
1204 
1205         if( parsed == 0 )
1206         {
1207             XML_Error nError = XML_GetErrorCode( parser );
1208             o_rHelpProcessingErrorInfo.m_eErrorClass = HELPPROCESSING_XMLPARSING_ERROR;
1209             o_rHelpProcessingErrorInfo.m_aErrorMsg = rtl::OUString::createFromAscii( XML_ErrorString( nError ) );;
1210             o_rHelpProcessingErrorInfo.m_aXMLParsingFile = aTreeFileURL;
1211             // CRAHSES!!! o_rHelpProcessingErrorInfo.m_nXMLParsingLine = XML_GetCurrentLineNumber( parser );
1212             bSuccess = false;
1213         }
1214 
1215         XML_ParserFree( parser );
1216         delete[] s;
1217     }
1218 
1219     return bSuccess;
1220 }
1221 
1222