1*983d4c8aSAndrew Rist /**************************************************************
2cdf0e10cSrcweir  *
3*983d4c8aSAndrew Rist  * Licensed to the Apache Software Foundation (ASF) under one
4*983d4c8aSAndrew Rist  * or more contributor license agreements.  See the NOTICE file
5*983d4c8aSAndrew Rist  * distributed with this work for additional information
6*983d4c8aSAndrew Rist  * regarding copyright ownership.  The ASF licenses this file
7*983d4c8aSAndrew Rist  * to you under the Apache License, Version 2.0 (the
8*983d4c8aSAndrew Rist  * "License"); you may not use this file except in compliance
9*983d4c8aSAndrew Rist  * with the License.  You may obtain a copy of the License at
10*983d4c8aSAndrew Rist  *
11*983d4c8aSAndrew Rist  *   http://www.apache.org/licenses/LICENSE-2.0
12*983d4c8aSAndrew Rist  *
13*983d4c8aSAndrew Rist  * Unless required by applicable law or agreed to in writing,
14*983d4c8aSAndrew Rist  * software distributed under the License is distributed on an
15*983d4c8aSAndrew Rist  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*983d4c8aSAndrew Rist  * KIND, either express or implied.  See the License for the
17*983d4c8aSAndrew Rist  * specific language governing permissions and limitations
18*983d4c8aSAndrew Rist  * under the License.
19*983d4c8aSAndrew Rist  *
20*983d4c8aSAndrew Rist  *************************************************************/
21*983d4c8aSAndrew Rist 
22*983d4c8aSAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir #ifndef HELPCOMPILER_HXX
25cdf0e10cSrcweir #define HELPCOMPILER_HXX
26cdf0e10cSrcweir 
27cdf0e10cSrcweir #include <string>
28cdf0e10cSrcweir #include <hash_map>
29cdf0e10cSrcweir #include <vector>
30cdf0e10cSrcweir #include <list>
31cdf0e10cSrcweir #include <fstream>
32cdf0e10cSrcweir #include <iostream>
33cdf0e10cSrcweir #include <sstream>
34cdf0e10cSrcweir #include <algorithm>
35cdf0e10cSrcweir #include <ctype.h>
36cdf0e10cSrcweir #ifdef SYSTEM_DB
37cdf0e10cSrcweir #include <db.h>
38cdf0e10cSrcweir #else
39cdf0e10cSrcweir #include <berkeleydb/db.h>
40cdf0e10cSrcweir #endif
41cdf0e10cSrcweir 
42cdf0e10cSrcweir #include <boost/shared_ptr.hpp>
43cdf0e10cSrcweir 
44cdf0e10cSrcweir #include <libxml/xmlmemory.h>
45cdf0e10cSrcweir #include <libxml/debugXML.h>
46cdf0e10cSrcweir #include <libxml/HTMLtree.h>
47cdf0e10cSrcweir #include <libxml/xmlIO.h>
48cdf0e10cSrcweir #include <libxml/xinclude.h>
49cdf0e10cSrcweir #include <libxml/catalog.h>
50cdf0e10cSrcweir 
51cdf0e10cSrcweir #include <rtl/ustring.hxx>
52cdf0e10cSrcweir #include <osl/thread.h>
53cdf0e10cSrcweir #include <osl/process.h>
54cdf0e10cSrcweir #include <osl/file.hxx>
55cdf0e10cSrcweir 
56cdf0e10cSrcweir #include <compilehelp.hxx>
57cdf0e10cSrcweir 
// EMULATEORIGINAL: compile-time switch, unconditionally defined to 1 here.
// NOTE(review): presumably selects code paths that mimic an earlier
// implementation of the help tooling - confirm at its use sites.
#define EMULATEORIGINAL 1

// HCDBG(stmt): debug-only statement wrapper.
//   - Without CMCDEBUG the statement sits behind `if (0)`: it is still
//     compiled (so it cannot bit-rot) but never executed.
//   - With CMCDEBUG it sits behind `if (1)` and always runs.
// The do { ... } while(0) shell makes the macro behave as one statement,
// so it is safe in an unbraced if/else.
#ifndef CMCDEBUG
    #define HCDBG(foo) do { if (0) foo; } while(0)
#else
    #define HCDBG(foo) do { if (1) foo; } while(0)
#endif
65cdf0e10cSrcweir 
66cdf0e10cSrcweir namespace fs
67cdf0e10cSrcweir {
68cdf0e10cSrcweir 	rtl_TextEncoding getThreadTextEncoding( void );
69cdf0e10cSrcweir 
70cdf0e10cSrcweir     enum convert { native };
71cdf0e10cSrcweir     class path
72cdf0e10cSrcweir     {
73cdf0e10cSrcweir     public:
74cdf0e10cSrcweir         ::rtl::OUString data;
75cdf0e10cSrcweir     public:
76cdf0e10cSrcweir         path() {}
77cdf0e10cSrcweir         path(const path &rOther) : data(rOther.data) {}
78cdf0e10cSrcweir         path(const std::string &in, convert)
79cdf0e10cSrcweir         {
80cdf0e10cSrcweir             rtl::OUString sWorkingDir;
81cdf0e10cSrcweir             osl_getProcessWorkingDir(&sWorkingDir.pData);
82cdf0e10cSrcweir 
83cdf0e10cSrcweir             rtl::OString tmp(in.c_str());
84cdf0e10cSrcweir             rtl::OUString ustrSystemPath(rtl::OStringToOUString(tmp, getThreadTextEncoding()));
85cdf0e10cSrcweir             osl::File::getFileURLFromSystemPath(ustrSystemPath, data);
86cdf0e10cSrcweir             osl::File::getAbsoluteFileURL(sWorkingDir, data, data);
87cdf0e10cSrcweir         }
88cdf0e10cSrcweir         path(const std::string &FileURL)
89cdf0e10cSrcweir 		{
90cdf0e10cSrcweir             rtl::OString tmp(FileURL.c_str());
91cdf0e10cSrcweir             data = rtl::OStringToOUString(tmp, getThreadTextEncoding());
92cdf0e10cSrcweir 		}
93cdf0e10cSrcweir         std::string native_file_string() const
94cdf0e10cSrcweir         {
95cdf0e10cSrcweir             ::rtl::OUString ustrSystemPath;
96cdf0e10cSrcweir             osl::File::getSystemPathFromFileURL(data, ustrSystemPath);
97cdf0e10cSrcweir             rtl::OString tmp(rtl::OUStringToOString(ustrSystemPath, getThreadTextEncoding()));
98cdf0e10cSrcweir             HCDBG(std::cerr << "native_file_string is " << tmp.getStr() << std::endl);
99cdf0e10cSrcweir             return std::string(tmp.getStr());
100cdf0e10cSrcweir         }
101cdf0e10cSrcweir #ifdef WNT
102cdf0e10cSrcweir         wchar_t const * native_file_string_w() const
103cdf0e10cSrcweir         {
104cdf0e10cSrcweir             ::rtl::OUString ustrSystemPath;
105cdf0e10cSrcweir             osl::File::getSystemPathFromFileURL(data, ustrSystemPath);
106cdf0e10cSrcweir             return reinterpret_cast< wchar_t const * >(ustrSystemPath.getStr());
107cdf0e10cSrcweir         }
108cdf0e10cSrcweir #endif
109cdf0e10cSrcweir         std::string native_directory_string() const { return native_file_string(); }
110cdf0e10cSrcweir         std::string toUTF8() const
111cdf0e10cSrcweir         {
112cdf0e10cSrcweir             rtl::OString tmp(rtl::OUStringToOString(data, RTL_TEXTENCODING_UTF8));
113cdf0e10cSrcweir             return std::string(tmp.getStr());
114cdf0e10cSrcweir         }
115cdf0e10cSrcweir         bool empty() const { return data.getLength() == 0; }
116cdf0e10cSrcweir         path operator/(const std::string &in) const
117cdf0e10cSrcweir         {
118cdf0e10cSrcweir             path ret(*this);
119cdf0e10cSrcweir             HCDBG(std::cerr << "orig was " <<
120cdf0e10cSrcweir                 rtl::OUStringToOString(ret.data, RTL_TEXTENCODING_UTF8).getStr() << std::endl);
121cdf0e10cSrcweir             rtl::OString tmp(in.c_str());
122cdf0e10cSrcweir             rtl::OUString ustrSystemPath(rtl::OStringToOUString(tmp, getThreadTextEncoding()));
123cdf0e10cSrcweir             ret.data += rtl::OUString(sal_Unicode('/'));
124cdf0e10cSrcweir             ret.data += ustrSystemPath;
125cdf0e10cSrcweir             HCDBG(std::cerr << "final is " <<
126cdf0e10cSrcweir                 rtl::OUStringToOString(ret.data, RTL_TEXTENCODING_UTF8).getStr() << std::endl);
127cdf0e10cSrcweir             return ret;
128cdf0e10cSrcweir         }
129cdf0e10cSrcweir         void append(const char *in)
130cdf0e10cSrcweir         {
131cdf0e10cSrcweir             rtl::OString tmp(in);
132cdf0e10cSrcweir             rtl::OUString ustrSystemPath(rtl::OStringToOUString(tmp, getThreadTextEncoding()));
133cdf0e10cSrcweir             data = data + ustrSystemPath;
134cdf0e10cSrcweir         }
135cdf0e10cSrcweir         void append(const std::string &in) { append(in.c_str()); }
136cdf0e10cSrcweir     };
137cdf0e10cSrcweir 
138cdf0e10cSrcweir     void create_directory(const fs::path indexDirName);
139cdf0e10cSrcweir     void rename(const fs::path &src, const fs::path &dest);
140cdf0e10cSrcweir     void copy(const fs::path &src, const fs::path &dest);
141cdf0e10cSrcweir     bool exists(const fs::path &in);
142cdf0e10cSrcweir     void remove_all(const fs::path &in);
143cdf0e10cSrcweir     void remove(const fs::path &in);
144cdf0e10cSrcweir }
145cdf0e10cSrcweir 
/**
 * Hash functor for std::string keys using Bob Jenkins' one-at-a-time
 * scheme, computed in size_t arithmetic.
 *
 * Every byte of the string (embedded NULs included) is folded in as a plain
 * `char`, so the result for bytes >= 0x80 depends on the signedness of char.
 * An empty string hashes to 0.
 */
struct joaat_hash
{
    size_t operator()(const std::string &str) const
    {
        size_t result = 0;

        // Per-byte mixing step.
        for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
        {
            result += *it;
            result += (result << 10);
            result ^= (result >> 6);
        }

        // Final mixing once all bytes are consumed.
        result += (result << 3);
        result ^= (result >> 11);
        result += (result << 15);

        return result;
    }
};
164cdf0e10cSrcweir 
165cdf0e10cSrcweir #define get16bits(d) ((((sal_uInt32)(((const sal_uInt8 *)(d))[1])) << 8)\
166cdf0e10cSrcweir                        +(sal_uInt32)(((const sal_uInt8 *)(d))[0]) )
167cdf0e10cSrcweir 
168cdf0e10cSrcweir struct SuperFastHash
169cdf0e10cSrcweir {
170cdf0e10cSrcweir     size_t operator()(const std::string &str) const
171cdf0e10cSrcweir     {
172cdf0e10cSrcweir         const char * data = str.data();
173cdf0e10cSrcweir         int len = str.size();
174cdf0e10cSrcweir         size_t hash = len, tmp;
175cdf0e10cSrcweir         if (len <= 0 || data == NULL) return 0;
176cdf0e10cSrcweir 
177cdf0e10cSrcweir         int rem = len & 3;
178cdf0e10cSrcweir         len >>= 2;
179cdf0e10cSrcweir 
180cdf0e10cSrcweir         /* Main loop */
181cdf0e10cSrcweir         for (;len > 0; len--)
182cdf0e10cSrcweir         {
183cdf0e10cSrcweir             hash  += get16bits (data);
184cdf0e10cSrcweir             tmp    = (get16bits (data+2) << 11) ^ hash;
185cdf0e10cSrcweir             hash   = (hash << 16) ^ tmp;
186cdf0e10cSrcweir             data  += 2*sizeof (sal_uInt16);
187cdf0e10cSrcweir             hash  += hash >> 11;
188cdf0e10cSrcweir         }
189cdf0e10cSrcweir 
190cdf0e10cSrcweir         /* Handle end cases */
191cdf0e10cSrcweir         switch (rem)
192cdf0e10cSrcweir         {
193cdf0e10cSrcweir             case 3: hash += get16bits (data);
194cdf0e10cSrcweir                     hash ^= hash << 16;
195cdf0e10cSrcweir                     hash ^= data[sizeof (sal_uInt16)] << 18;
196cdf0e10cSrcweir                     hash += hash >> 11;
197cdf0e10cSrcweir                     break;
198cdf0e10cSrcweir             case 2: hash += get16bits (data);
199cdf0e10cSrcweir                     hash ^= hash << 11;
200cdf0e10cSrcweir                     hash += hash >> 17;
201cdf0e10cSrcweir                     break;
202cdf0e10cSrcweir             case 1: hash += *data;
203cdf0e10cSrcweir                     hash ^= hash << 10;
204cdf0e10cSrcweir                     hash += hash >> 1;
205cdf0e10cSrcweir         }
206cdf0e10cSrcweir 
207cdf0e10cSrcweir         /* Force "avalanching" of final 127 bits */
208cdf0e10cSrcweir         hash ^= hash << 3;
209cdf0e10cSrcweir         hash += hash >> 5;
210cdf0e10cSrcweir         hash ^= hash << 4;
211cdf0e10cSrcweir         hash += hash >> 17;
212cdf0e10cSrcweir         hash ^= hash << 25;
213cdf0e10cSrcweir         hash += hash >> 6;
214cdf0e10cSrcweir 
215cdf0e10cSrcweir         return hash;
216cdf0e10cSrcweir     }
217cdf0e10cSrcweir };
218cdf0e10cSrcweir 
219cdf0e10cSrcweir #define pref_hash joaat_hash
220cdf0e10cSrcweir 
221cdf0e10cSrcweir typedef std::hash_map<std::string, std::string, pref_hash> Stringtable;
222cdf0e10cSrcweir typedef std::list<std::string> LinkedList;
223cdf0e10cSrcweir typedef std::vector<std::string> HashSet;
224cdf0e10cSrcweir 
225cdf0e10cSrcweir typedef std::hash_map<std::string, LinkedList, pref_hash> Hashtable;
226cdf0e10cSrcweir 
227cdf0e10cSrcweir class StreamTable
228cdf0e10cSrcweir {
229cdf0e10cSrcweir public:
230cdf0e10cSrcweir     std::string document_id;
231cdf0e10cSrcweir     std::string document_path;
232cdf0e10cSrcweir     std::string document_module;
233cdf0e10cSrcweir     std::string document_title;
234cdf0e10cSrcweir 
235cdf0e10cSrcweir     HashSet *appl_hidlist;
236cdf0e10cSrcweir     Hashtable *appl_keywords;
237cdf0e10cSrcweir     Stringtable *appl_helptexts;
238cdf0e10cSrcweir     xmlDocPtr appl_doc;
239cdf0e10cSrcweir 
240cdf0e10cSrcweir     HashSet *default_hidlist;
241cdf0e10cSrcweir     Hashtable *default_keywords;
242cdf0e10cSrcweir     Stringtable *default_helptexts;
243cdf0e10cSrcweir     xmlDocPtr default_doc;
244cdf0e10cSrcweir 
245cdf0e10cSrcweir     StreamTable() :
246cdf0e10cSrcweir         appl_hidlist(NULL), appl_keywords(NULL), appl_helptexts(NULL), appl_doc(NULL),
247cdf0e10cSrcweir         default_hidlist(NULL), default_keywords(NULL), default_helptexts(NULL), default_doc(NULL)
248cdf0e10cSrcweir     {}
249cdf0e10cSrcweir     void dropdefault()
250cdf0e10cSrcweir     {
251cdf0e10cSrcweir         delete default_hidlist;
252cdf0e10cSrcweir         delete default_keywords;
253cdf0e10cSrcweir         delete default_helptexts;
254cdf0e10cSrcweir         if (default_doc) xmlFreeDoc(default_doc);
255cdf0e10cSrcweir     }
256cdf0e10cSrcweir     void dropappl()
257cdf0e10cSrcweir     {
258cdf0e10cSrcweir         delete appl_hidlist;
259cdf0e10cSrcweir         delete appl_keywords;
260cdf0e10cSrcweir         delete appl_helptexts;
261cdf0e10cSrcweir         if (appl_doc) xmlFreeDoc(appl_doc);
262cdf0e10cSrcweir     }
263cdf0e10cSrcweir     ~StreamTable()
264cdf0e10cSrcweir     {
265cdf0e10cSrcweir         dropappl();
266cdf0e10cSrcweir         dropdefault();
267cdf0e10cSrcweir     }
268cdf0e10cSrcweir };
269cdf0e10cSrcweir 
270cdf0e10cSrcweir struct HelpProcessingException
271cdf0e10cSrcweir {
272cdf0e10cSrcweir 	HelpProcessingErrorClass		m_eErrorClass;
273cdf0e10cSrcweir 	std::string						m_aErrorMsg;
274cdf0e10cSrcweir 	std::string						m_aXMLParsingFile;
275cdf0e10cSrcweir 	int								m_nXMLParsingLine;
276cdf0e10cSrcweir 
277cdf0e10cSrcweir 	HelpProcessingException( HelpProcessingErrorClass eErrorClass, const std::string& aErrorMsg )
278cdf0e10cSrcweir 		: m_eErrorClass( eErrorClass )
279cdf0e10cSrcweir 		, m_aErrorMsg( aErrorMsg )
280cdf0e10cSrcweir 	{}
281cdf0e10cSrcweir 	HelpProcessingException( const std::string& aErrorMsg, const std::string& aXMLParsingFile, int nXMLParsingLine )
282cdf0e10cSrcweir 		: m_eErrorClass( HELPPROCESSING_XMLPARSING_ERROR )
283cdf0e10cSrcweir 		, m_aErrorMsg( aErrorMsg )
284cdf0e10cSrcweir 		, m_aXMLParsingFile( aXMLParsingFile )
285cdf0e10cSrcweir 		, m_nXMLParsingLine( nXMLParsingLine )
286cdf0e10cSrcweir 	{}
287cdf0e10cSrcweir };
288cdf0e10cSrcweir 
289cdf0e10cSrcweir class HelpCompiler
290cdf0e10cSrcweir {
291cdf0e10cSrcweir public:
292cdf0e10cSrcweir     HelpCompiler(StreamTable &streamTable,
293cdf0e10cSrcweir                 const fs::path &in_inputFile,
294cdf0e10cSrcweir                 const fs::path &in_src,
295cdf0e10cSrcweir                 const fs::path &in_resEmbStylesheet,
296cdf0e10cSrcweir                 const std::string &in_module,
297cdf0e10cSrcweir                 const std::string &in_lang,
298cdf0e10cSrcweir 				bool in_bExtensionMode);
299cdf0e10cSrcweir     bool compile( void ) throw (HelpProcessingException);
300cdf0e10cSrcweir     void addEntryToJarFile(const std::string &prefix,
301cdf0e10cSrcweir         const std::string &entryName, const std::string &bytesToAdd);
302cdf0e10cSrcweir     void addEntryToJarFile(const std::string &prefix,
303cdf0e10cSrcweir                 const std::string &entryName, const HashSet &bytesToAdd);
304cdf0e10cSrcweir     void addEntryToJarFile(const std::string &prefix,
305cdf0e10cSrcweir                 const std::string &entryName, const Stringtable &bytesToAdd);
306cdf0e10cSrcweir     void addEntryToJarFile(const std::string &prefix,
307cdf0e10cSrcweir                 const std::string &entryName, const Hashtable &bytesToAdd);
308cdf0e10cSrcweir private:
309cdf0e10cSrcweir     xmlDocPtr getSourceDocument(const fs::path &filePath);
310cdf0e10cSrcweir     HashSet switchFind(xmlDocPtr doc);
311cdf0e10cSrcweir     xmlNodePtr clone(xmlNodePtr node, const std::string& appl);
312cdf0e10cSrcweir     StreamTable &streamTable;
313cdf0e10cSrcweir     const fs::path inputFile, src;
314cdf0e10cSrcweir     const std::string module, lang;
315cdf0e10cSrcweir     const fs::path resEmbStylesheet;
316cdf0e10cSrcweir 	bool bExtensionMode;
317cdf0e10cSrcweir };
318cdf0e10cSrcweir 
319cdf0e10cSrcweir #endif
320cdf0e10cSrcweir 
321cdf0e10cSrcweir /* vi:set tabstop=4 shiftwidth=4 expandtab: */
322