1*983d4c8aSAndrew Rist /************************************************************** 2cdf0e10cSrcweir * 3*983d4c8aSAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one 4*983d4c8aSAndrew Rist * or more contributor license agreements. See the NOTICE file 5*983d4c8aSAndrew Rist * distributed with this work for additional information 6*983d4c8aSAndrew Rist * regarding copyright ownership. The ASF licenses this file 7*983d4c8aSAndrew Rist * to you under the Apache License, Version 2.0 (the 8*983d4c8aSAndrew Rist * "License"); you may not use this file except in compliance 9*983d4c8aSAndrew Rist * with the License. You may obtain a copy of the License at 10*983d4c8aSAndrew Rist * 11*983d4c8aSAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0 12*983d4c8aSAndrew Rist * 13*983d4c8aSAndrew Rist * Unless required by applicable law or agreed to in writing, 14*983d4c8aSAndrew Rist * software distributed under the License is distributed on an 15*983d4c8aSAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16*983d4c8aSAndrew Rist * KIND, either express or implied. See the License for the 17*983d4c8aSAndrew Rist * specific language governing permissions and limitations 18*983d4c8aSAndrew Rist * under the License. 19*983d4c8aSAndrew Rist * 20*983d4c8aSAndrew Rist *************************************************************/ 21*983d4c8aSAndrew Rist 22*983d4c8aSAndrew Rist 23cdf0e10cSrcweir 24cdf0e10cSrcweir #ifndef HELPCOMPILER_HXX 25cdf0e10cSrcweir #define HELPCOMPILER_HXX 26cdf0e10cSrcweir 27cdf0e10cSrcweir #include <string> 28cdf0e10cSrcweir #include <hash_map> 29cdf0e10cSrcweir #include <vector> 30cdf0e10cSrcweir #include <list> 31cdf0e10cSrcweir #include <fstream> 32cdf0e10cSrcweir #include <iostream> 33cdf0e10cSrcweir #include <sstream> 34cdf0e10cSrcweir #include <algorithm> 35cdf0e10cSrcweir #include <ctype.h> 36cdf0e10cSrcweir #ifdef SYSTEM_DB 37cdf0e10cSrcweir #include <db.h> 38cdf0e10cSrcweir #else 39cdf0e10cSrcweir #include <berkeleydb/db.h> 40cdf0e10cSrcweir #endif 41cdf0e10cSrcweir 42cdf0e10cSrcweir #include <boost/shared_ptr.hpp> 43cdf0e10cSrcweir 44cdf0e10cSrcweir #include <libxml/xmlmemory.h> 45cdf0e10cSrcweir #include <libxml/debugXML.h> 46cdf0e10cSrcweir #include <libxml/HTMLtree.h> 47cdf0e10cSrcweir #include <libxml/xmlIO.h> 48cdf0e10cSrcweir #include <libxml/xinclude.h> 49cdf0e10cSrcweir #include <libxml/catalog.h> 50cdf0e10cSrcweir 51cdf0e10cSrcweir #include <rtl/ustring.hxx> 52cdf0e10cSrcweir #include <osl/thread.h> 53cdf0e10cSrcweir #include <osl/process.h> 54cdf0e10cSrcweir #include <osl/file.hxx> 55cdf0e10cSrcweir 56cdf0e10cSrcweir #include <compilehelp.hxx> 57cdf0e10cSrcweir 58cdf0e10cSrcweir #define EMULATEORIGINAL 1 59cdf0e10cSrcweir 60cdf0e10cSrcweir #ifdef CMCDEBUG 61cdf0e10cSrcweir #define HCDBG(foo) do { if (1) foo; } while(0) 62cdf0e10cSrcweir #else 63cdf0e10cSrcweir #define HCDBG(foo) do { if (0) foo; } while(0) 64cdf0e10cSrcweir #endif 65cdf0e10cSrcweir 66cdf0e10cSrcweir namespace fs 67cdf0e10cSrcweir { 68cdf0e10cSrcweir rtl_TextEncoding getThreadTextEncoding( void ); 69cdf0e10cSrcweir 70cdf0e10cSrcweir enum convert { native }; 71cdf0e10cSrcweir class path 72cdf0e10cSrcweir { 73cdf0e10cSrcweir public: 74cdf0e10cSrcweir ::rtl::OUString data; 75cdf0e10cSrcweir public: 76cdf0e10cSrcweir path() {} 77cdf0e10cSrcweir path(const path &rOther) : data(rOther.data) {} 78cdf0e10cSrcweir path(const std::string &in, convert) 79cdf0e10cSrcweir { 80cdf0e10cSrcweir rtl::OUString sWorkingDir; 81cdf0e10cSrcweir osl_getProcessWorkingDir(&sWorkingDir.pData); 82cdf0e10cSrcweir 83cdf0e10cSrcweir rtl::OString tmp(in.c_str()); 84cdf0e10cSrcweir rtl::OUString ustrSystemPath(rtl::OStringToOUString(tmp, getThreadTextEncoding())); 85cdf0e10cSrcweir osl::File::getFileURLFromSystemPath(ustrSystemPath, data); 86cdf0e10cSrcweir osl::File::getAbsoluteFileURL(sWorkingDir, data, data); 87cdf0e10cSrcweir } 88cdf0e10cSrcweir path(const std::string &FileURL) 89cdf0e10cSrcweir { 90cdf0e10cSrcweir rtl::OString tmp(FileURL.c_str()); 91cdf0e10cSrcweir data = rtl::OStringToOUString(tmp, getThreadTextEncoding()); 92cdf0e10cSrcweir } 93cdf0e10cSrcweir std::string native_file_string() const 94cdf0e10cSrcweir { 95cdf0e10cSrcweir ::rtl::OUString ustrSystemPath; 96cdf0e10cSrcweir osl::File::getSystemPathFromFileURL(data, ustrSystemPath); 97cdf0e10cSrcweir rtl::OString tmp(rtl::OUStringToOString(ustrSystemPath, getThreadTextEncoding())); 98cdf0e10cSrcweir HCDBG(std::cerr << "native_file_string is " << tmp.getStr() << std::endl); 99cdf0e10cSrcweir return std::string(tmp.getStr()); 100cdf0e10cSrcweir } 101cdf0e10cSrcweir #ifdef WNT 102cdf0e10cSrcweir wchar_t const * native_file_string_w() const 103cdf0e10cSrcweir { 104cdf0e10cSrcweir ::rtl::OUString ustrSystemPath; 105cdf0e10cSrcweir osl::File::getSystemPathFromFileURL(data, ustrSystemPath); 106cdf0e10cSrcweir return reinterpret_cast< wchar_t const * >(ustrSystemPath.getStr()); 107cdf0e10cSrcweir } 108cdf0e10cSrcweir #endif 109cdf0e10cSrcweir std::string native_directory_string() const { return native_file_string(); } 110cdf0e10cSrcweir std::string toUTF8() const 111cdf0e10cSrcweir { 112cdf0e10cSrcweir rtl::OString tmp(rtl::OUStringToOString(data, RTL_TEXTENCODING_UTF8)); 113cdf0e10cSrcweir return std::string(tmp.getStr()); 114cdf0e10cSrcweir } 115cdf0e10cSrcweir bool empty() const { return data.getLength() == 0; } 116cdf0e10cSrcweir path operator/(const std::string &in) const 117cdf0e10cSrcweir { 118cdf0e10cSrcweir path ret(*this); 119cdf0e10cSrcweir HCDBG(std::cerr << "orig was " << 120cdf0e10cSrcweir rtl::OUStringToOString(ret.data, RTL_TEXTENCODING_UTF8).getStr() << std::endl); 121cdf0e10cSrcweir rtl::OString tmp(in.c_str()); 122cdf0e10cSrcweir rtl::OUString ustrSystemPath(rtl::OStringToOUString(tmp, getThreadTextEncoding())); 123cdf0e10cSrcweir ret.data += rtl::OUString(sal_Unicode('/')); 124cdf0e10cSrcweir ret.data += ustrSystemPath; 125cdf0e10cSrcweir HCDBG(std::cerr << "final is " << 126cdf0e10cSrcweir rtl::OUStringToOString(ret.data, RTL_TEXTENCODING_UTF8).getStr() << std::endl); 127cdf0e10cSrcweir return ret; 128cdf0e10cSrcweir } 129cdf0e10cSrcweir void append(const char *in) 130cdf0e10cSrcweir { 131cdf0e10cSrcweir rtl::OString tmp(in); 132cdf0e10cSrcweir rtl::OUString ustrSystemPath(rtl::OStringToOUString(tmp, getThreadTextEncoding())); 133cdf0e10cSrcweir data = data + ustrSystemPath; 134cdf0e10cSrcweir } 135cdf0e10cSrcweir void append(const std::string &in) { append(in.c_str()); } 136cdf0e10cSrcweir }; 137cdf0e10cSrcweir 138cdf0e10cSrcweir void create_directory(const fs::path indexDirName); 139cdf0e10cSrcweir void rename(const fs::path &src, const fs::path &dest); 140cdf0e10cSrcweir void copy(const fs::path &src, const fs::path &dest); 141cdf0e10cSrcweir bool exists(const fs::path &in); 142cdf0e10cSrcweir void remove_all(const fs::path &in); 143cdf0e10cSrcweir void remove(const fs::path &in); 144cdf0e10cSrcweir } 145cdf0e10cSrcweir 146cdf0e10cSrcweir struct joaat_hash 147cdf0e10cSrcweir { 148cdf0e10cSrcweir size_t operator()(const std::string &str) const 149cdf0e10cSrcweir { 150cdf0e10cSrcweir size_t hash = 0; 151cdf0e10cSrcweir const char *key = str.data(); 152cdf0e10cSrcweir for (size_t i = 0; i < str.size(); i++) 153cdf0e10cSrcweir { 154cdf0e10cSrcweir hash += key[i]; 155cdf0e10cSrcweir hash += (hash << 10); 156cdf0e10cSrcweir hash ^= (hash >> 6); 157cdf0e10cSrcweir } 158cdf0e10cSrcweir hash += (hash << 3); 159cdf0e10cSrcweir hash ^= (hash >> 11); 160cdf0e10cSrcweir hash += (hash << 15); 161cdf0e10cSrcweir return hash; 162cdf0e10cSrcweir } 163cdf0e10cSrcweir }; 164cdf0e10cSrcweir 165cdf0e10cSrcweir #define get16bits(d) ((((sal_uInt32)(((const sal_uInt8 *)(d))[1])) << 8)\ 166cdf0e10cSrcweir +(sal_uInt32)(((const sal_uInt8 *)(d))[0]) ) 167cdf0e10cSrcweir 168cdf0e10cSrcweir struct SuperFastHash 169cdf0e10cSrcweir { 170cdf0e10cSrcweir size_t operator()(const std::string &str) const 171cdf0e10cSrcweir { 172cdf0e10cSrcweir const char * data = str.data(); 173cdf0e10cSrcweir int len = str.size(); 174cdf0e10cSrcweir size_t hash = len, tmp; 175cdf0e10cSrcweir if (len <= 0 || data == NULL) return 0; 176cdf0e10cSrcweir 177cdf0e10cSrcweir int rem = len & 3; 178cdf0e10cSrcweir len >>= 2; 179cdf0e10cSrcweir 180cdf0e10cSrcweir /* Main loop */ 181cdf0e10cSrcweir for (;len > 0; len--) 182cdf0e10cSrcweir { 183cdf0e10cSrcweir hash += get16bits (data); 184cdf0e10cSrcweir tmp = (get16bits (data+2) << 11) ^ hash; 185cdf0e10cSrcweir hash = (hash << 16) ^ tmp; 186cdf0e10cSrcweir data += 2*sizeof (sal_uInt16); 187cdf0e10cSrcweir hash += hash >> 11; 188cdf0e10cSrcweir } 189cdf0e10cSrcweir 190cdf0e10cSrcweir /* Handle end cases */ 191cdf0e10cSrcweir switch (rem) 192cdf0e10cSrcweir { 193cdf0e10cSrcweir case 3: hash += get16bits (data); 194cdf0e10cSrcweir hash ^= hash << 16; 195cdf0e10cSrcweir hash ^= data[sizeof (sal_uInt16)] << 18; 196cdf0e10cSrcweir hash += hash >> 11; 197cdf0e10cSrcweir break; 198cdf0e10cSrcweir case 2: hash += get16bits (data); 199cdf0e10cSrcweir hash ^= hash << 11; 200cdf0e10cSrcweir hash += hash >> 17; 201cdf0e10cSrcweir break; 202cdf0e10cSrcweir case 1: hash += *data; 203cdf0e10cSrcweir hash ^= hash << 10; 204cdf0e10cSrcweir hash += hash >> 1; 205cdf0e10cSrcweir } 206cdf0e10cSrcweir 207cdf0e10cSrcweir /* Force "avalanching" of final 127 bits */ 208cdf0e10cSrcweir hash ^= hash << 3; 209cdf0e10cSrcweir hash += hash >> 5; 210cdf0e10cSrcweir hash ^= hash << 4; 211cdf0e10cSrcweir hash += hash >> 17; 212cdf0e10cSrcweir hash ^= hash << 25; 213cdf0e10cSrcweir hash += hash >> 6; 214cdf0e10cSrcweir 215cdf0e10cSrcweir return hash; 216cdf0e10cSrcweir } 217cdf0e10cSrcweir }; 218cdf0e10cSrcweir 219cdf0e10cSrcweir #define pref_hash joaat_hash 220cdf0e10cSrcweir 221cdf0e10cSrcweir typedef std::hash_map<std::string, std::string, pref_hash> Stringtable; 222cdf0e10cSrcweir typedef std::list<std::string> LinkedList; 223cdf0e10cSrcweir typedef std::vector<std::string> HashSet; 224cdf0e10cSrcweir 225cdf0e10cSrcweir typedef std::hash_map<std::string, LinkedList, pref_hash> Hashtable; 226cdf0e10cSrcweir 227cdf0e10cSrcweir class StreamTable 228cdf0e10cSrcweir { 229cdf0e10cSrcweir public: 230cdf0e10cSrcweir std::string document_id; 231cdf0e10cSrcweir std::string document_path; 232cdf0e10cSrcweir std::string document_module; 233cdf0e10cSrcweir std::string document_title; 234cdf0e10cSrcweir 235cdf0e10cSrcweir HashSet *appl_hidlist; 236cdf0e10cSrcweir Hashtable *appl_keywords; 237cdf0e10cSrcweir Stringtable *appl_helptexts; 238cdf0e10cSrcweir xmlDocPtr appl_doc; 239cdf0e10cSrcweir 240cdf0e10cSrcweir HashSet *default_hidlist; 241cdf0e10cSrcweir Hashtable *default_keywords; 242cdf0e10cSrcweir Stringtable *default_helptexts; 243cdf0e10cSrcweir xmlDocPtr default_doc; 244cdf0e10cSrcweir 245cdf0e10cSrcweir StreamTable() : 246cdf0e10cSrcweir appl_hidlist(NULL), appl_keywords(NULL), appl_helptexts(NULL), appl_doc(NULL), 247cdf0e10cSrcweir default_hidlist(NULL), default_keywords(NULL), default_helptexts(NULL), default_doc(NULL) 248cdf0e10cSrcweir {} 249cdf0e10cSrcweir void dropdefault() 250cdf0e10cSrcweir { 251cdf0e10cSrcweir delete default_hidlist; 252cdf0e10cSrcweir delete default_keywords; 253cdf0e10cSrcweir delete default_helptexts; 254cdf0e10cSrcweir if (default_doc) xmlFreeDoc(default_doc); 255cdf0e10cSrcweir } 256cdf0e10cSrcweir void dropappl() 257cdf0e10cSrcweir { 258cdf0e10cSrcweir delete appl_hidlist; 259cdf0e10cSrcweir delete appl_keywords; 260cdf0e10cSrcweir delete appl_helptexts; 261cdf0e10cSrcweir if (appl_doc) xmlFreeDoc(appl_doc); 262cdf0e10cSrcweir } 263cdf0e10cSrcweir ~StreamTable() 264cdf0e10cSrcweir { 265cdf0e10cSrcweir dropappl(); 266cdf0e10cSrcweir dropdefault(); 267cdf0e10cSrcweir } 268cdf0e10cSrcweir }; 269cdf0e10cSrcweir 270cdf0e10cSrcweir struct HelpProcessingException 271cdf0e10cSrcweir { 272cdf0e10cSrcweir HelpProcessingErrorClass m_eErrorClass; 273cdf0e10cSrcweir std::string m_aErrorMsg; 274cdf0e10cSrcweir std::string m_aXMLParsingFile; 275cdf0e10cSrcweir int m_nXMLParsingLine; 276cdf0e10cSrcweir 277cdf0e10cSrcweir HelpProcessingException( HelpProcessingErrorClass eErrorClass, const std::string& aErrorMsg ) 278cdf0e10cSrcweir : m_eErrorClass( eErrorClass ) 279cdf0e10cSrcweir , m_aErrorMsg( aErrorMsg ) 280cdf0e10cSrcweir {} 281cdf0e10cSrcweir HelpProcessingException( const std::string& aErrorMsg, const std::string& aXMLParsingFile, int nXMLParsingLine ) 282cdf0e10cSrcweir : m_eErrorClass( HELPPROCESSING_XMLPARSING_ERROR ) 283cdf0e10cSrcweir , m_aErrorMsg( aErrorMsg ) 284cdf0e10cSrcweir , m_aXMLParsingFile( aXMLParsingFile ) 285cdf0e10cSrcweir , m_nXMLParsingLine( nXMLParsingLine ) 286cdf0e10cSrcweir {} 287cdf0e10cSrcweir }; 288cdf0e10cSrcweir 289cdf0e10cSrcweir class HelpCompiler 290cdf0e10cSrcweir { 291cdf0e10cSrcweir public: 292cdf0e10cSrcweir HelpCompiler(StreamTable &streamTable, 293cdf0e10cSrcweir const fs::path &in_inputFile, 294cdf0e10cSrcweir const fs::path &in_src, 295cdf0e10cSrcweir const fs::path &in_resEmbStylesheet, 296cdf0e10cSrcweir const std::string &in_module, 297cdf0e10cSrcweir const std::string &in_lang, 298cdf0e10cSrcweir bool in_bExtensionMode); 299cdf0e10cSrcweir bool compile( void ) throw (HelpProcessingException); 300cdf0e10cSrcweir void addEntryToJarFile(const std::string &prefix, 301cdf0e10cSrcweir const std::string &entryName, const std::string &bytesToAdd); 302cdf0e10cSrcweir void addEntryToJarFile(const std::string &prefix, 303cdf0e10cSrcweir const std::string &entryName, const HashSet &bytesToAdd); 304cdf0e10cSrcweir void addEntryToJarFile(const std::string &prefix, 305cdf0e10cSrcweir const std::string &entryName, const Stringtable &bytesToAdd); 306cdf0e10cSrcweir void addEntryToJarFile(const std::string &prefix, 307cdf0e10cSrcweir const std::string &entryName, const Hashtable &bytesToAdd); 308cdf0e10cSrcweir private: 309cdf0e10cSrcweir xmlDocPtr getSourceDocument(const fs::path &filePath); 310cdf0e10cSrcweir HashSet switchFind(xmlDocPtr doc); 311cdf0e10cSrcweir xmlNodePtr clone(xmlNodePtr node, const std::string& appl); 312cdf0e10cSrcweir StreamTable &streamTable; 313cdf0e10cSrcweir const fs::path inputFile, src; 314cdf0e10cSrcweir const std::string module, lang; 315cdf0e10cSrcweir const fs::path resEmbStylesheet; 316cdf0e10cSrcweir bool bExtensionMode; 317cdf0e10cSrcweir }; 318cdf0e10cSrcweir 319cdf0e10cSrcweir #endif 320cdf0e10cSrcweir 321cdf0e10cSrcweir /* vi:set tabstop=4 shiftwidth=4 expandtab: */ 322