1*cdf0e10cSrcweir /************************************************************************* 2*cdf0e10cSrcweir * 3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4*cdf0e10cSrcweir * 5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates. 6*cdf0e10cSrcweir * 7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite 8*cdf0e10cSrcweir * 9*cdf0e10cSrcweir * This file is part of OpenOffice.org. 10*cdf0e10cSrcweir * 11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify 12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3 13*cdf0e10cSrcweir * only, as published by the Free Software Foundation. 14*cdf0e10cSrcweir * 15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful, 16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of 17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details 19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code). 20*cdf0e10cSrcweir * 21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License 22*cdf0e10cSrcweir * version 3 along with OpenOffice.org. If not, see 23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html> 24*cdf0e10cSrcweir * for a copy of the LGPLv3 License. 25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir 28*cdf0e10cSrcweir #ifndef HELPCOMPILER_HXX 29*cdf0e10cSrcweir #define HELPCOMPILER_HXX 30*cdf0e10cSrcweir 31*cdf0e10cSrcweir #include <string> 32*cdf0e10cSrcweir #include <hash_map> 33*cdf0e10cSrcweir #include <vector> 34*cdf0e10cSrcweir #include <list> 35*cdf0e10cSrcweir #include <fstream> 36*cdf0e10cSrcweir #include <iostream> 37*cdf0e10cSrcweir #include <sstream> 38*cdf0e10cSrcweir #include <algorithm> 39*cdf0e10cSrcweir #include <ctype.h> 40*cdf0e10cSrcweir #ifdef SYSTEM_DB 41*cdf0e10cSrcweir #include <db.h> 42*cdf0e10cSrcweir #else 43*cdf0e10cSrcweir #include <berkeleydb/db.h> 44*cdf0e10cSrcweir #endif 45*cdf0e10cSrcweir 46*cdf0e10cSrcweir #include <boost/shared_ptr.hpp> 47*cdf0e10cSrcweir 48*cdf0e10cSrcweir #include <libxml/xmlmemory.h> 49*cdf0e10cSrcweir #include <libxml/debugXML.h> 50*cdf0e10cSrcweir #include <libxml/HTMLtree.h> 51*cdf0e10cSrcweir #include <libxml/xmlIO.h> 52*cdf0e10cSrcweir #include <libxml/xinclude.h> 53*cdf0e10cSrcweir #include <libxml/catalog.h> 54*cdf0e10cSrcweir 55*cdf0e10cSrcweir #include <rtl/ustring.hxx> 56*cdf0e10cSrcweir #include <osl/thread.h> 57*cdf0e10cSrcweir #include <osl/process.h> 58*cdf0e10cSrcweir #include <osl/file.hxx> 59*cdf0e10cSrcweir 60*cdf0e10cSrcweir #include <compilehelp.hxx> 61*cdf0e10cSrcweir 62*cdf0e10cSrcweir #define EMULATEORIGINAL 1 63*cdf0e10cSrcweir 64*cdf0e10cSrcweir #ifdef CMCDEBUG 65*cdf0e10cSrcweir #define HCDBG(foo) do { if (1) foo; } while(0) 66*cdf0e10cSrcweir #else 67*cdf0e10cSrcweir #define HCDBG(foo) do { if (0) foo; } while(0) 68*cdf0e10cSrcweir #endif 69*cdf0e10cSrcweir 70*cdf0e10cSrcweir namespace fs 71*cdf0e10cSrcweir { 72*cdf0e10cSrcweir rtl_TextEncoding getThreadTextEncoding( void ); 73*cdf0e10cSrcweir 74*cdf0e10cSrcweir enum convert { native }; 75*cdf0e10cSrcweir class path 76*cdf0e10cSrcweir { 77*cdf0e10cSrcweir public: 78*cdf0e10cSrcweir ::rtl::OUString data; 79*cdf0e10cSrcweir public: 80*cdf0e10cSrcweir path() {} 81*cdf0e10cSrcweir path(const path &rOther) : data(rOther.data) {} 82*cdf0e10cSrcweir path(const std::string &in, convert) 83*cdf0e10cSrcweir { 84*cdf0e10cSrcweir rtl::OUString sWorkingDir; 85*cdf0e10cSrcweir osl_getProcessWorkingDir(&sWorkingDir.pData); 86*cdf0e10cSrcweir 87*cdf0e10cSrcweir rtl::OString tmp(in.c_str()); 88*cdf0e10cSrcweir rtl::OUString ustrSystemPath(rtl::OStringToOUString(tmp, getThreadTextEncoding())); 89*cdf0e10cSrcweir osl::File::getFileURLFromSystemPath(ustrSystemPath, data); 90*cdf0e10cSrcweir osl::File::getAbsoluteFileURL(sWorkingDir, data, data); 91*cdf0e10cSrcweir } 92*cdf0e10cSrcweir path(const std::string &FileURL) 93*cdf0e10cSrcweir { 94*cdf0e10cSrcweir rtl::OString tmp(FileURL.c_str()); 95*cdf0e10cSrcweir data = rtl::OStringToOUString(tmp, getThreadTextEncoding()); 96*cdf0e10cSrcweir } 97*cdf0e10cSrcweir std::string native_file_string() const 98*cdf0e10cSrcweir { 99*cdf0e10cSrcweir ::rtl::OUString ustrSystemPath; 100*cdf0e10cSrcweir osl::File::getSystemPathFromFileURL(data, ustrSystemPath); 101*cdf0e10cSrcweir rtl::OString tmp(rtl::OUStringToOString(ustrSystemPath, getThreadTextEncoding())); 102*cdf0e10cSrcweir HCDBG(std::cerr << "native_file_string is " << tmp.getStr() << std::endl); 103*cdf0e10cSrcweir return std::string(tmp.getStr()); 104*cdf0e10cSrcweir } 105*cdf0e10cSrcweir #ifdef WNT 106*cdf0e10cSrcweir wchar_t const * native_file_string_w() const 107*cdf0e10cSrcweir { 108*cdf0e10cSrcweir ::rtl::OUString ustrSystemPath; 109*cdf0e10cSrcweir osl::File::getSystemPathFromFileURL(data, ustrSystemPath); 110*cdf0e10cSrcweir return reinterpret_cast< wchar_t const * >(ustrSystemPath.getStr()); 111*cdf0e10cSrcweir } 112*cdf0e10cSrcweir #endif 113*cdf0e10cSrcweir std::string native_directory_string() const { return native_file_string(); } 114*cdf0e10cSrcweir std::string toUTF8() const 115*cdf0e10cSrcweir { 116*cdf0e10cSrcweir rtl::OString tmp(rtl::OUStringToOString(data, RTL_TEXTENCODING_UTF8)); 117*cdf0e10cSrcweir return std::string(tmp.getStr()); 118*cdf0e10cSrcweir } 119*cdf0e10cSrcweir bool empty() const { return data.getLength() == 0; } 120*cdf0e10cSrcweir path operator/(const std::string &in) const 121*cdf0e10cSrcweir { 122*cdf0e10cSrcweir path ret(*this); 123*cdf0e10cSrcweir HCDBG(std::cerr << "orig was " << 124*cdf0e10cSrcweir rtl::OUStringToOString(ret.data, RTL_TEXTENCODING_UTF8).getStr() << std::endl); 125*cdf0e10cSrcweir rtl::OString tmp(in.c_str()); 126*cdf0e10cSrcweir rtl::OUString ustrSystemPath(rtl::OStringToOUString(tmp, getThreadTextEncoding())); 127*cdf0e10cSrcweir ret.data += rtl::OUString(sal_Unicode('/')); 128*cdf0e10cSrcweir ret.data += ustrSystemPath; 129*cdf0e10cSrcweir HCDBG(std::cerr << "final is " << 130*cdf0e10cSrcweir rtl::OUStringToOString(ret.data, RTL_TEXTENCODING_UTF8).getStr() << std::endl); 131*cdf0e10cSrcweir return ret; 132*cdf0e10cSrcweir } 133*cdf0e10cSrcweir void append(const char *in) 134*cdf0e10cSrcweir { 135*cdf0e10cSrcweir rtl::OString tmp(in); 136*cdf0e10cSrcweir rtl::OUString ustrSystemPath(rtl::OStringToOUString(tmp, getThreadTextEncoding())); 137*cdf0e10cSrcweir data = data + ustrSystemPath; 138*cdf0e10cSrcweir } 139*cdf0e10cSrcweir void append(const std::string &in) { append(in.c_str()); } 140*cdf0e10cSrcweir }; 141*cdf0e10cSrcweir 142*cdf0e10cSrcweir void create_directory(const fs::path indexDirName); 143*cdf0e10cSrcweir void rename(const fs::path &src, const fs::path &dest); 144*cdf0e10cSrcweir void copy(const fs::path &src, const fs::path &dest); 145*cdf0e10cSrcweir bool exists(const fs::path &in); 146*cdf0e10cSrcweir void remove_all(const fs::path &in); 147*cdf0e10cSrcweir void remove(const fs::path &in); 148*cdf0e10cSrcweir } 149*cdf0e10cSrcweir 150*cdf0e10cSrcweir struct joaat_hash 151*cdf0e10cSrcweir { 152*cdf0e10cSrcweir size_t operator()(const std::string &str) const 153*cdf0e10cSrcweir { 154*cdf0e10cSrcweir size_t hash = 0; 155*cdf0e10cSrcweir const char *key = str.data(); 156*cdf0e10cSrcweir for (size_t i = 0; i < str.size(); i++) 157*cdf0e10cSrcweir { 158*cdf0e10cSrcweir hash += key[i]; 159*cdf0e10cSrcweir hash += (hash << 10); 160*cdf0e10cSrcweir hash ^= (hash >> 6); 161*cdf0e10cSrcweir } 162*cdf0e10cSrcweir hash += (hash << 3); 163*cdf0e10cSrcweir hash ^= (hash >> 11); 164*cdf0e10cSrcweir hash += (hash << 15); 165*cdf0e10cSrcweir return hash; 166*cdf0e10cSrcweir } 167*cdf0e10cSrcweir }; 168*cdf0e10cSrcweir 169*cdf0e10cSrcweir #define get16bits(d) ((((sal_uInt32)(((const sal_uInt8 *)(d))[1])) << 8)\ 170*cdf0e10cSrcweir +(sal_uInt32)(((const sal_uInt8 *)(d))[0]) ) 171*cdf0e10cSrcweir 172*cdf0e10cSrcweir struct SuperFastHash 173*cdf0e10cSrcweir { 174*cdf0e10cSrcweir size_t operator()(const std::string &str) const 175*cdf0e10cSrcweir { 176*cdf0e10cSrcweir const char * data = str.data(); 177*cdf0e10cSrcweir int len = str.size(); 178*cdf0e10cSrcweir size_t hash = len, tmp; 179*cdf0e10cSrcweir if (len <= 0 || data == NULL) return 0; 180*cdf0e10cSrcweir 181*cdf0e10cSrcweir int rem = len & 3; 182*cdf0e10cSrcweir len >>= 2; 183*cdf0e10cSrcweir 184*cdf0e10cSrcweir /* Main loop */ 185*cdf0e10cSrcweir for (;len > 0; len--) 186*cdf0e10cSrcweir { 187*cdf0e10cSrcweir hash += get16bits (data); 188*cdf0e10cSrcweir tmp = (get16bits (data+2) << 11) ^ hash; 189*cdf0e10cSrcweir hash = (hash << 16) ^ tmp; 190*cdf0e10cSrcweir data += 2*sizeof (sal_uInt16); 191*cdf0e10cSrcweir hash += hash >> 11; 192*cdf0e10cSrcweir } 193*cdf0e10cSrcweir 194*cdf0e10cSrcweir /* Handle end cases */ 195*cdf0e10cSrcweir switch (rem) 196*cdf0e10cSrcweir { 197*cdf0e10cSrcweir case 3: hash += get16bits (data); 198*cdf0e10cSrcweir hash ^= hash << 16; 199*cdf0e10cSrcweir hash ^= data[sizeof (sal_uInt16)] << 18; 200*cdf0e10cSrcweir hash += hash >> 11; 201*cdf0e10cSrcweir break; 202*cdf0e10cSrcweir case 2: hash += get16bits (data); 203*cdf0e10cSrcweir hash ^= hash << 11; 204*cdf0e10cSrcweir hash += hash >> 17; 205*cdf0e10cSrcweir break; 206*cdf0e10cSrcweir case 1: hash += *data; 207*cdf0e10cSrcweir hash ^= hash << 10; 208*cdf0e10cSrcweir hash += hash >> 1; 209*cdf0e10cSrcweir } 210*cdf0e10cSrcweir 211*cdf0e10cSrcweir /* Force "avalanching" of final 127 bits */ 212*cdf0e10cSrcweir hash ^= hash << 3; 213*cdf0e10cSrcweir hash += hash >> 5; 214*cdf0e10cSrcweir hash ^= hash << 4; 215*cdf0e10cSrcweir hash += hash >> 17; 216*cdf0e10cSrcweir hash ^= hash << 25; 217*cdf0e10cSrcweir hash += hash >> 6; 218*cdf0e10cSrcweir 219*cdf0e10cSrcweir return hash; 220*cdf0e10cSrcweir } 221*cdf0e10cSrcweir }; 222*cdf0e10cSrcweir 223*cdf0e10cSrcweir #define pref_hash joaat_hash 224*cdf0e10cSrcweir 225*cdf0e10cSrcweir typedef std::hash_map<std::string, std::string, pref_hash> Stringtable; 226*cdf0e10cSrcweir typedef std::list<std::string> LinkedList; 227*cdf0e10cSrcweir typedef std::vector<std::string> HashSet; 228*cdf0e10cSrcweir 229*cdf0e10cSrcweir typedef std::hash_map<std::string, LinkedList, pref_hash> Hashtable; 230*cdf0e10cSrcweir 231*cdf0e10cSrcweir class StreamTable 232*cdf0e10cSrcweir { 233*cdf0e10cSrcweir public: 234*cdf0e10cSrcweir std::string document_id; 235*cdf0e10cSrcweir std::string document_path; 236*cdf0e10cSrcweir std::string document_module; 237*cdf0e10cSrcweir std::string document_title; 238*cdf0e10cSrcweir 239*cdf0e10cSrcweir HashSet *appl_hidlist; 240*cdf0e10cSrcweir Hashtable *appl_keywords; 241*cdf0e10cSrcweir Stringtable *appl_helptexts; 242*cdf0e10cSrcweir xmlDocPtr appl_doc; 243*cdf0e10cSrcweir 244*cdf0e10cSrcweir HashSet *default_hidlist; 245*cdf0e10cSrcweir Hashtable *default_keywords; 246*cdf0e10cSrcweir Stringtable *default_helptexts; 247*cdf0e10cSrcweir xmlDocPtr default_doc; 248*cdf0e10cSrcweir 249*cdf0e10cSrcweir StreamTable() : 250*cdf0e10cSrcweir appl_hidlist(NULL), appl_keywords(NULL), appl_helptexts(NULL), appl_doc(NULL), 251*cdf0e10cSrcweir default_hidlist(NULL), default_keywords(NULL), default_helptexts(NULL), default_doc(NULL) 252*cdf0e10cSrcweir {} 253*cdf0e10cSrcweir void dropdefault() 254*cdf0e10cSrcweir { 255*cdf0e10cSrcweir delete default_hidlist; 256*cdf0e10cSrcweir delete default_keywords; 257*cdf0e10cSrcweir delete default_helptexts; 258*cdf0e10cSrcweir if (default_doc) xmlFreeDoc(default_doc); 259*cdf0e10cSrcweir } 260*cdf0e10cSrcweir void dropappl() 261*cdf0e10cSrcweir { 262*cdf0e10cSrcweir delete appl_hidlist; 263*cdf0e10cSrcweir delete appl_keywords; 264*cdf0e10cSrcweir delete appl_helptexts; 265*cdf0e10cSrcweir if (appl_doc) xmlFreeDoc(appl_doc); 266*cdf0e10cSrcweir } 267*cdf0e10cSrcweir ~StreamTable() 268*cdf0e10cSrcweir { 269*cdf0e10cSrcweir dropappl(); 270*cdf0e10cSrcweir dropdefault(); 271*cdf0e10cSrcweir } 272*cdf0e10cSrcweir }; 273*cdf0e10cSrcweir 274*cdf0e10cSrcweir struct HelpProcessingException 275*cdf0e10cSrcweir { 276*cdf0e10cSrcweir HelpProcessingErrorClass m_eErrorClass; 277*cdf0e10cSrcweir std::string m_aErrorMsg; 278*cdf0e10cSrcweir std::string m_aXMLParsingFile; 279*cdf0e10cSrcweir int m_nXMLParsingLine; 280*cdf0e10cSrcweir 281*cdf0e10cSrcweir HelpProcessingException( HelpProcessingErrorClass eErrorClass, const std::string& aErrorMsg ) 282*cdf0e10cSrcweir : m_eErrorClass( eErrorClass ) 283*cdf0e10cSrcweir , m_aErrorMsg( aErrorMsg ) 284*cdf0e10cSrcweir {} 285*cdf0e10cSrcweir HelpProcessingException( const std::string& aErrorMsg, const std::string& aXMLParsingFile, int nXMLParsingLine ) 286*cdf0e10cSrcweir : m_eErrorClass( HELPPROCESSING_XMLPARSING_ERROR ) 287*cdf0e10cSrcweir , m_aErrorMsg( aErrorMsg ) 288*cdf0e10cSrcweir , m_aXMLParsingFile( aXMLParsingFile ) 289*cdf0e10cSrcweir , m_nXMLParsingLine( nXMLParsingLine ) 290*cdf0e10cSrcweir {} 291*cdf0e10cSrcweir }; 292*cdf0e10cSrcweir 293*cdf0e10cSrcweir class HelpCompiler 294*cdf0e10cSrcweir { 295*cdf0e10cSrcweir public: 296*cdf0e10cSrcweir HelpCompiler(StreamTable &streamTable, 297*cdf0e10cSrcweir const fs::path &in_inputFile, 298*cdf0e10cSrcweir const fs::path &in_src, 299*cdf0e10cSrcweir const fs::path &in_resEmbStylesheet, 300*cdf0e10cSrcweir const std::string &in_module, 301*cdf0e10cSrcweir const std::string &in_lang, 302*cdf0e10cSrcweir bool in_bExtensionMode); 303*cdf0e10cSrcweir bool compile( void ) throw (HelpProcessingException); 304*cdf0e10cSrcweir void addEntryToJarFile(const std::string &prefix, 305*cdf0e10cSrcweir const std::string &entryName, const std::string &bytesToAdd); 306*cdf0e10cSrcweir void addEntryToJarFile(const std::string &prefix, 307*cdf0e10cSrcweir const std::string &entryName, const HashSet &bytesToAdd); 308*cdf0e10cSrcweir void addEntryToJarFile(const std::string &prefix, 309*cdf0e10cSrcweir const std::string &entryName, const Stringtable &bytesToAdd); 310*cdf0e10cSrcweir void addEntryToJarFile(const std::string &prefix, 311*cdf0e10cSrcweir const std::string &entryName, const Hashtable &bytesToAdd); 312*cdf0e10cSrcweir private: 313*cdf0e10cSrcweir xmlDocPtr getSourceDocument(const fs::path &filePath); 314*cdf0e10cSrcweir HashSet switchFind(xmlDocPtr doc); 315*cdf0e10cSrcweir xmlNodePtr clone(xmlNodePtr node, const std::string& appl); 316*cdf0e10cSrcweir StreamTable &streamTable; 317*cdf0e10cSrcweir const fs::path inputFile, src; 318*cdf0e10cSrcweir const std::string module, lang; 319*cdf0e10cSrcweir const fs::path resEmbStylesheet; 320*cdf0e10cSrcweir bool bExtensionMode; 321*cdf0e10cSrcweir }; 322*cdf0e10cSrcweir 323*cdf0e10cSrcweir #endif 324*cdf0e10cSrcweir 325*cdf0e10cSrcweir /* vi:set tabstop=4 shiftwidth=4 expandtab: */ 326