1*983d4c8aSAndrew Rist /************************************************************** 2cdf0e10cSrcweir * 3*983d4c8aSAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one 4*983d4c8aSAndrew Rist * or more contributor license agreements. See the NOTICE file 5*983d4c8aSAndrew Rist * distributed with this work for additional information 6*983d4c8aSAndrew Rist * regarding copyright ownership. The ASF licenses this file 7*983d4c8aSAndrew Rist * to you under the Apache License, Version 2.0 (the 8*983d4c8aSAndrew Rist * "License"); you may not use this file except in compliance 9*983d4c8aSAndrew Rist * with the License. You may obtain a copy of the License at 10*983d4c8aSAndrew Rist * 11*983d4c8aSAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0 12*983d4c8aSAndrew Rist * 13*983d4c8aSAndrew Rist * Unless required by applicable law or agreed to in writing, 14*983d4c8aSAndrew Rist * software distributed under the License is distributed on an 15*983d4c8aSAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16*983d4c8aSAndrew Rist * KIND, either express or implied. See the License for the 17*983d4c8aSAndrew Rist * specific language governing permissions and limitations 18*983d4c8aSAndrew Rist * under the License. 19*983d4c8aSAndrew Rist * 20*983d4c8aSAndrew Rist *************************************************************/ 21*983d4c8aSAndrew Rist 22*983d4c8aSAndrew Rist 23cdf0e10cSrcweir 24cdf0e10cSrcweir #ifndef HELPCOMPILER_HXX 25cdf0e10cSrcweir #define HELPCOMPILER_HXX 26cdf0e10cSrcweir 27cdf0e10cSrcweir #include <string> 28cdf0e10cSrcweir #include <hash_map> 29cdf0e10cSrcweir #include <vector> 30cdf0e10cSrcweir #include <list> 31cdf0e10cSrcweir #include <fstream> 32cdf0e10cSrcweir #include <iostream> 33cdf0e10cSrcweir #include <sstream> 34cdf0e10cSrcweir #include <algorithm> 35cdf0e10cSrcweir #include <ctype.h> 36cdf0e10cSrcweir 37cdf0e10cSrcweir #include <boost/shared_ptr.hpp> 38cdf0e10cSrcweir 39cdf0e10cSrcweir #include <libxml/xmlmemory.h> 40cdf0e10cSrcweir #include <libxml/debugXML.h> 41cdf0e10cSrcweir #include <libxml/HTMLtree.h> 42cdf0e10cSrcweir #include <libxml/xmlIO.h> 43cdf0e10cSrcweir #include <libxml/xinclude.h> 44cdf0e10cSrcweir #include <libxml/catalog.h> 45cdf0e10cSrcweir 46cdf0e10cSrcweir #include <rtl/ustring.hxx> 47cdf0e10cSrcweir #include <osl/thread.h> 48cdf0e10cSrcweir #include <osl/process.h> 49cdf0e10cSrcweir #include <osl/file.hxx> 50cdf0e10cSrcweir 51cdf0e10cSrcweir #include <compilehelp.hxx> 52cdf0e10cSrcweir 53cdf0e10cSrcweir #define EMULATEORIGINAL 1 54cdf0e10cSrcweir 55cdf0e10cSrcweir #ifdef CMCDEBUG 56cdf0e10cSrcweir #define HCDBG(foo) do { if (1) foo; } while(0) 57cdf0e10cSrcweir #else 58cdf0e10cSrcweir #define HCDBG(foo) do { if (0) foo; } while(0) 59cdf0e10cSrcweir #endif 60cdf0e10cSrcweir 61cdf0e10cSrcweir namespace fs 62cdf0e10cSrcweir { 63cdf0e10cSrcweir rtl_TextEncoding getThreadTextEncoding( void ); 64cdf0e10cSrcweir 65cdf0e10cSrcweir enum convert { native }; 66cdf0e10cSrcweir class path 67cdf0e10cSrcweir { 68cdf0e10cSrcweir public: 69cdf0e10cSrcweir ::rtl::OUString data; 70cdf0e10cSrcweir public: path()71cdf0e10cSrcweir path() {} path(const path & rOther)72cdf0e10cSrcweir path(const path &rOther) : data(rOther.data) {} path(const std::string & in,convert)73cdf0e10cSrcweir path(const std::string &in, convert) 74cdf0e10cSrcweir { 75cdf0e10cSrcweir rtl::OUString sWorkingDir; 76cdf0e10cSrcweir osl_getProcessWorkingDir(&sWorkingDir.pData); 77cdf0e10cSrcweir 78cdf0e10cSrcweir rtl::OString tmp(in.c_str()); 79cdf0e10cSrcweir rtl::OUString ustrSystemPath(rtl::OStringToOUString(tmp, getThreadTextEncoding())); 80cdf0e10cSrcweir osl::File::getFileURLFromSystemPath(ustrSystemPath, data); 81cdf0e10cSrcweir osl::File::getAbsoluteFileURL(sWorkingDir, data, data); 82cdf0e10cSrcweir } path(const std::string & FileURL)83cdf0e10cSrcweir path(const std::string &FileURL) 84cdf0e10cSrcweir { 85cdf0e10cSrcweir rtl::OString tmp(FileURL.c_str()); 86cdf0e10cSrcweir data = rtl::OStringToOUString(tmp, getThreadTextEncoding()); 87cdf0e10cSrcweir } native_file_string() const88cdf0e10cSrcweir std::string native_file_string() const 89cdf0e10cSrcweir { 90cdf0e10cSrcweir ::rtl::OUString ustrSystemPath; 91cdf0e10cSrcweir osl::File::getSystemPathFromFileURL(data, ustrSystemPath); 92cdf0e10cSrcweir rtl::OString tmp(rtl::OUStringToOString(ustrSystemPath, getThreadTextEncoding())); 93cdf0e10cSrcweir HCDBG(std::cerr << "native_file_string is " << tmp.getStr() << std::endl); 94cdf0e10cSrcweir return std::string(tmp.getStr()); 95cdf0e10cSrcweir } 96cdf0e10cSrcweir #ifdef WNT native_file_string_w() const97cdf0e10cSrcweir wchar_t const * native_file_string_w() const 98cdf0e10cSrcweir { 99cdf0e10cSrcweir ::rtl::OUString ustrSystemPath; 100cdf0e10cSrcweir osl::File::getSystemPathFromFileURL(data, ustrSystemPath); 101cdf0e10cSrcweir return reinterpret_cast< wchar_t const * >(ustrSystemPath.getStr()); 102cdf0e10cSrcweir } 103cdf0e10cSrcweir #endif native_directory_string() const104cdf0e10cSrcweir std::string native_directory_string() const { return native_file_string(); } toUTF8() const105cdf0e10cSrcweir std::string toUTF8() const 106cdf0e10cSrcweir { 107cdf0e10cSrcweir rtl::OString tmp(rtl::OUStringToOString(data, RTL_TEXTENCODING_UTF8)); 108cdf0e10cSrcweir return std::string(tmp.getStr()); 109cdf0e10cSrcweir } empty() const110cdf0e10cSrcweir bool empty() const { return data.getLength() == 0; } operator /(const std::string & in) const111cdf0e10cSrcweir path operator/(const std::string &in) const 112cdf0e10cSrcweir { 113cdf0e10cSrcweir path ret(*this); 114cdf0e10cSrcweir HCDBG(std::cerr << "orig was " << 115cdf0e10cSrcweir rtl::OUStringToOString(ret.data, RTL_TEXTENCODING_UTF8).getStr() << std::endl); 116cdf0e10cSrcweir rtl::OString tmp(in.c_str()); 117cdf0e10cSrcweir rtl::OUString ustrSystemPath(rtl::OStringToOUString(tmp, getThreadTextEncoding())); 118cdf0e10cSrcweir ret.data += rtl::OUString(sal_Unicode('/')); 119cdf0e10cSrcweir ret.data += ustrSystemPath; 120cdf0e10cSrcweir HCDBG(std::cerr << "final is " << 121cdf0e10cSrcweir rtl::OUStringToOString(ret.data, RTL_TEXTENCODING_UTF8).getStr() << std::endl); 122cdf0e10cSrcweir return ret; 123cdf0e10cSrcweir } append(const char * in)124cdf0e10cSrcweir void append(const char *in) 125cdf0e10cSrcweir { 126cdf0e10cSrcweir rtl::OString tmp(in); 127cdf0e10cSrcweir rtl::OUString ustrSystemPath(rtl::OStringToOUString(tmp, getThreadTextEncoding())); 128cdf0e10cSrcweir data = data + ustrSystemPath; 129cdf0e10cSrcweir } append(const std::string & in)130cdf0e10cSrcweir void append(const std::string &in) { append(in.c_str()); } 131cdf0e10cSrcweir }; 132cdf0e10cSrcweir 133cdf0e10cSrcweir void create_directory(const fs::path indexDirName); 134cdf0e10cSrcweir void rename(const fs::path &src, const fs::path &dest); 135cdf0e10cSrcweir void copy(const fs::path &src, const fs::path &dest); 136cdf0e10cSrcweir bool exists(const fs::path &in); 137cdf0e10cSrcweir void remove_all(const fs::path &in); 138cdf0e10cSrcweir void remove(const fs::path &in); 139cdf0e10cSrcweir } 140cdf0e10cSrcweir 141cdf0e10cSrcweir struct joaat_hash 142cdf0e10cSrcweir { operator ()joaat_hash143cdf0e10cSrcweir size_t operator()(const std::string &str) const 144cdf0e10cSrcweir { 145cdf0e10cSrcweir size_t hash = 0; 146cdf0e10cSrcweir const char *key = str.data(); 147cdf0e10cSrcweir for (size_t i = 0; i < str.size(); i++) 148cdf0e10cSrcweir { 149cdf0e10cSrcweir hash += key[i]; 150cdf0e10cSrcweir hash += (hash << 10); 151cdf0e10cSrcweir hash ^= (hash >> 6); 152cdf0e10cSrcweir } 153cdf0e10cSrcweir hash += (hash << 3); 154cdf0e10cSrcweir hash ^= (hash >> 11); 155cdf0e10cSrcweir hash += (hash << 15); 156cdf0e10cSrcweir return hash; 157cdf0e10cSrcweir } 158cdf0e10cSrcweir }; 159cdf0e10cSrcweir 160cdf0e10cSrcweir #define get16bits(d) ((((sal_uInt32)(((const sal_uInt8 *)(d))[1])) << 8)\ 161cdf0e10cSrcweir +(sal_uInt32)(((const sal_uInt8 *)(d))[0]) ) 162cdf0e10cSrcweir 163cdf0e10cSrcweir struct SuperFastHash 164cdf0e10cSrcweir { operator ()SuperFastHash165cdf0e10cSrcweir size_t operator()(const std::string &str) const 166cdf0e10cSrcweir { 167cdf0e10cSrcweir const char * data = str.data(); 168cdf0e10cSrcweir int len = str.size(); 169cdf0e10cSrcweir size_t hash = len, tmp; 170cdf0e10cSrcweir if (len <= 0 || data == NULL) return 0; 171cdf0e10cSrcweir 172cdf0e10cSrcweir int rem = len & 3; 173cdf0e10cSrcweir len >>= 2; 174cdf0e10cSrcweir 175cdf0e10cSrcweir /* Main loop */ 176cdf0e10cSrcweir for (;len > 0; len--) 177cdf0e10cSrcweir { 178cdf0e10cSrcweir hash += get16bits (data); 179cdf0e10cSrcweir tmp = (get16bits (data+2) << 11) ^ hash; 180cdf0e10cSrcweir hash = (hash << 16) ^ tmp; 181cdf0e10cSrcweir data += 2*sizeof (sal_uInt16); 182cdf0e10cSrcweir hash += hash >> 11; 183cdf0e10cSrcweir } 184cdf0e10cSrcweir 185cdf0e10cSrcweir /* Handle end cases */ 186cdf0e10cSrcweir switch (rem) 187cdf0e10cSrcweir { 188cdf0e10cSrcweir case 3: hash += get16bits (data); 189cdf0e10cSrcweir hash ^= hash << 16; 190cdf0e10cSrcweir hash ^= data[sizeof (sal_uInt16)] << 18; 191cdf0e10cSrcweir hash += hash >> 11; 192cdf0e10cSrcweir break; 193cdf0e10cSrcweir case 2: hash += get16bits (data); 194cdf0e10cSrcweir hash ^= hash << 11; 195cdf0e10cSrcweir hash += hash >> 17; 196cdf0e10cSrcweir break; 197cdf0e10cSrcweir case 1: hash += *data; 198cdf0e10cSrcweir hash ^= hash << 10; 199cdf0e10cSrcweir hash += hash >> 1; 200cdf0e10cSrcweir } 201cdf0e10cSrcweir 202cdf0e10cSrcweir /* Force "avalanching" of final 127 bits */ 203cdf0e10cSrcweir hash ^= hash << 3; 204cdf0e10cSrcweir hash += hash >> 5; 205cdf0e10cSrcweir hash ^= hash << 4; 206cdf0e10cSrcweir hash += hash >> 17; 207cdf0e10cSrcweir hash ^= hash << 25; 208cdf0e10cSrcweir hash += hash >> 6; 209cdf0e10cSrcweir 210cdf0e10cSrcweir return hash; 211cdf0e10cSrcweir } 212cdf0e10cSrcweir }; 213cdf0e10cSrcweir 214cdf0e10cSrcweir #define pref_hash joaat_hash 215cdf0e10cSrcweir 216cdf0e10cSrcweir typedef std::hash_map<std::string, std::string, pref_hash> Stringtable; 217cdf0e10cSrcweir typedef std::list<std::string> LinkedList; 218cdf0e10cSrcweir typedef std::vector<std::string> HashSet; 219cdf0e10cSrcweir 220cdf0e10cSrcweir typedef std::hash_map<std::string, LinkedList, pref_hash> Hashtable; 221cdf0e10cSrcweir 222cdf0e10cSrcweir class StreamTable 223cdf0e10cSrcweir { 224cdf0e10cSrcweir public: 225cdf0e10cSrcweir std::string document_id; 226cdf0e10cSrcweir std::string document_path; 227cdf0e10cSrcweir std::string document_module; 228cdf0e10cSrcweir std::string document_title; 229cdf0e10cSrcweir 230cdf0e10cSrcweir HashSet *appl_hidlist; 231cdf0e10cSrcweir Hashtable *appl_keywords; 232cdf0e10cSrcweir Stringtable *appl_helptexts; 233cdf0e10cSrcweir xmlDocPtr appl_doc; 234cdf0e10cSrcweir 235cdf0e10cSrcweir HashSet *default_hidlist; 236cdf0e10cSrcweir Hashtable *default_keywords; 237cdf0e10cSrcweir Stringtable *default_helptexts; 238cdf0e10cSrcweir xmlDocPtr default_doc; 239cdf0e10cSrcweir StreamTable()240cdf0e10cSrcweir StreamTable() : 241cdf0e10cSrcweir appl_hidlist(NULL), appl_keywords(NULL), appl_helptexts(NULL), appl_doc(NULL), 242cdf0e10cSrcweir default_hidlist(NULL), default_keywords(NULL), default_helptexts(NULL), default_doc(NULL) 243cdf0e10cSrcweir {} dropdefault()244cdf0e10cSrcweir void dropdefault() 245cdf0e10cSrcweir { 246cdf0e10cSrcweir delete default_hidlist; 247cdf0e10cSrcweir delete default_keywords; 248cdf0e10cSrcweir delete default_helptexts; 249cdf0e10cSrcweir if (default_doc) xmlFreeDoc(default_doc); 250cdf0e10cSrcweir } dropappl()251cdf0e10cSrcweir void dropappl() 252cdf0e10cSrcweir { 253cdf0e10cSrcweir delete appl_hidlist; 254cdf0e10cSrcweir delete appl_keywords; 255cdf0e10cSrcweir delete appl_helptexts; 256cdf0e10cSrcweir if (appl_doc) xmlFreeDoc(appl_doc); 257cdf0e10cSrcweir } ~StreamTable()258cdf0e10cSrcweir ~StreamTable() 259cdf0e10cSrcweir { 260cdf0e10cSrcweir dropappl(); 261cdf0e10cSrcweir dropdefault(); 262cdf0e10cSrcweir } 263cdf0e10cSrcweir }; 264cdf0e10cSrcweir 265cdf0e10cSrcweir struct HelpProcessingException 266cdf0e10cSrcweir { 267cdf0e10cSrcweir HelpProcessingErrorClass m_eErrorClass; 268cdf0e10cSrcweir std::string m_aErrorMsg; 269cdf0e10cSrcweir std::string m_aXMLParsingFile; 270cdf0e10cSrcweir int m_nXMLParsingLine; 271cdf0e10cSrcweir HelpProcessingExceptionHelpProcessingException272cdf0e10cSrcweir HelpProcessingException( HelpProcessingErrorClass eErrorClass, const std::string& aErrorMsg ) 273cdf0e10cSrcweir : m_eErrorClass( eErrorClass ) 274cdf0e10cSrcweir , m_aErrorMsg( aErrorMsg ) 275cdf0e10cSrcweir {} HelpProcessingExceptionHelpProcessingException276cdf0e10cSrcweir HelpProcessingException( const std::string& aErrorMsg, const std::string& aXMLParsingFile, int nXMLParsingLine ) 277cdf0e10cSrcweir : m_eErrorClass( HELPPROCESSING_XMLPARSING_ERROR ) 278cdf0e10cSrcweir , m_aErrorMsg( aErrorMsg ) 279cdf0e10cSrcweir , m_aXMLParsingFile( aXMLParsingFile ) 280cdf0e10cSrcweir , m_nXMLParsingLine( nXMLParsingLine ) 281cdf0e10cSrcweir {} 282cdf0e10cSrcweir }; 283cdf0e10cSrcweir 284cdf0e10cSrcweir class HelpCompiler 285cdf0e10cSrcweir { 286cdf0e10cSrcweir public: 287cdf0e10cSrcweir HelpCompiler(StreamTable &streamTable, 288cdf0e10cSrcweir const fs::path &in_inputFile, 289cdf0e10cSrcweir const fs::path &in_src, 290cdf0e10cSrcweir const fs::path &in_resEmbStylesheet, 291cdf0e10cSrcweir const std::string &in_module, 292cdf0e10cSrcweir const std::string &in_lang, 293cdf0e10cSrcweir bool in_bExtensionMode); 294cdf0e10cSrcweir bool compile( void ) throw (HelpProcessingException); 295cdf0e10cSrcweir void addEntryToJarFile(const std::string &prefix, 296cdf0e10cSrcweir const std::string &entryName, const std::string &bytesToAdd); 297cdf0e10cSrcweir void addEntryToJarFile(const std::string &prefix, 298cdf0e10cSrcweir const std::string &entryName, const HashSet &bytesToAdd); 299cdf0e10cSrcweir void addEntryToJarFile(const std::string &prefix, 300cdf0e10cSrcweir const std::string &entryName, const Stringtable &bytesToAdd); 301cdf0e10cSrcweir void addEntryToJarFile(const std::string &prefix, 302cdf0e10cSrcweir const std::string &entryName, const Hashtable &bytesToAdd); 303cdf0e10cSrcweir private: 304cdf0e10cSrcweir xmlDocPtr getSourceDocument(const fs::path &filePath); 305cdf0e10cSrcweir HashSet switchFind(xmlDocPtr doc); 306cdf0e10cSrcweir xmlNodePtr clone(xmlNodePtr node, const std::string& appl); 307cdf0e10cSrcweir StreamTable &streamTable; 308cdf0e10cSrcweir const fs::path inputFile, src; 309cdf0e10cSrcweir const std::string module, lang; 310cdf0e10cSrcweir const fs::path resEmbStylesheet; 311cdf0e10cSrcweir bool bExtensionMode; 312cdf0e10cSrcweir }; 313cdf0e10cSrcweir 314cdf0e10cSrcweir #endif 315cdf0e10cSrcweir 316cdf0e10cSrcweir /* vi:set tabstop=4 shiftwidth=4 expandtab: */ 317