/************************************************************** * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. * *************************************************************/ // MARKER(update_precomp.py): autogen include statement, do not remove #include "precompiled_filter.hxx" /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil -*- */ #include // memset(), ... #ifndef UNX #include // access() #endif #include #include //rtl_getTextEncodingFromWindowsCodePage #include "msvbasic.hxx" #include using namespace ::com::sun::star::script; /* A few urls which may in the future be of some use http://www.virusbtn.com/vb2000/Programme/papers/bontchev.pdf */ /* class VBA_Impl: * The VBA class provides a set of methods to handle Visual Basic For * Applications streams, the constructor is given the root ole2 stream * of the document, Open reads the VBA project file and figures out * the number of VBA streams, and the offset of the data within them. * Decompress decompresses a particular numbered stream, NoStreams returns * this number, and StreamName can give you the streams name. Decompress * will call Output when it has a 4096 byte collection of data to output, * and also with the final remainder of data if there is still some left * at the end of compression. Output is virtual to allow custom handling * of each chunk of decompressed data. So inherit from this to do something * useful with the data. * * cmc * */ const int MINVBASTRING = 6; VBA_Impl::VBA_Impl(SvStorage &rIn, bool bCmmntd) : aVBAStrings(0), sComment(RTL_CONSTASCII_USTRINGPARAM("Rem ")), xStor(&rIn), pOffsets(0), nOffsets(0), meCharSet(RTL_TEXTENCODING_MS_1252), bCommented(bCmmntd), mbMac(false), nLines(0) { } VBA_Impl::~VBA_Impl() { delete [] pOffsets; for (sal_uLong i=0;i> nIdLen; if (nIdLen < MINVBASTRING) //Error recovery xVBAProject->SeekRel(-2); //undo 2 byte len else { for(sal_uInt16 i=0; i < nIdLen / (bIsUnicode ? 2 : 1); i++) { if (bIsUnicode) *xVBAProject >> nOut16; else { *xVBAProject >> nOut8; nOut16 = nOut8; } sReference += nOut16; if (i==2) { if ((nOut16 == 'G') || (nOut16 == 'H') || (nOut16 == 'C') || nOut16 == 'D') { nType = static_cast(nOut16); } if (nType == 0) { //Error recovery, 2byte len + 3 characters of used type xVBAProject->SeekRel(-(2 + 3 * (bIsUnicode ? 2 : 1))); break; } } } maReferences.push_back(sReference); } return nType; } void VBA_Impl::Output( int nLen, const sal_uInt8*pData ) { /* Each StarBasic module is tragically limited to the maximum len of a string and WordBasic is not, so each overlarge module must be split */ String sTemp((const sal_Char *)pData, (xub_StrLen)nLen, meCharSet); int nTmp = sTemp.GetTokenCount('\x0D'); int nIndex = aVBAStrings.GetSize()-1; if (aVBAStrings.Get(nIndex)->Len() + nLen + ((nLines+nTmp) * sComment.Len()) >= STRING_MAXLEN) { //DBG_ASSERT(0,"New Module String\n"); //we are too large for our boots, break out into another //string nLines=0; nIndex++; aVBAStrings.SetSize(nIndex+1); aVBAStrings.Put(nIndex,new String); } *(aVBAStrings.Get(nIndex)) += sTemp; nLines+=nTmp; } int VBA_Impl::ReadVBAProject(const SvStorageRef &rxVBAStorage) { SvStorageStreamRef xVBAProject; xVBAProject = rxVBAStorage->OpenSotStream( String( RTL_CONSTASCII_USTRINGPARAM( "_VBA_PROJECT" ) ), STREAM_STD_READ | STREAM_NOCREATE ); if( !xVBAProject.Is() || SVSTREAM_OK != xVBAProject->GetError() ) { DBG_WARNING("Not able to find vba project, cannot find macros"); return 0; } static const sal_uInt8 aKnownId[] = {0xCC, 0x61}; sal_uInt8 aId[2]; xVBAProject->Read( aId, sizeof(aId) ); if (memcmp( aId, aKnownId, sizeof(aId))) { DBG_WARNING("unrecognized VBA macro project type"); return 0; } static const sal_uInt8 aOffice2007LE[] = { 0x88, 0x00, 0x00, 0x01, 0x00, 0xFF }; static const sal_uInt8 aOffice2003LE_2[] = { 0x79, 0x00, 0x00, 0x01, 0x00, 0xFF }; static const sal_uInt8 aOffice2003LE[] = { 0x76, 0x00, 0x00, 0x01, 0x00, 0xFF }; static const sal_uInt8 aOfficeXPLE[] = { 0x73, 0x00, 0x00, 0x01, 0x00, 0xFF }; static const sal_uInt8 aOfficeXPBE[] = { 0x63, 0x00, 0x00, 0x0E, 0x00, 0xFF }; static const sal_uInt8 aOffice2000LE[] = { 0x6D, 0x00, 0x00, 0x01, 0x00, 0xFF }; static const sal_uInt8 aOffice98BE[] = { 0x60, 0x00, 0x00, 0x0E, 0x00, 0xFF }; static const sal_uInt8 aOffice97LE[] = { 0x5E, 0x00, 0x00, 0x01, 0x00, 0xFF }; sal_uInt8 aProduct[6]; xVBAProject->Read( aProduct, sizeof(aProduct) ); bool bIsUnicode; if (!(memcmp(aProduct, aOffice2007LE, sizeof(aProduct))) || !(memcmp(aProduct, aOffice2003LE, sizeof(aProduct))) || !(memcmp(aProduct, aOffice2003LE_2, sizeof(aProduct))) || !(memcmp(aProduct, aOfficeXPLE, sizeof(aProduct))) || !(memcmp(aProduct, aOffice2000LE, sizeof(aProduct))) || !(memcmp(aProduct, aOffice97LE, sizeof(aProduct))) ) { xVBAProject->SetNumberFormatInt( NUMBERFORMAT_INT_LITTLEENDIAN ); bIsUnicode = true; } else if (!(memcmp(aProduct, aOfficeXPBE, sizeof(aProduct))) || !(memcmp(aProduct, aOffice98BE, sizeof(aProduct))) ) { xVBAProject->SetNumberFormatInt( NUMBERFORMAT_INT_BIGENDIAN ); mbMac = true; bIsUnicode = false; } else { switch (aProduct[3]) { case 0x1: xVBAProject->SetNumberFormatInt(NUMBERFORMAT_INT_LITTLEENDIAN); bIsUnicode = true; DBG_ASSERT(sal_False, "unrecognized VBA macro version, report to cmc. Guessing at unicode little endian"); break; case 0xe: xVBAProject->SetNumberFormatInt(NUMBERFORMAT_INT_BIGENDIAN); mbMac = true; bIsUnicode = false; DBG_ASSERT(sal_False, "unrecognized VBA macro version, report to cmc. Guessing at 8bit big endian"); break; default: DBG_ASSERT(sal_False, "totally unrecognized VBA macro version, report to cmc"); return 0; } } sal_uInt32 nLidA; //Language identifiers sal_uInt32 nLidB; sal_uInt16 nCharSet; sal_uInt16 nLenA; sal_uInt32 nUnknownB; sal_uInt32 nUnknownC; sal_uInt16 nLenB; sal_uInt16 nLenC; sal_uInt16 nLenD; *xVBAProject >> nLidA >> nLidB >> nCharSet >> nLenA >> nUnknownB; *xVBAProject >> nUnknownC >> nLenB >> nLenC >> nLenD; meCharSet = rtl_getTextEncodingFromWindowsCodePage(nCharSet); DBG_ASSERT(meCharSet != RTL_TEXTENCODING_DONTKNOW, "don't know what vba charset to use"); if (meCharSet == RTL_TEXTENCODING_DONTKNOW) meCharSet = RTL_TEXTENCODING_MS_1252; if (nLenD != 0x02) { DBG_WARNING("Warning VBA number is different, please report"); return 0; } /* A sequence of string that are prepended with a len and then begin with G or H, there are also those that begin with C or D. If a string begins with C or D, it is really two strings, one right after the other. Each string then has a 12 bytes suffix Recognizing the end of the sequence is done by finding a str len of < 6 which does not appear to be the beginning of an object id. Admittedly this isn't a great test, but nothing in the header appears to count the number of strings, and nothing else seems to match. So it'll have to do, its protected by a number of secondry tests to prove its a valid string, and everything gives up if this isn't proven. */ bool bPredictsTrailingTwenty = false; while (1) { sal_uInt8 nType = ReadPString(xVBAProject,bIsUnicode); //Type C and D seem to come as pairs, so skip the following one if (nType == 'C' || nType == 'D') { nType = ReadPString(xVBAProject,bIsUnicode); DBG_ASSERT( nType == 'C' || nType == 'D', "VBA: This must be a 'C' or 'D' string!" ); if (nType != 'C' && nType != 'D') return 0; } if (!nType) break; xVBAProject->SeekRel(10); sal_uInt16 nPredictsTrailingTwenty; *xVBAProject >> nPredictsTrailingTwenty; if (nPredictsTrailingTwenty) bPredictsTrailingTwenty = true; if (bPredictsTrailingTwenty) { sal_uInt16 nTestIsNotString; *xVBAProject >> nTestIsNotString; if (nTestIsNotString < MINVBASTRING) { DBG_ASSERT(nTestIsNotString <= 1, "Haven't seen a len like this in VBA, report to CMC"); xVBAProject->SeekRel(18); bPredictsTrailingTwenty = false; } else xVBAProject->SeekRel(-2); } } sal_Int16 nInt16s; *xVBAProject >> nInt16s; DBG_ASSERT( nInt16s >= 0, "VBA: Bad no of records in VBA Project, panic!" ); if (!nInt16s) return 0; xVBAProject->SeekRel(2*nInt16s); sal_Int16 nInt32s; *xVBAProject >> nInt32s; DBG_ASSERT( nInt32s >= 0, "VBA: Bad no of records in VBA Project, panic!" ); if (!nInt32s) return 0; xVBAProject->SeekRel(4*nInt32s); xVBAProject->SeekRel(2); for(int k=0;k<3;k++) { sal_uInt16 nLen; *xVBAProject >> nLen; if (nLen != 0xFFFF) xVBAProject->SeekRel(nLen); } xVBAProject->SeekRel(100); //Seems fixed len *xVBAProject >> nOffsets; DBG_ASSERT( nOffsets != 0xFFFF, "VBA: Bad nOffsets, panic!!" ); if ((nOffsets == 0xFFFF) || (nOffsets == 0)) return 0; pOffsets = new VBAOffset_Impl[ nOffsets ]; int i, j; for( i=0; i < nOffsets; i++) { sal_uInt16 nLen; *xVBAProject >> nLen; if (bIsUnicode) { sal_Unicode* pBuf = pOffsets[i].sName.AllocBuffer( nLen / 2 ); xVBAProject->Read( (sal_Char*)pBuf, nLen ); #ifdef OSL_BIGENDIAN for( j = 0; j < nLen / 2; ++j, ++pBuf ) *pBuf = SWAPSHORT( *pBuf ); #endif // ifdef OSL_BIGENDIAN } else { ByteString aByteStr; sal_Char* pByteData = aByteStr.AllocBuffer( nLen ); sal_Size nWasRead = xVBAProject->Read( pByteData, nLen ); if( nWasRead != nLen ) aByteStr.ReleaseBufferAccess(); pOffsets[i].sName += String( aByteStr, meCharSet); } *xVBAProject >> nLen; xVBAProject->SeekRel( nLen ); //begin section, another problem area *xVBAProject >> nLen; if ( nLen == 0xFFFF) { xVBAProject->SeekRel(2); *xVBAProject >> nLen; xVBAProject->SeekRel( nLen ); } else xVBAProject->SeekRel( nLen+2 ); *xVBAProject >> nLen; DBG_ASSERT( nLen == 0xFFFF, "VBA: Bad field in VBA Project, panic!!" ); if ( nLen != 0xFFFF) return 0; xVBAProject->SeekRel(6); sal_uInt16 nOctects; *xVBAProject >> nOctects; for(j=0;jSeekRel(8); xVBAProject->SeekRel(5); //end section *xVBAProject >> pOffsets[i].nOffset; xVBAProject->SeekRel(2); } return nOffsets; } /* #117718# For a given Module name return its type, * Form, Class, Document, Normal or Unknown * */ ModType VBA_Impl::GetModuleType( const UniString& rModuleName ) { ModuleTypeHash::iterator iter = mhModHash.find( rModuleName ); ModuleTypeHash::iterator iterEnd = mhModHash.end(); if ( iter != iterEnd ) { return iter->second; } return ModuleType::UNKNOWN; } bool VBA_Impl::Open( const String &rToplevel, const String &rSublevel ) { /* beginning test for vba stuff */ bool bRet = false; SvStorageRef xMacros= xStor->OpenSotStorage( rToplevel, STREAM_READWRITE | STREAM_NOCREATE | STREAM_SHARE_DENYALL ); if( !xMacros.Is() || SVSTREAM_OK != xMacros->GetError() ) { DBG_WARNING("No Macros Storage"); } else { xVBA = xMacros->OpenSotStorage( rSublevel, STREAM_READWRITE | STREAM_NOCREATE | STREAM_SHARE_DENYALL ); if( !xVBA.Is() || SVSTREAM_OK != xVBA->GetError() ) { DBG_WARNING("No Visual Basic in Storage"); } else { if (ReadVBAProject(xVBA)) bRet = true; } /* #117718# * Information regarding the type of module is contained in the * "PROJECT" stream, this stream consists of a number of ascii lines * entries are of the form Key=Value, the ones that we are interested * in have the keys; Class, BaseClass & Module indicating the module * ( value ) is either a Class Module, Form Module or a plain VB Module. */ SvStorageStreamRef xProject = xMacros->OpenSotStream( String( RTL_CONSTASCII_USTRINGPARAM( "PROJECT" ) ) ); SvStorageStream* pStp = xProject; UniString tmp; static const String sThisDoc( RTL_CONSTASCII_USTRINGPARAM( "ThisDocument" ) ); static const String sModule( RTL_CONSTASCII_USTRINGPARAM( "Module" ) ); static const String sClass( RTL_CONSTASCII_USTRINGPARAM( "Class" ) ); static const String sBaseClass( RTL_CONSTASCII_USTRINGPARAM( "BaseClass" ) ); static const String sDocument( RTL_CONSTASCII_USTRINGPARAM( "Document" ) ); mhModHash[ sThisDoc ] = ModuleType::CLASS; while ( pStp->ReadByteStringLine( tmp, meCharSet ) ) { xub_StrLen index = tmp.Search( '=' ); if ( index != STRING_NOTFOUND ) { String key = tmp.Copy( 0, index ); String value = tmp.Copy( index + 1 ); if ( key == sClass ) { mhModHash[ value ] = ModuleType::CLASS; OSL_TRACE("Module %s is of type Class", ::rtl::OUStringToOString( value , RTL_TEXTENCODING_ASCII_US ).pData->buffer ); } else if ( key == sBaseClass ) { mhModHash[ value ] = ModuleType::FORM; OSL_TRACE("Module %s is of type Form", ::rtl::OUStringToOString( value , RTL_TEXTENCODING_ASCII_US ).pData->buffer ); } else if ( key == sDocument ) { /* #i37965# DR 2004-12-03: add "Document", used i.e. in Excel for macros attached to sheet or document. */ // value is of form /&H, strip the identifier value.Erase( value.Search( '/' ) ); mhModHash[ value ] = ModuleType::DOCUMENT; OSL_TRACE("Module %s is of type Document VBA", ::rtl::OUStringToOString( value , RTL_TEXTENCODING_ASCII_US ).pData->buffer ); } else if ( key == sModule ) { mhModHash[ value ] = ModuleType::NORMAL; OSL_TRACE("Module %s is of type Normal VBA", ::rtl::OUStringToOString( value , RTL_TEXTENCODING_ASCII_US ).pData->buffer ); } } } } /* end test for vba stuff */ return bRet; } const StringArray &VBA_Impl::Decompress(sal_uInt16 nIndex, int *pOverflow) { DBG_ASSERT( nIndex < nOffsets, "Index out of range" ); SvStorageStreamRef xVBAStream; aVBAStrings.SetSize(1); aVBAStrings.Put(0,new String); xVBAStream = xVBA->OpenSotStream( pOffsets[nIndex].sName, STREAM_STD_READ | STREAM_NOCREATE ); if (pOverflow) *pOverflow=0; if( !xVBAStream.Is() || SVSTREAM_OK != xVBAStream->GetError() ) { DBG_WARNING("Not able to open vb module "); } else { xVBAStream->SetNumberFormatInt( NUMBERFORMAT_INT_LITTLEENDIAN ); DecompressVBA( nIndex, xVBAStream ); /* * if len was too big for a single string set that variable ? * if ((len > XX) && (pOverflow)) *pOverflow=1; */ if (bCommented) { String sTempStringa; if (mbMac) sTempStringa = String( RTL_CONSTASCII_USTRINGPARAM( "\x0D" ) ); else sTempStringa = String( RTL_CONSTASCII_USTRINGPARAM( "\x0D\x0A" ) ); String sTempStringb(sTempStringa); sTempStringb+=sComment; for(sal_uLong i=0;iSearchAndReplaceAll( sTempStringa,sTempStringb); aVBAStrings.Get(i)->Insert(sComment,0); } } } return aVBAStrings; } int VBA_Impl::DecompressVBA( int nIndex, SvStorageStreamRef &xVBAStream ) { sal_uInt8 nLeadbyte; sal_uInt16 nToken; unsigned int nPos = 0; int nLen, nDistance, nShift, nClean=1; xVBAStream->Seek( pOffsets[ nIndex ].nOffset + 3 ); while(xVBAStream->Read(&nLeadbyte,1)) { for(int nPosition=0x01;nPosition < 0x100;nPosition=nPosition<<1) { //we see if the leadbyte has flagged this location as a dataunit //which is actually a token which must be looked up in the history if (nLeadbyte & nPosition) { *xVBAStream >> nToken; if (nClean == 0) nClean=1; //For some reason the division of the token into the length //field of the data to be inserted, and the distance back into //the history differs depending on how full the history is int nPos2 = nPos % nWINDOWLEN; if (nPos2 <= 0x10) nShift = 12; else if (nPos2 <= 0x20) nShift = 11; else if (nPos2 <= 0x40) nShift = 10; else if (nPos2 <= 0x80) nShift = 9; else if (nPos2 <= 0x100) nShift = 8; else if (nPos2 <= 0x200) nShift = 7; else if (nPos2 <= 0x400) nShift = 6; else if (nPos2 <= 0x800) nShift = 5; else nShift = 4; int i; nLen=0; for(i=0;i> nShift; //read the len of data from the history, wrapping around the //nWINDOWLEN boundary if necessary data read from the history //is also copied into the recent part of the history as well. for (i = 0; i < nLen; i++) { unsigned char c; c = aHistory[(nPos-nDistance-1) % nWINDOWLEN]; aHistory[nPos % nWINDOWLEN] = c; nPos++; } } else { // special boundary case code, not guarantueed to be correct // seems to work though, there is something wrong with the // compression scheme (or maybe a feature) where when the data // ends on a nWINDOWLEN boundary and the excess bytes in the 8 // dataunit list are discarded, and not interpreted as tokens // or normal data. if ((nPos != 0) && ((nPos % nWINDOWLEN) == 0) && (nClean)) { xVBAStream->SeekRel(2); nClean=0; Output(nWINDOWLEN, aHistory); break; } //This is the normal case for when the data unit is not a //token to be looked up, but instead some normal data which //can be output, and placed in the history. if (xVBAStream->Read(&aHistory[nPos % nWINDOWLEN],1)) nPos++; if (nClean == 0) nClean=1; } } } if (nPos % nWINDOWLEN) Output(nPos % nWINDOWLEN,aHistory); return(nPos); } /* vi:set tabstop=4 shiftwidth=4 expandtab: */