1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 // MARKER(update_precomp.py): autogen include statement, do not remove 29 #include "precompiled_xmlhelp.hxx" 30 #include <com/sun/star/ucb/Command.hpp> 31 #include <com/sun/star/ucb/XCommandEnvironment.hpp> 32 #include <com/sun/star/i18n/XExtendedTransliteration.hpp> 33 #include <com/sun/star/ucb/XCommandProcessor.hpp> 34 #include <com/sun/star/lang/Locale.hpp> 35 #include <com/sun/star/script/XInvocation.hpp> 36 37 #ifndef INCLUDED_STL_ALGORITHM 38 #include <algorithm> 39 #define INCLUDED_STL_ALGORITHM 40 #endif 41 #ifndef INCLUDED_STL_SET 42 #include <set> 43 #define INCLUDED_STL_SET 44 #endif 45 46 #include <qe/Query.hxx> 47 #include <qe/DocGenerator.hxx> 48 #include "resultsetforquery.hxx" 49 #include "databases.hxx" 50 51 // For testing 52 // #define LOGGING 53 54 using namespace std; 55 using namespace chelp; 56 using namespace xmlsearch::excep; 57 using namespace xmlsearch::qe; 58 using namespace com::sun::star; 59 using namespace com::sun::star::ucb; 60 using namespace com::sun::star::i18n; 61 using namespace com::sun::star::uno; 62 using namespace com::sun::star::lang; 63 64 struct HitItem 65 { 66 rtl::OUString m_aURL; 67 float m_fScore; 68 69 HitItem( void ) {} 70 HitItem( const rtl::OUString& aURL, float fScore ) 71 : m_aURL( aURL ) 72 , m_fScore( fScore ) 73 {} 74 bool operator < ( const HitItem& rHitItem ) const 75 { 76 return rHitItem.m_fScore < m_fScore; 77 } 78 }; 79 80 ResultSetForQuery::ResultSetForQuery( const uno::Reference< lang::XMultiServiceFactory >& xMSF, 81 const uno::Reference< XContentProvider >& xProvider, 82 sal_Int32 nOpenMode, 83 const uno::Sequence< beans::Property >& seq, 84 const uno::Sequence< NumberedSortingInfo >& seqSort, 85 URLParameter& aURLParameter, 86 Databases* pDatabases ) 87 : ResultSetBase( xMSF,xProvider,nOpenMode,seq,seqSort ), 88 m_pDatabases( pDatabases ), 89 m_aURLParameter( aURLParameter ) 90 { 91 Reference< XTransliteration > xTrans( 92 xMSF->createInstance( rtl::OUString::createFromAscii( "com.sun.star.i18n.Transliteration" ) ), 93 UNO_QUERY ); 94 Locale aLocale( aURLParameter.get_language(), 95 rtl::OUString(), 96 rtl::OUString() ); 97 if(xTrans.is()) 98 xTrans->loadModule(TransliterationModules_UPPERCASE_LOWERCASE, 99 aLocale ); 100 101 // Access Lucene via XInvocation 102 Reference< script::XInvocation > xInvocation( 103 xMSF->createInstance( rtl::OUString::createFromAscii( "com.sun.star.help.HelpSearch" ) ), 104 UNO_QUERY ); 105 106 vector< vector< rtl::OUString > > queryList; 107 { 108 sal_Int32 idx; 109 rtl::OUString query = m_aURLParameter.get_query(); 110 while( query.getLength() ) 111 { 112 idx = query.indexOf( sal_Unicode( ' ' ) ); 113 if( idx == -1 ) 114 idx = query.getLength(); 115 116 vector< rtl::OUString > currentQuery; 117 rtl::OUString tmp(query.copy( 0,idx )); 118 rtl:: OUString toliterate = tmp; 119 if(xTrans.is()) { 120 Sequence<sal_Int32> aSeq; 121 toliterate = xTrans->transliterate( 122 tmp,0,tmp.getLength(),aSeq); 123 } 124 125 currentQuery.push_back( toliterate ); 126 queryList.push_back( currentQuery ); 127 128 int nCpy = 1 + idx; 129 if( nCpy >= query.getLength() ) 130 query = rtl::OUString(); 131 else 132 query = query.copy( 1 + idx ); 133 } 134 } 135 136 vector< rtl::OUString > aCompleteResultVector; 137 if( xInvocation.is() ) 138 { 139 rtl::OUString scope = m_aURLParameter.get_scope(); 140 bool bCaptionsOnly = ( scope.compareToAscii( "Heading" ) == 0 ); 141 sal_Int32 hitCount = m_aURLParameter.get_hitCount(); 142 143 #ifdef LOGGING 144 FILE* pFile = fopen( "d:\\resultset_out.txt", "w" ); 145 #endif 146 147 IndexFolderIterator aIndexFolderIt( *pDatabases, m_aURLParameter.get_module(), m_aURLParameter.get_language() ); 148 rtl::OUString idxDir; 149 bool bExtension = false; 150 int iDir = 0; 151 vector< vector<HitItem>* > aIndexFolderResultVectorVector; 152 153 bool bTemporary; 154 while( (idxDir = aIndexFolderIt.nextIndexFolder( bExtension, bTemporary )).getLength() > 0 ) 155 { 156 vector<HitItem> aIndexFolderResultVector; 157 158 try 159 { 160 vector< vector<HitItem>* > aQueryListResultVectorVector; 161 set< rtl::OUString > aSet,aCurrent,aResultSet; 162 163 int nQueryListSize = queryList.size(); 164 if( nQueryListSize > 1 ) 165 hitCount = 2000; 166 167 for( int i = 0; i < nQueryListSize; ++i ) 168 { 169 vector<HitItem>* pQueryResultVector; 170 if( nQueryListSize > 1 ) 171 { 172 pQueryResultVector = new vector<HitItem>(); 173 aQueryListResultVectorVector.push_back( pQueryResultVector ); 174 } 175 else 176 { 177 pQueryResultVector = &aIndexFolderResultVector; 178 } 179 pQueryResultVector->reserve( hitCount ); 180 181 int nParamCount = bCaptionsOnly ? 7 : 6; 182 Sequence<uno::Any> aParamsSeq( nParamCount ); 183 184 aParamsSeq[0] = uno::makeAny( rtl::OUString::createFromAscii( "-lang" ) ); 185 aParamsSeq[1] = uno::makeAny( m_aURLParameter.get_language() ); 186 187 aParamsSeq[2] = uno::makeAny( rtl::OUString::createFromAscii( "-index" ) ); 188 rtl::OUString aSystemPath; 189 osl::FileBase::getSystemPathFromFileURL( idxDir, aSystemPath ); 190 aParamsSeq[3] = uno::makeAny( aSystemPath ); 191 192 aParamsSeq[4] = uno::makeAny( rtl::OUString::createFromAscii( "-query" ) ); 193 194 const std::vector< rtl::OUString >& aListItem = queryList[i]; 195 ::rtl::OUString aNewQueryStr = aListItem[0]; 196 aParamsSeq[5] = uno::makeAny( aNewQueryStr ); 197 198 if( bCaptionsOnly ) 199 aParamsSeq[6] = uno::makeAny( rtl::OUString::createFromAscii( "-caption" ) ); 200 201 Sequence< sal_Int16 > aOutParamIndex; 202 Sequence< uno::Any > aOutParam; 203 204 uno::Any aRet = xInvocation->invoke( rtl::OUString::createFromAscii( "search" ), 205 aParamsSeq, aOutParamIndex, aOutParam ); 206 207 Sequence< float > aScoreSeq; 208 int nScoreCount = 0; 209 int nOutParamCount = aOutParam.getLength(); 210 if( nOutParamCount == 1 ) 211 { 212 const uno::Any* pScoreAnySeq = aOutParam.getConstArray(); 213 if( pScoreAnySeq[0] >>= aScoreSeq ) 214 nScoreCount = aScoreSeq.getLength(); 215 } 216 217 Sequence<rtl::OUString> aRetSeq; 218 if( aRet >>= aRetSeq ) 219 { 220 if( nQueryListSize > 1 ) 221 aSet.clear(); 222 223 const rtl::OUString* pRetSeq = aRetSeq.getConstArray(); 224 int nCount = aRetSeq.getLength(); 225 if( nCount > hitCount ) 226 nCount = hitCount; 227 for( int j = 0 ; j < nCount ; ++j ) 228 { 229 float fScore = 0.0; 230 if( j < nScoreCount ) 231 fScore = aScoreSeq[j]; 232 233 rtl::OUString aURL = pRetSeq[j]; 234 pQueryResultVector->push_back( HitItem( aURL, fScore ) ); 235 if( nQueryListSize > 1 ) 236 aSet.insert( aURL ); 237 238 #ifdef LOGGING 239 if( pFile ) 240 { 241 rtl::OString tmp(rtl::OUStringToOString( aURL, RTL_TEXTENCODING_UTF8)); 242 fprintf( pFile, "Dir %d, Query %d, Item: score=%f, URL=%s\n", iDir, i, fScore, tmp.getStr() ); 243 } 244 #endif 245 } 246 } 247 248 // intersect 249 if( nQueryListSize > 1 ) 250 { 251 if( i == 0 ) 252 { 253 aResultSet = aSet; 254 } 255 else 256 { 257 aCurrent = aResultSet; 258 aResultSet.clear(); 259 set_intersection( aSet.begin(),aSet.end(), 260 aCurrent.begin(),aCurrent.end(), 261 inserter(aResultSet,aResultSet.begin())); 262 } 263 } 264 } 265 266 // Combine results in aIndexFolderResultVector 267 if( nQueryListSize > 1 ) 268 { 269 for( int n = 0 ; n < nQueryListSize ; ++n ) 270 { 271 vector<HitItem>* pQueryResultVector = aQueryListResultVectorVector[n]; 272 vector<HitItem>& rQueryResultVector = *pQueryResultVector; 273 274 int nItemCount = rQueryResultVector.size(); 275 for( int i = 0 ; i < nItemCount ; ++i ) 276 { 277 const HitItem& rItem = rQueryResultVector[ i ]; 278 set< rtl::OUString >::iterator it; 279 if( (it = aResultSet.find( rItem.m_aURL )) != aResultSet.end() ) 280 { 281 HitItem aItemCopy( rItem ); 282 aItemCopy.m_fScore /= nQueryListSize; // To get average score 283 if( n == 0 ) 284 { 285 // Use first pass to create entry 286 aIndexFolderResultVector.push_back( aItemCopy ); 287 288 #ifdef LOGGING 289 if( pFile ) 290 { 291 rtl::OString tmp(rtl::OUStringToOString( aItemCopy.m_aURL, RTL_TEXTENCODING_UTF8)); 292 fprintf( pFile, "Combine: Query %d (first pass), Item %d: score=%f (%f), URL=%s\n", n, i, aItemCopy.m_fScore, rItem.m_fScore, tmp.getStr() ); 293 } 294 #endif 295 } 296 else 297 { 298 // Find entry in vector 299 int nCount = aIndexFolderResultVector.size(); 300 for( int j = 0 ; j < nCount ; ++j ) 301 { 302 HitItem& rFindItem = aIndexFolderResultVector[ j ]; 303 if( rFindItem.m_aURL.equals( aItemCopy.m_aURL ) ) 304 { 305 #ifdef LOGGING 306 if( pFile ) 307 { 308 rtl::OString tmp(rtl::OUStringToOString( aItemCopy.m_aURL, RTL_TEXTENCODING_UTF8)); 309 fprintf( pFile, "Combine: Query %d, Item %d: score=%f + %f = %f, URL=%s\n", n, i, 310 rFindItem.m_fScore, aItemCopy.m_fScore, rFindItem.m_fScore + aItemCopy.m_fScore, tmp.getStr() ); 311 } 312 #endif 313 314 rFindItem.m_fScore += aItemCopy.m_fScore; 315 break; 316 } 317 } 318 } 319 } 320 } 321 322 delete pQueryResultVector; 323 } 324 325 sort( aIndexFolderResultVector.begin(), aIndexFolderResultVector.end() ); 326 } 327 328 vector<HitItem>* pIndexFolderHitItemVector = new vector<HitItem>( aIndexFolderResultVector ); 329 aIndexFolderResultVectorVector.push_back( pIndexFolderHitItemVector ); 330 aIndexFolderResultVector.clear(); 331 } 332 catch( const Exception& ) 333 { 334 } 335 336 ++iDir; 337 338 if( bTemporary ) 339 aIndexFolderIt.deleteTempIndexFolder( idxDir ); 340 341 } // Iterator 342 343 344 int nVectorCount = aIndexFolderResultVectorVector.size(); 345 vector<HitItem>::size_type* pCurrentVectorIndex = new vector<HitItem>::size_type[nVectorCount]; 346 for( int j = 0 ; j < nVectorCount ; ++j ) 347 pCurrentVectorIndex[j] = 0; 348 349 #ifdef LOGGING 350 if( pFile ) 351 { 352 for( int k = 0 ; k < nVectorCount ; ++k ) 353 { 354 vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[k]; 355 int nItemCount = rIndexFolderVector.size(); 356 357 fprintf( pFile, "Vector %d, %d elements\n", k, nItemCount ); 358 359 for( int i = 0 ; i < nItemCount ; ++i ) 360 { 361 const HitItem& rItem = rIndexFolderVector[ i ]; 362 rtl::OString tmp(rtl::OUStringToOString(rItem.m_aURL, RTL_TEXTENCODING_UTF8)); 363 fprintf( pFile, " Item_vector%d, %d/%d: score=%f, URL=%s\n", k, i, nItemCount, rItem.m_fScore, tmp.getStr() ); 364 } 365 } 366 } 367 #endif 368 369 sal_Int32 nTotalHitCount = m_aURLParameter.get_hitCount(); 370 sal_Int32 nHitCount = 0; 371 while( nHitCount < nTotalHitCount ) 372 { 373 int iVectorWithBestScore = -1; 374 float fBestScore = 0.0; 375 for( int k = 0 ; k < nVectorCount ; ++k ) 376 { 377 vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[k]; 378 if( pCurrentVectorIndex[k] < rIndexFolderVector.size() ) 379 { 380 const HitItem& rItem = rIndexFolderVector[ pCurrentVectorIndex[k] ]; 381 382 if( fBestScore < rItem.m_fScore ) 383 { 384 fBestScore = rItem.m_fScore; 385 iVectorWithBestScore = k; 386 } 387 } 388 } 389 390 if( iVectorWithBestScore == -1 ) // No item left at all 391 break; 392 393 vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[iVectorWithBestScore]; 394 const HitItem& rItem = rIndexFolderVector[ pCurrentVectorIndex[iVectorWithBestScore] ]; 395 396 pCurrentVectorIndex[iVectorWithBestScore]++; 397 398 aCompleteResultVector.push_back( rItem.m_aURL ); 399 ++nHitCount; 400 } 401 402 delete[] pCurrentVectorIndex; 403 for( int n = 0 ; n < nVectorCount ; ++n ) 404 { 405 vector<HitItem>* pIndexFolderVector = aIndexFolderResultVectorVector[n]; 406 delete pIndexFolderVector; 407 } 408 409 #ifdef LOGGING 410 fclose( pFile ); 411 #endif 412 } 413 414 sal_Int32 replIdx = rtl::OUString::createFromAscii( "#HLP#" ).getLength(); 415 rtl::OUString replWith = rtl::OUString::createFromAscii( "vnd.sun.star.help://" ); 416 417 int nResultCount = aCompleteResultVector.size(); 418 for( int r = 0 ; r < nResultCount ; ++r ) 419 { 420 rtl::OUString aURL = aCompleteResultVector[r]; 421 rtl::OUString aResultStr = replWith + aURL.copy(replIdx); 422 m_aPath.push_back( aResultStr ); 423 } 424 425 m_aItems.resize( m_aPath.size() ); 426 m_aIdents.resize( m_aPath.size() ); 427 428 Command aCommand; 429 aCommand.Name = rtl::OUString::createFromAscii( "getPropertyValues" ); 430 aCommand.Argument <<= m_sProperty; 431 432 for( m_nRow = 0; sal::static_int_cast<sal_uInt32>( m_nRow ) < m_aPath.size(); ++m_nRow ) 433 { 434 m_aPath[m_nRow] = 435 m_aPath[m_nRow] + 436 rtl::OUString::createFromAscii( "?Language=" ) + 437 m_aURLParameter.get_language() + 438 rtl::OUString::createFromAscii( "&System=" ) + 439 m_aURLParameter.get_system(); 440 441 uno::Reference< XContent > content = queryContent(); 442 if( content.is() ) 443 { 444 uno::Reference< XCommandProcessor > cmd( content,uno::UNO_QUERY ); 445 cmd->execute( aCommand,0,uno::Reference< XCommandEnvironment >( 0 ) ) >>= m_aItems[m_nRow]; //TODO: check return value of operator >>= 446 } 447 } 448 m_nRow = 0xffffffff; 449 } 450