1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_xmlhelp.hxx"
30 #include <com/sun/star/ucb/Command.hpp>
31 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
32 #include <com/sun/star/i18n/XExtendedTransliteration.hpp>
33 #include <com/sun/star/ucb/XCommandProcessor.hpp>
34 #include <com/sun/star/lang/Locale.hpp>
35 #include <com/sun/star/script/XInvocation.hpp>
36 
37 #ifndef INCLUDED_STL_ALGORITHM
38 #include <algorithm>
39 #define INCLUDED_STL_ALGORITHM
40 #endif
41 #ifndef INCLUDED_STL_SET
42 #include <set>
43 #define INCLUDED_STL_SET
44 #endif
45 
46 #include <qe/Query.hxx>
47 #include <qe/DocGenerator.hxx>
48 #include "resultsetforquery.hxx"
49 #include "databases.hxx"
50 
51 // For testing
52 // #define LOGGING
53 
54 using namespace std;
55 using namespace chelp;
56 using namespace xmlsearch::excep;
57 using namespace xmlsearch::qe;
58 using namespace com::sun::star;
59 using namespace com::sun::star::ucb;
60 using namespace com::sun::star::i18n;
61 using namespace com::sun::star::uno;
62 using namespace com::sun::star::lang;
63 
64 struct HitItem
65 {
66 	rtl::OUString	m_aURL;
67 	float			m_fScore;
68 
69 	HitItem( void )	{}
70 	HitItem( const rtl::OUString& aURL, float fScore )
71 		: m_aURL( aURL )
72 		, m_fScore( fScore )
73 	{}
74 	bool operator < ( const HitItem& rHitItem ) const
75 	{
76 		return rHitItem.m_fScore < m_fScore;
77 	}
78 };
79 
80 ResultSetForQuery::ResultSetForQuery( const uno::Reference< lang::XMultiServiceFactory >&  xMSF,
81 									  const uno::Reference< XContentProvider >&  xProvider,
82 									  sal_Int32 nOpenMode,
83 									  const uno::Sequence< beans::Property >& seq,
84 									  const uno::Sequence< NumberedSortingInfo >& seqSort,
85 									  URLParameter& aURLParameter,
86 									  Databases* pDatabases )
87 	: ResultSetBase( xMSF,xProvider,nOpenMode,seq,seqSort ),
88 	  m_pDatabases( pDatabases ),
89       m_aURLParameter( aURLParameter )
90 {
91     Reference< XTransliteration > xTrans(
92         xMSF->createInstance( rtl::OUString::createFromAscii( "com.sun.star.i18n.Transliteration" ) ),
93         UNO_QUERY );
94     Locale aLocale( aURLParameter.get_language(),
95                     rtl::OUString(),
96                     rtl::OUString() );
97     if(xTrans.is())
98         xTrans->loadModule(TransliterationModules_UPPERCASE_LOWERCASE,
99                            aLocale );
100 
101 	// Access Lucene via XInvocation
102 	Reference< script::XInvocation > xInvocation(
103         xMSF->createInstance( rtl::OUString::createFromAscii( "com.sun.star.help.HelpSearch" ) ),
104         UNO_QUERY );
105 
106 	vector< vector< rtl::OUString > > queryList;
107 	{
108 		sal_Int32 idx;
109 		rtl::OUString query = m_aURLParameter.get_query();
110 		while( query.getLength() )
111 		{
112 			idx = query.indexOf( sal_Unicode( ' ' ) );
113 			if( idx == -1 )
114 				idx = query.getLength();
115 
116 			vector< rtl::OUString > currentQuery;
117             rtl::OUString tmp(query.copy( 0,idx ));
118             rtl:: OUString toliterate = tmp;
119             if(xTrans.is()) {
120                 Sequence<sal_Int32> aSeq;
121                 toliterate = xTrans->transliterate(
122                     tmp,0,tmp.getLength(),aSeq);
123             }
124 
125 			currentQuery.push_back( toliterate );
126 			queryList.push_back( currentQuery );
127 
128 			int nCpy = 1 + idx;
129 			if( nCpy >= query.getLength() )
130 				query = rtl::OUString();
131 			else
132 				query = query.copy( 1 + idx );
133 		}
134 	}
135 
136 	vector< rtl::OUString > aCompleteResultVector;
137 	if( xInvocation.is() )
138 	{
139 		rtl::OUString scope = m_aURLParameter.get_scope();
140 		bool bCaptionsOnly = ( scope.compareToAscii( "Heading" ) == 0 );
141 		sal_Int32 hitCount = m_aURLParameter.get_hitCount();
142 
143 #ifdef LOGGING
144 		FILE* pFile = fopen( "d:\\resultset_out.txt", "w" );
145 #endif
146 
147 		IndexFolderIterator aIndexFolderIt( *pDatabases, m_aURLParameter.get_module(), m_aURLParameter.get_language() );
148 		rtl::OUString idxDir;
149 		bool bExtension = false;
150 		int iDir = 0;
151 		vector< vector<HitItem>* > aIndexFolderResultVectorVector;
152 
153 		bool bTemporary;
154 		while( (idxDir = aIndexFolderIt.nextIndexFolder( bExtension, bTemporary )).getLength() > 0 )
155 		{
156 			vector<HitItem> aIndexFolderResultVector;
157 
158 			try
159 			{
160 				vector< vector<HitItem>* > aQueryListResultVectorVector;
161 				set< rtl::OUString > aSet,aCurrent,aResultSet;
162 
163 				int nQueryListSize = queryList.size();
164 				if( nQueryListSize > 1 )
165 					hitCount = 2000;
166 
167 				for( int i = 0; i < nQueryListSize; ++i )
168 				{
169 					vector<HitItem>* pQueryResultVector;
170 					if( nQueryListSize > 1 )
171 					{
172 						pQueryResultVector = new vector<HitItem>();
173 						aQueryListResultVectorVector.push_back( pQueryResultVector );
174 					}
175 					else
176 					{
177 						pQueryResultVector = &aIndexFolderResultVector;
178 					}
179 					pQueryResultVector->reserve( hitCount );
180 
181 					int nParamCount = bCaptionsOnly ? 7 : 6;
182 					Sequence<uno::Any> aParamsSeq( nParamCount );
183 
184 					aParamsSeq[0] = uno::makeAny( rtl::OUString::createFromAscii( "-lang" ) );
185 					aParamsSeq[1] = uno::makeAny( m_aURLParameter.get_language() );
186 
187 					aParamsSeq[2] = uno::makeAny( rtl::OUString::createFromAscii( "-index" ) );
188 					rtl::OUString aSystemPath;
189 					osl::FileBase::getSystemPathFromFileURL( idxDir, aSystemPath );
190 					aParamsSeq[3] = uno::makeAny( aSystemPath );
191 
192 					aParamsSeq[4] = uno::makeAny( rtl::OUString::createFromAscii( "-query" ) );
193 
194 					const std::vector< rtl::OUString >& aListItem = queryList[i];
195 					::rtl::OUString aNewQueryStr = aListItem[0];
196 					aParamsSeq[5] = uno::makeAny( aNewQueryStr );
197 
198 					if( bCaptionsOnly )
199 						aParamsSeq[6] = uno::makeAny( rtl::OUString::createFromAscii( "-caption" ) );
200 
201 					Sequence< sal_Int16 > aOutParamIndex;
202 					Sequence< uno::Any > aOutParam;
203 
204 					uno::Any aRet = xInvocation->invoke( rtl::OUString::createFromAscii( "search" ),
205 						aParamsSeq, aOutParamIndex, aOutParam );
206 
207 					Sequence< float > aScoreSeq;
208 					int nScoreCount = 0;
209 					int nOutParamCount = aOutParam.getLength();
210 					if( nOutParamCount == 1 )
211 					{
212 						const uno::Any* pScoreAnySeq = aOutParam.getConstArray();
213 						if( pScoreAnySeq[0] >>= aScoreSeq )
214 							nScoreCount = aScoreSeq.getLength();
215 					}
216 
217 					Sequence<rtl::OUString> aRetSeq;
218 					if( aRet >>= aRetSeq )
219 					{
220 						if( nQueryListSize > 1 )
221 							aSet.clear();
222 
223 						const rtl::OUString* pRetSeq = aRetSeq.getConstArray();
224 						int nCount = aRetSeq.getLength();
225 						if( nCount > hitCount )
226 							nCount = hitCount;
227 						for( int j = 0 ; j < nCount ; ++j )
228 						{
229 							float fScore = 0.0;
230 							if( j < nScoreCount )
231 								fScore = aScoreSeq[j];
232 
233 							rtl::OUString aURL = pRetSeq[j];
234 							pQueryResultVector->push_back( HitItem( aURL, fScore ) );
235 							if( nQueryListSize > 1 )
236 								aSet.insert( aURL );
237 
238 #ifdef LOGGING
239 							if( pFile )
240 							{
241 								rtl::OString tmp(rtl::OUStringToOString( aURL, RTL_TEXTENCODING_UTF8));
242 								fprintf( pFile, "Dir %d, Query %d, Item: score=%f, URL=%s\n", iDir, i, fScore, tmp.getStr() );
243 							}
244 #endif
245 						}
246 					}
247 
248 					// intersect
249 					if( nQueryListSize > 1 )
250 					{
251 						if( i == 0 )
252 						{
253 							aResultSet = aSet;
254 						}
255 						else
256 						{
257 							aCurrent = aResultSet;
258 							aResultSet.clear();
259 							set_intersection( aSet.begin(),aSet.end(),
260 											  aCurrent.begin(),aCurrent.end(),
261 											  inserter(aResultSet,aResultSet.begin()));
262 						}
263 					}
264 				}
265 
266 				// Combine results in aIndexFolderResultVector
267 				if( nQueryListSize > 1 )
268 				{
269 					for( int n = 0 ; n < nQueryListSize ; ++n )
270 					{
271 						vector<HitItem>* pQueryResultVector = aQueryListResultVectorVector[n];
272 						vector<HitItem>& rQueryResultVector = *pQueryResultVector;
273 
274 						int nItemCount = rQueryResultVector.size();
275 						for( int i = 0 ; i < nItemCount ; ++i )
276 						{
277 							const HitItem& rItem = rQueryResultVector[ i ];
278 							set< rtl::OUString >::iterator it;
279 							if( (it = aResultSet.find( rItem.m_aURL )) != aResultSet.end() )
280 							{
281 								HitItem aItemCopy( rItem );
282 								aItemCopy.m_fScore /= nQueryListSize;	// To get average score
283 								if( n == 0 )
284 								{
285 									// Use first pass to create entry
286 									aIndexFolderResultVector.push_back( aItemCopy );
287 
288 #ifdef LOGGING
289 									if( pFile )
290 									{
291 										rtl::OString tmp(rtl::OUStringToOString( aItemCopy.m_aURL, RTL_TEXTENCODING_UTF8));
292 										fprintf( pFile, "Combine: Query %d (first pass), Item %d: score=%f (%f), URL=%s\n", n, i, aItemCopy.m_fScore, rItem.m_fScore, tmp.getStr() );
293 									}
294 #endif
295 								}
296 								else
297 								{
298 									// Find entry in vector
299 									int nCount = aIndexFolderResultVector.size();
300 									for( int j = 0 ; j < nCount ; ++j )
301 									{
302 										HitItem& rFindItem = aIndexFolderResultVector[ j ];
303 										if( rFindItem.m_aURL.equals( aItemCopy.m_aURL ) )
304 										{
305 #ifdef LOGGING
306 											if( pFile )
307 											{
308 												rtl::OString tmp(rtl::OUStringToOString( aItemCopy.m_aURL, RTL_TEXTENCODING_UTF8));
309 												fprintf( pFile, "Combine: Query %d, Item %d: score=%f + %f = %f, URL=%s\n", n, i,
310 													rFindItem.m_fScore, aItemCopy.m_fScore, rFindItem.m_fScore + aItemCopy.m_fScore, tmp.getStr() );
311 											}
312 #endif
313 
314 											rFindItem.m_fScore += aItemCopy.m_fScore;
315 											break;
316 										}
317 									}
318 								}
319 							}
320 						}
321 
322 						delete pQueryResultVector;
323 					}
324 
325 					sort( aIndexFolderResultVector.begin(), aIndexFolderResultVector.end() );
326 				}
327 
328 				vector<HitItem>* pIndexFolderHitItemVector = new vector<HitItem>( aIndexFolderResultVector );
329 				aIndexFolderResultVectorVector.push_back( pIndexFolderHitItemVector );
330 				aIndexFolderResultVector.clear();
331 			}
332 			catch( const Exception& )
333 			{
334 			}
335 
336 			++iDir;
337 
338 			if( bTemporary )
339 				aIndexFolderIt.deleteTempIndexFolder( idxDir );
340 
341 		}	// Iterator
342 
343 
344 		int nVectorCount = aIndexFolderResultVectorVector.size();
345 		vector<HitItem>::size_type* pCurrentVectorIndex = new vector<HitItem>::size_type[nVectorCount];
346 		for( int j = 0 ; j < nVectorCount ; ++j )
347 			pCurrentVectorIndex[j] = 0;
348 
349 #ifdef LOGGING
350 		if( pFile )
351 		{
352 			for( int k = 0 ; k < nVectorCount ; ++k )
353 			{
354 				vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[k];
355 				int nItemCount = rIndexFolderVector.size();
356 
357 				fprintf( pFile, "Vector %d, %d elements\n", k, nItemCount );
358 
359 				for( int i = 0 ; i < nItemCount ; ++i )
360 				{
361 					const HitItem& rItem = rIndexFolderVector[ i ];
362 					rtl::OString tmp(rtl::OUStringToOString(rItem.m_aURL, RTL_TEXTENCODING_UTF8));
363 					fprintf( pFile, "    Item_vector%d, %d/%d: score=%f, URL=%s\n", k, i, nItemCount, rItem.m_fScore, tmp.getStr() );
364 				}
365 			}
366 		}
367 #endif
368 
369 		sal_Int32 nTotalHitCount = m_aURLParameter.get_hitCount();
370 		sal_Int32 nHitCount = 0;
371 		while( nHitCount < nTotalHitCount )
372 		{
373 			int iVectorWithBestScore = -1;
374 			float fBestScore = 0.0;
375 			for( int k = 0 ; k < nVectorCount ; ++k )
376 			{
377 				vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[k];
378 				if( pCurrentVectorIndex[k] < rIndexFolderVector.size() )
379 				{
380 					const HitItem& rItem = rIndexFolderVector[ pCurrentVectorIndex[k] ];
381 
382 					if( fBestScore < rItem.m_fScore )
383 					{
384 						fBestScore = rItem.m_fScore;
385 						iVectorWithBestScore = k;
386 					}
387 				}
388 			}
389 
390 			if( iVectorWithBestScore == -1 )	// No item left at all
391 				break;
392 
393 			vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[iVectorWithBestScore];
394 			const HitItem& rItem = rIndexFolderVector[ pCurrentVectorIndex[iVectorWithBestScore] ];
395 
396 			pCurrentVectorIndex[iVectorWithBestScore]++;
397 
398 			aCompleteResultVector.push_back( rItem.m_aURL );
399 			++nHitCount;
400 		}
401 
402 		delete[] pCurrentVectorIndex;
403 		for( int n = 0 ; n < nVectorCount ; ++n )
404 		{
405 			vector<HitItem>* pIndexFolderVector = aIndexFolderResultVectorVector[n];
406 			delete pIndexFolderVector;
407 		}
408 
409 #ifdef LOGGING
410 		fclose( pFile );
411 #endif
412 	}
413 
414 	sal_Int32 replIdx = rtl::OUString::createFromAscii( "#HLP#" ).getLength();
415 	rtl::OUString replWith = rtl::OUString::createFromAscii( "vnd.sun.star.help://" );
416 
417 	int nResultCount = aCompleteResultVector.size();
418 	for( int r = 0 ; r < nResultCount ; ++r )
419 	{
420 		rtl::OUString aURL = aCompleteResultVector[r];
421 		rtl::OUString aResultStr = replWith + aURL.copy(replIdx);
422   		m_aPath.push_back( aResultStr );
423 	}
424 
425 	m_aItems.resize( m_aPath.size() );
426 	m_aIdents.resize( m_aPath.size() );
427 
428 	Command aCommand;
429 	aCommand.Name = rtl::OUString::createFromAscii( "getPropertyValues" );
430 	aCommand.Argument <<= m_sProperty;
431 
432 	for( m_nRow = 0; sal::static_int_cast<sal_uInt32>( m_nRow ) < m_aPath.size(); ++m_nRow )
433 	{
434 		m_aPath[m_nRow] =
435 			m_aPath[m_nRow]                                          +
436 			rtl::OUString::createFromAscii( "?Language=" )           +
437 			m_aURLParameter.get_language()                           +
438 			rtl::OUString::createFromAscii( "&System=" )             +
439 			m_aURLParameter.get_system();
440 
441 		uno::Reference< XContent > content = queryContent();
442 		if( content.is() )
443 		{
444 			uno::Reference< XCommandProcessor > cmd( content,uno::UNO_QUERY );
445 			cmd->execute( aCommand,0,uno::Reference< XCommandEnvironment >( 0 ) ) >>= m_aItems[m_nRow]; //TODO: check return value of operator >>=
446 		}
447 	}
448 	m_nRow = 0xffffffff;
449 }
450