1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_xmlhelp.hxx"
26 #include <com/sun/star/ucb/Command.hpp>
27 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
28 #include <com/sun/star/i18n/XExtendedTransliteration.hpp>
29 #include <com/sun/star/ucb/XCommandProcessor.hpp>
30 #include <com/sun/star/lang/Locale.hpp>
31 #include <com/sun/star/script/XInvocation.hpp>
32 
33 #ifndef INCLUDED_STL_ALGORITHM
34 #include <algorithm>
35 #define INCLUDED_STL_ALGORITHM
36 #endif
37 #ifndef INCLUDED_STL_SET
38 #include <set>
39 #define INCLUDED_STL_SET
40 #endif
41 
42 #include <qe/Query.hxx>
43 #include <qe/DocGenerator.hxx>
44 #include "resultsetforquery.hxx"
45 #include "databases.hxx"
46 
47 // For testing
48 // #define LOGGING
49 
50 using namespace std;
51 using namespace chelp;
52 using namespace xmlsearch::excep;
53 using namespace xmlsearch::qe;
54 using namespace com::sun::star;
55 using namespace com::sun::star::ucb;
56 using namespace com::sun::star::i18n;
57 using namespace com::sun::star::uno;
58 using namespace com::sun::star::lang;
59 
60 struct HitItem
61 {
62 	rtl::OUString	m_aURL;
63 	float			m_fScore;
64 
HitItemHitItem65 	HitItem( void )	{}
HitItemHitItem66 	HitItem( const rtl::OUString& aURL, float fScore )
67 		: m_aURL( aURL )
68 		, m_fScore( fScore )
69 	{}
operator <HitItem70 	bool operator < ( const HitItem& rHitItem ) const
71 	{
72 		return rHitItem.m_fScore < m_fScore;
73 	}
74 };
75 
ResultSetForQuery(const uno::Reference<lang::XMultiServiceFactory> & xMSF,const uno::Reference<XContentProvider> & xProvider,sal_Int32 nOpenMode,const uno::Sequence<beans::Property> & seq,const uno::Sequence<NumberedSortingInfo> & seqSort,URLParameter & aURLParameter,Databases * pDatabases)76 ResultSetForQuery::ResultSetForQuery( const uno::Reference< lang::XMultiServiceFactory >&  xMSF,
77 									  const uno::Reference< XContentProvider >&  xProvider,
78 									  sal_Int32 nOpenMode,
79 									  const uno::Sequence< beans::Property >& seq,
80 									  const uno::Sequence< NumberedSortingInfo >& seqSort,
81 									  URLParameter& aURLParameter,
82 									  Databases* pDatabases )
83 	: ResultSetBase( xMSF,xProvider,nOpenMode,seq,seqSort ),
84 	  m_pDatabases( pDatabases ),
85       m_aURLParameter( aURLParameter )
86 {
87     Reference< XTransliteration > xTrans(
88         xMSF->createInstance( rtl::OUString::createFromAscii( "com.sun.star.i18n.Transliteration" ) ),
89         UNO_QUERY );
90     Locale aLocale( aURLParameter.get_language(),
91                     rtl::OUString(),
92                     rtl::OUString() );
93     if(xTrans.is())
94         xTrans->loadModule(TransliterationModules_UPPERCASE_LOWERCASE,
95                            aLocale );
96 
97 	// Access Lucene via XInvocation
98 	Reference< script::XInvocation > xInvocation(
99         xMSF->createInstance( rtl::OUString::createFromAscii( "com.sun.star.help.HelpSearch" ) ),
100         UNO_QUERY );
101 
102 	vector< vector< rtl::OUString > > queryList;
103 	{
104 		sal_Int32 idx;
105 		rtl::OUString query = m_aURLParameter.get_query();
106 		while( query.getLength() )
107 		{
108 			idx = query.indexOf( sal_Unicode( ' ' ) );
109 			if( idx == -1 )
110 				idx = query.getLength();
111 
112 			vector< rtl::OUString > currentQuery;
113             rtl::OUString tmp(query.copy( 0,idx ));
114             rtl:: OUString toliterate = tmp;
115             if(xTrans.is()) {
116                 Sequence<sal_Int32> aSeq;
117                 toliterate = xTrans->transliterate(
118                     tmp,0,tmp.getLength(),aSeq);
119             }
120 
121 			currentQuery.push_back( toliterate );
122 			queryList.push_back( currentQuery );
123 
124 			int nCpy = 1 + idx;
125 			if( nCpy >= query.getLength() )
126 				query = rtl::OUString();
127 			else
128 				query = query.copy( 1 + idx );
129 		}
130 	}
131 
132 	vector< rtl::OUString > aCompleteResultVector;
133 	if( xInvocation.is() )
134 	{
135 		rtl::OUString scope = m_aURLParameter.get_scope();
136 		bool bCaptionsOnly = ( scope.compareToAscii( "Heading" ) == 0 );
137 		sal_Int32 hitCount = m_aURLParameter.get_hitCount();
138 
139 #ifdef LOGGING
140 		FILE* pFile = fopen( "d:\\resultset_out.txt", "w" );
141 #endif
142 
143 		IndexFolderIterator aIndexFolderIt( *pDatabases, m_aURLParameter.get_module(), m_aURLParameter.get_language() );
144 		rtl::OUString idxDir;
145 		bool bExtension = false;
146 		int iDir = 0;
147 		vector< vector<HitItem>* > aIndexFolderResultVectorVector;
148 
149 		bool bTemporary;
150 		while( (idxDir = aIndexFolderIt.nextIndexFolder( bExtension, bTemporary )).getLength() > 0 )
151 		{
152 			vector<HitItem> aIndexFolderResultVector;
153 
154 			try
155 			{
156 				vector< vector<HitItem>* > aQueryListResultVectorVector;
157 				set< rtl::OUString > aSet,aCurrent,aResultSet;
158 
159 				int nQueryListSize = queryList.size();
160 				if( nQueryListSize > 1 )
161 					hitCount = 2000;
162 
163 				for( int i = 0; i < nQueryListSize; ++i )
164 				{
165 					vector<HitItem>* pQueryResultVector;
166 					if( nQueryListSize > 1 )
167 					{
168 						pQueryResultVector = new vector<HitItem>();
169 						aQueryListResultVectorVector.push_back( pQueryResultVector );
170 					}
171 					else
172 					{
173 						pQueryResultVector = &aIndexFolderResultVector;
174 					}
175 					pQueryResultVector->reserve( hitCount );
176 
177 					int nParamCount = bCaptionsOnly ? 7 : 6;
178 					Sequence<uno::Any> aParamsSeq( nParamCount );
179 
180 					aParamsSeq[0] = uno::makeAny( rtl::OUString::createFromAscii( "-lang" ) );
181 					aParamsSeq[1] = uno::makeAny( m_aURLParameter.get_language() );
182 
183 					aParamsSeq[2] = uno::makeAny( rtl::OUString::createFromAscii( "-index" ) );
184 					rtl::OUString aSystemPath;
185 					osl::FileBase::getSystemPathFromFileURL( idxDir, aSystemPath );
186 					aParamsSeq[3] = uno::makeAny( aSystemPath );
187 
188 					aParamsSeq[4] = uno::makeAny( rtl::OUString::createFromAscii( "-query" ) );
189 
190 					const std::vector< rtl::OUString >& aListItem = queryList[i];
191 					::rtl::OUString aNewQueryStr = aListItem[0];
192 					aParamsSeq[5] = uno::makeAny( aNewQueryStr );
193 
194 					if( bCaptionsOnly )
195 						aParamsSeq[6] = uno::makeAny( rtl::OUString::createFromAscii( "-caption" ) );
196 
197 					Sequence< sal_Int16 > aOutParamIndex;
198 					Sequence< uno::Any > aOutParam;
199 
200 					uno::Any aRet = xInvocation->invoke( rtl::OUString::createFromAscii( "search" ),
201 						aParamsSeq, aOutParamIndex, aOutParam );
202 
203 					Sequence< float > aScoreSeq;
204 					int nScoreCount = 0;
205 					int nOutParamCount = aOutParam.getLength();
206 					if( nOutParamCount == 1 )
207 					{
208 						const uno::Any* pScoreAnySeq = aOutParam.getConstArray();
209 						if( pScoreAnySeq[0] >>= aScoreSeq )
210 							nScoreCount = aScoreSeq.getLength();
211 					}
212 
213 					Sequence<rtl::OUString> aRetSeq;
214 					if( aRet >>= aRetSeq )
215 					{
216 						if( nQueryListSize > 1 )
217 							aSet.clear();
218 
219 						const rtl::OUString* pRetSeq = aRetSeq.getConstArray();
220 						int nCount = aRetSeq.getLength();
221 						if( nCount > hitCount )
222 							nCount = hitCount;
223 						for( int j = 0 ; j < nCount ; ++j )
224 						{
225 							float fScore = 0.0;
226 							if( j < nScoreCount )
227 								fScore = aScoreSeq[j];
228 
229 							rtl::OUString aURL = pRetSeq[j];
230 							pQueryResultVector->push_back( HitItem( aURL, fScore ) );
231 							if( nQueryListSize > 1 )
232 								aSet.insert( aURL );
233 
234 #ifdef LOGGING
235 							if( pFile )
236 							{
237 								rtl::OString tmp(rtl::OUStringToOString( aURL, RTL_TEXTENCODING_UTF8));
238 								fprintf( pFile, "Dir %d, Query %d, Item: score=%f, URL=%s\n", iDir, i, fScore, tmp.getStr() );
239 							}
240 #endif
241 						}
242 					}
243 
244 					// intersect
245 					if( nQueryListSize > 1 )
246 					{
247 						if( i == 0 )
248 						{
249 							aResultSet = aSet;
250 						}
251 						else
252 						{
253 							aCurrent = aResultSet;
254 							aResultSet.clear();
255 							set_intersection( aSet.begin(),aSet.end(),
256 											  aCurrent.begin(),aCurrent.end(),
257 											  inserter(aResultSet,aResultSet.begin()));
258 						}
259 					}
260 				}
261 
262 				// Combine results in aIndexFolderResultVector
263 				if( nQueryListSize > 1 )
264 				{
265 					for( int n = 0 ; n < nQueryListSize ; ++n )
266 					{
267 						vector<HitItem>* pQueryResultVector = aQueryListResultVectorVector[n];
268 						vector<HitItem>& rQueryResultVector = *pQueryResultVector;
269 
270 						int nItemCount = rQueryResultVector.size();
271 						for( int i = 0 ; i < nItemCount ; ++i )
272 						{
273 							const HitItem& rItem = rQueryResultVector[ i ];
274 							set< rtl::OUString >::iterator it;
275 							if( (it = aResultSet.find( rItem.m_aURL )) != aResultSet.end() )
276 							{
277 								HitItem aItemCopy( rItem );
278 								aItemCopy.m_fScore /= nQueryListSize;	// To get average score
279 								if( n == 0 )
280 								{
281 									// Use first pass to create entry
282 									aIndexFolderResultVector.push_back( aItemCopy );
283 
284 #ifdef LOGGING
285 									if( pFile )
286 									{
287 										rtl::OString tmp(rtl::OUStringToOString( aItemCopy.m_aURL, RTL_TEXTENCODING_UTF8));
288 										fprintf( pFile, "Combine: Query %d (first pass), Item %d: score=%f (%f), URL=%s\n", n, i, aItemCopy.m_fScore, rItem.m_fScore, tmp.getStr() );
289 									}
290 #endif
291 								}
292 								else
293 								{
294 									// Find entry in vector
295 									int nCount = aIndexFolderResultVector.size();
296 									for( int j = 0 ; j < nCount ; ++j )
297 									{
298 										HitItem& rFindItem = aIndexFolderResultVector[ j ];
299 										if( rFindItem.m_aURL.equals( aItemCopy.m_aURL ) )
300 										{
301 #ifdef LOGGING
302 											if( pFile )
303 											{
304 												rtl::OString tmp(rtl::OUStringToOString( aItemCopy.m_aURL, RTL_TEXTENCODING_UTF8));
305 												fprintf( pFile, "Combine: Query %d, Item %d: score=%f + %f = %f, URL=%s\n", n, i,
306 													rFindItem.m_fScore, aItemCopy.m_fScore, rFindItem.m_fScore + aItemCopy.m_fScore, tmp.getStr() );
307 											}
308 #endif
309 
310 											rFindItem.m_fScore += aItemCopy.m_fScore;
311 											break;
312 										}
313 									}
314 								}
315 							}
316 						}
317 
318 						delete pQueryResultVector;
319 					}
320 
321 					sort( aIndexFolderResultVector.begin(), aIndexFolderResultVector.end() );
322 				}
323 
324 				vector<HitItem>* pIndexFolderHitItemVector = new vector<HitItem>( aIndexFolderResultVector );
325 				aIndexFolderResultVectorVector.push_back( pIndexFolderHitItemVector );
326 				aIndexFolderResultVector.clear();
327 			}
328 			catch( const Exception& )
329 			{
330 			}
331 
332 			++iDir;
333 
334 			if( bTemporary )
335 				aIndexFolderIt.deleteTempIndexFolder( idxDir );
336 
337 		}	// Iterator
338 
339 
340 		int nVectorCount = aIndexFolderResultVectorVector.size();
341 		vector<HitItem>::size_type* pCurrentVectorIndex = new vector<HitItem>::size_type[nVectorCount];
342 		for( int j = 0 ; j < nVectorCount ; ++j )
343 			pCurrentVectorIndex[j] = 0;
344 
345 #ifdef LOGGING
346 		if( pFile )
347 		{
348 			for( int k = 0 ; k < nVectorCount ; ++k )
349 			{
350 				vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[k];
351 				int nItemCount = rIndexFolderVector.size();
352 
353 				fprintf( pFile, "Vector %d, %d elements\n", k, nItemCount );
354 
355 				for( int i = 0 ; i < nItemCount ; ++i )
356 				{
357 					const HitItem& rItem = rIndexFolderVector[ i ];
358 					rtl::OString tmp(rtl::OUStringToOString(rItem.m_aURL, RTL_TEXTENCODING_UTF8));
359 					fprintf( pFile, "    Item_vector%d, %d/%d: score=%f, URL=%s\n", k, i, nItemCount, rItem.m_fScore, tmp.getStr() );
360 				}
361 			}
362 		}
363 #endif
364 
365 		sal_Int32 nTotalHitCount = m_aURLParameter.get_hitCount();
366 		sal_Int32 nHitCount = 0;
367 		while( nHitCount < nTotalHitCount )
368 		{
369 			int iVectorWithBestScore = -1;
370 			float fBestScore = 0.0;
371 			for( int k = 0 ; k < nVectorCount ; ++k )
372 			{
373 				vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[k];
374 				if( pCurrentVectorIndex[k] < rIndexFolderVector.size() )
375 				{
376 					const HitItem& rItem = rIndexFolderVector[ pCurrentVectorIndex[k] ];
377 
378 					if( fBestScore < rItem.m_fScore )
379 					{
380 						fBestScore = rItem.m_fScore;
381 						iVectorWithBestScore = k;
382 					}
383 				}
384 			}
385 
386 			if( iVectorWithBestScore == -1 )	// No item left at all
387 				break;
388 
389 			vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[iVectorWithBestScore];
390 			const HitItem& rItem = rIndexFolderVector[ pCurrentVectorIndex[iVectorWithBestScore] ];
391 
392 			pCurrentVectorIndex[iVectorWithBestScore]++;
393 
394 			aCompleteResultVector.push_back( rItem.m_aURL );
395 			++nHitCount;
396 		}
397 
398 		delete[] pCurrentVectorIndex;
399 		for( int n = 0 ; n < nVectorCount ; ++n )
400 		{
401 			vector<HitItem>* pIndexFolderVector = aIndexFolderResultVectorVector[n];
402 			delete pIndexFolderVector;
403 		}
404 
405 #ifdef LOGGING
406 		fclose( pFile );
407 #endif
408 	}
409 
410 	sal_Int32 replIdx = rtl::OUString::createFromAscii( "#HLP#" ).getLength();
411 	rtl::OUString replWith = rtl::OUString::createFromAscii( "vnd.sun.star.help://" );
412 
413 	int nResultCount = aCompleteResultVector.size();
414 	for( int r = 0 ; r < nResultCount ; ++r )
415 	{
416 		rtl::OUString aURL = aCompleteResultVector[r];
417 		rtl::OUString aResultStr = replWith + aURL.copy(replIdx);
418   		m_aPath.push_back( aResultStr );
419 	}
420 
421 	m_aItems.resize( m_aPath.size() );
422 	m_aIdents.resize( m_aPath.size() );
423 
424 	Command aCommand;
425 	aCommand.Name = rtl::OUString::createFromAscii( "getPropertyValues" );
426 	aCommand.Argument <<= m_sProperty;
427 
428 	for( m_nRow = 0; sal::static_int_cast<sal_uInt32>( m_nRow ) < m_aPath.size(); ++m_nRow )
429 	{
430 		m_aPath[m_nRow] =
431 			m_aPath[m_nRow]                                          +
432 			rtl::OUString::createFromAscii( "?Language=" )           +
433 			m_aURLParameter.get_language()                           +
434 			rtl::OUString::createFromAscii( "&System=" )             +
435 			m_aURLParameter.get_system();
436 
437 		uno::Reference< XContent > content = queryContent();
438 		if( content.is() )
439 		{
440 			uno::Reference< XCommandProcessor > cmd( content,uno::UNO_QUERY );
441 			cmd->execute( aCommand,0,uno::Reference< XCommandEnvironment >( 0 ) ) >>= m_aItems[m_nRow]; //TODO: check return value of operator >>=
442 		}
443 	}
444 	m_nRow = 0xffffffff;
445 }
446