1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_xmlhelp.hxx"
26 #include <com/sun/star/ucb/Command.hpp>
27 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
28 #include <com/sun/star/i18n/XExtendedTransliteration.hpp>
29 #include <com/sun/star/ucb/XCommandProcessor.hpp>
30 #include <com/sun/star/lang/Locale.hpp>
31 #include <com/sun/star/script/XInvocation.hpp>
32
33 #ifndef INCLUDED_STL_ALGORITHM
34 #include <algorithm>
35 #define INCLUDED_STL_ALGORITHM
36 #endif
37 #ifndef INCLUDED_STL_SET
38 #include <set>
39 #define INCLUDED_STL_SET
40 #endif
41
42 #include <qe/Query.hxx>
43 #include <qe/DocGenerator.hxx>
44 #include "resultsetforquery.hxx"
45 #include "databases.hxx"
46
47 // For testing
48 // #define LOGGING
49
50 using namespace std;
51 using namespace chelp;
52 using namespace xmlsearch::excep;
53 using namespace xmlsearch::qe;
54 using namespace com::sun::star;
55 using namespace com::sun::star::ucb;
56 using namespace com::sun::star::i18n;
57 using namespace com::sun::star::uno;
58 using namespace com::sun::star::lang;
59
60 struct HitItem
61 {
62 rtl::OUString m_aURL;
63 float m_fScore;
64
HitItemHitItem65 HitItem( void ) {}
HitItemHitItem66 HitItem( const rtl::OUString& aURL, float fScore )
67 : m_aURL( aURL )
68 , m_fScore( fScore )
69 {}
operator <HitItem70 bool operator < ( const HitItem& rHitItem ) const
71 {
72 return rHitItem.m_fScore < m_fScore;
73 }
74 };
75
ResultSetForQuery(const uno::Reference<lang::XMultiServiceFactory> & xMSF,const uno::Reference<XContentProvider> & xProvider,sal_Int32 nOpenMode,const uno::Sequence<beans::Property> & seq,const uno::Sequence<NumberedSortingInfo> & seqSort,URLParameter & aURLParameter,Databases * pDatabases)76 ResultSetForQuery::ResultSetForQuery( const uno::Reference< lang::XMultiServiceFactory >& xMSF,
77 const uno::Reference< XContentProvider >& xProvider,
78 sal_Int32 nOpenMode,
79 const uno::Sequence< beans::Property >& seq,
80 const uno::Sequence< NumberedSortingInfo >& seqSort,
81 URLParameter& aURLParameter,
82 Databases* pDatabases )
83 : ResultSetBase( xMSF,xProvider,nOpenMode,seq,seqSort ),
84 m_pDatabases( pDatabases ),
85 m_aURLParameter( aURLParameter )
86 {
87 Reference< XTransliteration > xTrans(
88 xMSF->createInstance( rtl::OUString::createFromAscii( "com.sun.star.i18n.Transliteration" ) ),
89 UNO_QUERY );
90 Locale aLocale( aURLParameter.get_language(),
91 rtl::OUString(),
92 rtl::OUString() );
93 if(xTrans.is())
94 xTrans->loadModule(TransliterationModules_UPPERCASE_LOWERCASE,
95 aLocale );
96
97 // Access Lucene via XInvocation
98 Reference< script::XInvocation > xInvocation(
99 xMSF->createInstance( rtl::OUString::createFromAscii( "com.sun.star.help.HelpSearch" ) ),
100 UNO_QUERY );
101
102 vector< vector< rtl::OUString > > queryList;
103 {
104 sal_Int32 idx;
105 rtl::OUString query = m_aURLParameter.get_query();
106 while( query.getLength() )
107 {
108 idx = query.indexOf( sal_Unicode( ' ' ) );
109 if( idx == -1 )
110 idx = query.getLength();
111
112 vector< rtl::OUString > currentQuery;
113 rtl::OUString tmp(query.copy( 0,idx ));
114 rtl:: OUString toliterate = tmp;
115 if(xTrans.is()) {
116 Sequence<sal_Int32> aSeq;
117 toliterate = xTrans->transliterate(
118 tmp,0,tmp.getLength(),aSeq);
119 }
120
121 currentQuery.push_back( toliterate );
122 queryList.push_back( currentQuery );
123
124 int nCpy = 1 + idx;
125 if( nCpy >= query.getLength() )
126 query = rtl::OUString();
127 else
128 query = query.copy( 1 + idx );
129 }
130 }
131
132 vector< rtl::OUString > aCompleteResultVector;
133 if( xInvocation.is() )
134 {
135 rtl::OUString scope = m_aURLParameter.get_scope();
136 bool bCaptionsOnly = ( scope.compareToAscii( "Heading" ) == 0 );
137 sal_Int32 hitCount = m_aURLParameter.get_hitCount();
138
139 #ifdef LOGGING
140 FILE* pFile = fopen( "d:\\resultset_out.txt", "w" );
141 #endif
142
143 IndexFolderIterator aIndexFolderIt( *pDatabases, m_aURLParameter.get_module(), m_aURLParameter.get_language() );
144 rtl::OUString idxDir;
145 bool bExtension = false;
146 int iDir = 0;
147 vector< vector<HitItem>* > aIndexFolderResultVectorVector;
148
149 bool bTemporary;
150 while( (idxDir = aIndexFolderIt.nextIndexFolder( bExtension, bTemporary )).getLength() > 0 )
151 {
152 vector<HitItem> aIndexFolderResultVector;
153
154 try
155 {
156 vector< vector<HitItem>* > aQueryListResultVectorVector;
157 set< rtl::OUString > aSet,aCurrent,aResultSet;
158
159 int nQueryListSize = queryList.size();
160 if( nQueryListSize > 1 )
161 hitCount = 2000;
162
163 for( int i = 0; i < nQueryListSize; ++i )
164 {
165 vector<HitItem>* pQueryResultVector;
166 if( nQueryListSize > 1 )
167 {
168 pQueryResultVector = new vector<HitItem>();
169 aQueryListResultVectorVector.push_back( pQueryResultVector );
170 }
171 else
172 {
173 pQueryResultVector = &aIndexFolderResultVector;
174 }
175 pQueryResultVector->reserve( hitCount );
176
177 int nParamCount = bCaptionsOnly ? 7 : 6;
178 Sequence<uno::Any> aParamsSeq( nParamCount );
179
180 aParamsSeq[0] = uno::makeAny( rtl::OUString::createFromAscii( "-lang" ) );
181 aParamsSeq[1] = uno::makeAny( m_aURLParameter.get_language() );
182
183 aParamsSeq[2] = uno::makeAny( rtl::OUString::createFromAscii( "-index" ) );
184 rtl::OUString aSystemPath;
185 osl::FileBase::getSystemPathFromFileURL( idxDir, aSystemPath );
186 aParamsSeq[3] = uno::makeAny( aSystemPath );
187
188 aParamsSeq[4] = uno::makeAny( rtl::OUString::createFromAscii( "-query" ) );
189
190 const std::vector< rtl::OUString >& aListItem = queryList[i];
191 ::rtl::OUString aNewQueryStr = aListItem[0];
192 aParamsSeq[5] = uno::makeAny( aNewQueryStr );
193
194 if( bCaptionsOnly )
195 aParamsSeq[6] = uno::makeAny( rtl::OUString::createFromAscii( "-caption" ) );
196
197 Sequence< sal_Int16 > aOutParamIndex;
198 Sequence< uno::Any > aOutParam;
199
200 uno::Any aRet = xInvocation->invoke( rtl::OUString::createFromAscii( "search" ),
201 aParamsSeq, aOutParamIndex, aOutParam );
202
203 Sequence< float > aScoreSeq;
204 int nScoreCount = 0;
205 int nOutParamCount = aOutParam.getLength();
206 if( nOutParamCount == 1 )
207 {
208 const uno::Any* pScoreAnySeq = aOutParam.getConstArray();
209 if( pScoreAnySeq[0] >>= aScoreSeq )
210 nScoreCount = aScoreSeq.getLength();
211 }
212
213 Sequence<rtl::OUString> aRetSeq;
214 if( aRet >>= aRetSeq )
215 {
216 if( nQueryListSize > 1 )
217 aSet.clear();
218
219 const rtl::OUString* pRetSeq = aRetSeq.getConstArray();
220 int nCount = aRetSeq.getLength();
221 if( nCount > hitCount )
222 nCount = hitCount;
223 for( int j = 0 ; j < nCount ; ++j )
224 {
225 float fScore = 0.0;
226 if( j < nScoreCount )
227 fScore = aScoreSeq[j];
228
229 rtl::OUString aURL = pRetSeq[j];
230 pQueryResultVector->push_back( HitItem( aURL, fScore ) );
231 if( nQueryListSize > 1 )
232 aSet.insert( aURL );
233
234 #ifdef LOGGING
235 if( pFile )
236 {
237 rtl::OString tmp(rtl::OUStringToOString( aURL, RTL_TEXTENCODING_UTF8));
238 fprintf( pFile, "Dir %d, Query %d, Item: score=%f, URL=%s\n", iDir, i, fScore, tmp.getStr() );
239 }
240 #endif
241 }
242 }
243
244 // intersect
245 if( nQueryListSize > 1 )
246 {
247 if( i == 0 )
248 {
249 aResultSet = aSet;
250 }
251 else
252 {
253 aCurrent = aResultSet;
254 aResultSet.clear();
255 set_intersection( aSet.begin(),aSet.end(),
256 aCurrent.begin(),aCurrent.end(),
257 inserter(aResultSet,aResultSet.begin()));
258 }
259 }
260 }
261
262 // Combine results in aIndexFolderResultVector
263 if( nQueryListSize > 1 )
264 {
265 for( int n = 0 ; n < nQueryListSize ; ++n )
266 {
267 vector<HitItem>* pQueryResultVector = aQueryListResultVectorVector[n];
268 vector<HitItem>& rQueryResultVector = *pQueryResultVector;
269
270 int nItemCount = rQueryResultVector.size();
271 for( int i = 0 ; i < nItemCount ; ++i )
272 {
273 const HitItem& rItem = rQueryResultVector[ i ];
274 set< rtl::OUString >::iterator it;
275 if( (it = aResultSet.find( rItem.m_aURL )) != aResultSet.end() )
276 {
277 HitItem aItemCopy( rItem );
278 aItemCopy.m_fScore /= nQueryListSize; // To get average score
279 if( n == 0 )
280 {
281 // Use first pass to create entry
282 aIndexFolderResultVector.push_back( aItemCopy );
283
284 #ifdef LOGGING
285 if( pFile )
286 {
287 rtl::OString tmp(rtl::OUStringToOString( aItemCopy.m_aURL, RTL_TEXTENCODING_UTF8));
288 fprintf( pFile, "Combine: Query %d (first pass), Item %d: score=%f (%f), URL=%s\n", n, i, aItemCopy.m_fScore, rItem.m_fScore, tmp.getStr() );
289 }
290 #endif
291 }
292 else
293 {
294 // Find entry in vector
295 int nCount = aIndexFolderResultVector.size();
296 for( int j = 0 ; j < nCount ; ++j )
297 {
298 HitItem& rFindItem = aIndexFolderResultVector[ j ];
299 if( rFindItem.m_aURL.equals( aItemCopy.m_aURL ) )
300 {
301 #ifdef LOGGING
302 if( pFile )
303 {
304 rtl::OString tmp(rtl::OUStringToOString( aItemCopy.m_aURL, RTL_TEXTENCODING_UTF8));
305 fprintf( pFile, "Combine: Query %d, Item %d: score=%f + %f = %f, URL=%s\n", n, i,
306 rFindItem.m_fScore, aItemCopy.m_fScore, rFindItem.m_fScore + aItemCopy.m_fScore, tmp.getStr() );
307 }
308 #endif
309
310 rFindItem.m_fScore += aItemCopy.m_fScore;
311 break;
312 }
313 }
314 }
315 }
316 }
317
318 delete pQueryResultVector;
319 }
320
321 sort( aIndexFolderResultVector.begin(), aIndexFolderResultVector.end() );
322 }
323
324 vector<HitItem>* pIndexFolderHitItemVector = new vector<HitItem>( aIndexFolderResultVector );
325 aIndexFolderResultVectorVector.push_back( pIndexFolderHitItemVector );
326 aIndexFolderResultVector.clear();
327 }
328 catch( const Exception& )
329 {
330 }
331
332 ++iDir;
333
334 if( bTemporary )
335 aIndexFolderIt.deleteTempIndexFolder( idxDir );
336
337 } // Iterator
338
339
340 int nVectorCount = aIndexFolderResultVectorVector.size();
341 vector<HitItem>::size_type* pCurrentVectorIndex = new vector<HitItem>::size_type[nVectorCount];
342 for( int j = 0 ; j < nVectorCount ; ++j )
343 pCurrentVectorIndex[j] = 0;
344
345 #ifdef LOGGING
346 if( pFile )
347 {
348 for( int k = 0 ; k < nVectorCount ; ++k )
349 {
350 vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[k];
351 int nItemCount = rIndexFolderVector.size();
352
353 fprintf( pFile, "Vector %d, %d elements\n", k, nItemCount );
354
355 for( int i = 0 ; i < nItemCount ; ++i )
356 {
357 const HitItem& rItem = rIndexFolderVector[ i ];
358 rtl::OString tmp(rtl::OUStringToOString(rItem.m_aURL, RTL_TEXTENCODING_UTF8));
359 fprintf( pFile, " Item_vector%d, %d/%d: score=%f, URL=%s\n", k, i, nItemCount, rItem.m_fScore, tmp.getStr() );
360 }
361 }
362 }
363 #endif
364
365 sal_Int32 nTotalHitCount = m_aURLParameter.get_hitCount();
366 sal_Int32 nHitCount = 0;
367 while( nHitCount < nTotalHitCount )
368 {
369 int iVectorWithBestScore = -1;
370 float fBestScore = 0.0;
371 for( int k = 0 ; k < nVectorCount ; ++k )
372 {
373 vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[k];
374 if( pCurrentVectorIndex[k] < rIndexFolderVector.size() )
375 {
376 const HitItem& rItem = rIndexFolderVector[ pCurrentVectorIndex[k] ];
377
378 if( fBestScore < rItem.m_fScore )
379 {
380 fBestScore = rItem.m_fScore;
381 iVectorWithBestScore = k;
382 }
383 }
384 }
385
386 if( iVectorWithBestScore == -1 ) // No item left at all
387 break;
388
389 vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[iVectorWithBestScore];
390 const HitItem& rItem = rIndexFolderVector[ pCurrentVectorIndex[iVectorWithBestScore] ];
391
392 pCurrentVectorIndex[iVectorWithBestScore]++;
393
394 aCompleteResultVector.push_back( rItem.m_aURL );
395 ++nHitCount;
396 }
397
398 delete[] pCurrentVectorIndex;
399 for( int n = 0 ; n < nVectorCount ; ++n )
400 {
401 vector<HitItem>* pIndexFolderVector = aIndexFolderResultVectorVector[n];
402 delete pIndexFolderVector;
403 }
404
405 #ifdef LOGGING
406 fclose( pFile );
407 #endif
408 }
409
410 sal_Int32 replIdx = rtl::OUString::createFromAscii( "#HLP#" ).getLength();
411 rtl::OUString replWith = rtl::OUString::createFromAscii( "vnd.sun.star.help://" );
412
413 int nResultCount = aCompleteResultVector.size();
414 for( int r = 0 ; r < nResultCount ; ++r )
415 {
416 rtl::OUString aURL = aCompleteResultVector[r];
417 rtl::OUString aResultStr = replWith + aURL.copy(replIdx);
418 m_aPath.push_back( aResultStr );
419 }
420
421 m_aItems.resize( m_aPath.size() );
422 m_aIdents.resize( m_aPath.size() );
423
424 Command aCommand;
425 aCommand.Name = rtl::OUString::createFromAscii( "getPropertyValues" );
426 aCommand.Argument <<= m_sProperty;
427
428 for( m_nRow = 0; sal::static_int_cast<sal_uInt32>( m_nRow ) < m_aPath.size(); ++m_nRow )
429 {
430 m_aPath[m_nRow] =
431 m_aPath[m_nRow] +
432 rtl::OUString::createFromAscii( "?Language=" ) +
433 m_aURLParameter.get_language() +
434 rtl::OUString::createFromAscii( "&System=" ) +
435 m_aURLParameter.get_system();
436
437 uno::Reference< XContent > content = queryContent();
438 if( content.is() )
439 {
440 uno::Reference< XCommandProcessor > cmd( content,uno::UNO_QUERY );
441 cmd->execute( aCommand,0,uno::Reference< XCommandEnvironment >( 0 ) ) >>= m_aItems[m_nRow]; //TODO: check return value of operator >>=
442 }
443 }
444 m_nRow = 0xffffffff;
445 }
446