xref: /trunk/main/xmlhelp/source/cxxhelp/provider/resultsetforquery.cxx (revision cdf0e10c4e3984b49a9502b011690b615761d4a3)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_xmlhelp.hxx"
30 #include <com/sun/star/ucb/Command.hpp>
31 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
32 #include <com/sun/star/i18n/XExtendedTransliteration.hpp>
33 #include <com/sun/star/ucb/XCommandProcessor.hpp>
34 #include <com/sun/star/lang/Locale.hpp>
35 #include <com/sun/star/script/XInvocation.hpp>
36 
37 #ifndef INCLUDED_STL_ALGORITHM
38 #include <algorithm>
39 #define INCLUDED_STL_ALGORITHM
40 #endif
41 #ifndef INCLUDED_STL_SET
42 #include <set>
43 #define INCLUDED_STL_SET
44 #endif
45 
46 #include <qe/Query.hxx>
47 #include <qe/DocGenerator.hxx>
48 #include "resultsetforquery.hxx"
49 #include "databases.hxx"
50 
51 // For testing
52 // #define LOGGING
53 
54 using namespace std;
55 using namespace chelp;
56 using namespace xmlsearch::excep;
57 using namespace xmlsearch::qe;
58 using namespace com::sun::star;
59 using namespace com::sun::star::ucb;
60 using namespace com::sun::star::i18n;
61 using namespace com::sun::star::uno;
62 using namespace com::sun::star::lang;
63 
64 struct HitItem
65 {
66     rtl::OUString   m_aURL;
67     float           m_fScore;
68 
69     HitItem( void ) {}
70     HitItem( const rtl::OUString& aURL, float fScore )
71         : m_aURL( aURL )
72         , m_fScore( fScore )
73     {}
74     bool operator < ( const HitItem& rHitItem ) const
75     {
76         return rHitItem.m_fScore < m_fScore;
77     }
78 };
79 
80 ResultSetForQuery::ResultSetForQuery( const uno::Reference< lang::XMultiServiceFactory >&  xMSF,
81                                       const uno::Reference< XContentProvider >&  xProvider,
82                                       sal_Int32 nOpenMode,
83                                       const uno::Sequence< beans::Property >& seq,
84                                       const uno::Sequence< NumberedSortingInfo >& seqSort,
85                                       URLParameter& aURLParameter,
86                                       Databases* pDatabases )
87     : ResultSetBase( xMSF,xProvider,nOpenMode,seq,seqSort ),
88       m_pDatabases( pDatabases ),
89       m_aURLParameter( aURLParameter )
90 {
91     Reference< XTransliteration > xTrans(
92         xMSF->createInstance( rtl::OUString::createFromAscii( "com.sun.star.i18n.Transliteration" ) ),
93         UNO_QUERY );
94     Locale aLocale( aURLParameter.get_language(),
95                     rtl::OUString(),
96                     rtl::OUString() );
97     if(xTrans.is())
98         xTrans->loadModule(TransliterationModules_UPPERCASE_LOWERCASE,
99                            aLocale );
100 
101     // Access Lucene via XInvocation
102     Reference< script::XInvocation > xInvocation(
103         xMSF->createInstance( rtl::OUString::createFromAscii( "com.sun.star.help.HelpSearch" ) ),
104         UNO_QUERY );
105 
106     vector< vector< rtl::OUString > > queryList;
107     {
108         sal_Int32 idx;
109         rtl::OUString query = m_aURLParameter.get_query();
110         while( query.getLength() )
111         {
112             idx = query.indexOf( sal_Unicode( ' ' ) );
113             if( idx == -1 )
114                 idx = query.getLength();
115 
116             vector< rtl::OUString > currentQuery;
117             rtl::OUString tmp(query.copy( 0,idx ));
118             rtl:: OUString toliterate = tmp;
119             if(xTrans.is()) {
120                 Sequence<sal_Int32> aSeq;
121                 toliterate = xTrans->transliterate(
122                     tmp,0,tmp.getLength(),aSeq);
123             }
124 
125             currentQuery.push_back( toliterate );
126             queryList.push_back( currentQuery );
127 
128             int nCpy = 1 + idx;
129             if( nCpy >= query.getLength() )
130                 query = rtl::OUString();
131             else
132                 query = query.copy( 1 + idx );
133         }
134     }
135 
136     vector< rtl::OUString > aCompleteResultVector;
137     if( xInvocation.is() )
138     {
139         rtl::OUString scope = m_aURLParameter.get_scope();
140         bool bCaptionsOnly = ( scope.compareToAscii( "Heading" ) == 0 );
141         sal_Int32 hitCount = m_aURLParameter.get_hitCount();
142 
143 #ifdef LOGGING
144         FILE* pFile = fopen( "d:\\resultset_out.txt", "w" );
145 #endif
146 
147         IndexFolderIterator aIndexFolderIt( *pDatabases, m_aURLParameter.get_module(), m_aURLParameter.get_language() );
148         rtl::OUString idxDir;
149         bool bExtension = false;
150         int iDir = 0;
151         vector< vector<HitItem>* > aIndexFolderResultVectorVector;
152 
153         bool bTemporary;
154         while( (idxDir = aIndexFolderIt.nextIndexFolder( bExtension, bTemporary )).getLength() > 0 )
155         {
156             vector<HitItem> aIndexFolderResultVector;
157 
158             try
159             {
160                 vector< vector<HitItem>* > aQueryListResultVectorVector;
161                 set< rtl::OUString > aSet,aCurrent,aResultSet;
162 
163                 int nQueryListSize = queryList.size();
164                 if( nQueryListSize > 1 )
165                     hitCount = 2000;
166 
167                 for( int i = 0; i < nQueryListSize; ++i )
168                 {
169                     vector<HitItem>* pQueryResultVector;
170                     if( nQueryListSize > 1 )
171                     {
172                         pQueryResultVector = new vector<HitItem>();
173                         aQueryListResultVectorVector.push_back( pQueryResultVector );
174                     }
175                     else
176                     {
177                         pQueryResultVector = &aIndexFolderResultVector;
178                     }
179                     pQueryResultVector->reserve( hitCount );
180 
181                     int nParamCount = bCaptionsOnly ? 7 : 6;
182                     Sequence<uno::Any> aParamsSeq( nParamCount );
183 
184                     aParamsSeq[0] = uno::makeAny( rtl::OUString::createFromAscii( "-lang" ) );
185                     aParamsSeq[1] = uno::makeAny( m_aURLParameter.get_language() );
186 
187                     aParamsSeq[2] = uno::makeAny( rtl::OUString::createFromAscii( "-index" ) );
188                     rtl::OUString aSystemPath;
189                     osl::FileBase::getSystemPathFromFileURL( idxDir, aSystemPath );
190                     aParamsSeq[3] = uno::makeAny( aSystemPath );
191 
192                     aParamsSeq[4] = uno::makeAny( rtl::OUString::createFromAscii( "-query" ) );
193 
194                     const std::vector< rtl::OUString >& aListItem = queryList[i];
195                     ::rtl::OUString aNewQueryStr = aListItem[0];
196                     aParamsSeq[5] = uno::makeAny( aNewQueryStr );
197 
198                     if( bCaptionsOnly )
199                         aParamsSeq[6] = uno::makeAny( rtl::OUString::createFromAscii( "-caption" ) );
200 
201                     Sequence< sal_Int16 > aOutParamIndex;
202                     Sequence< uno::Any > aOutParam;
203 
204                     uno::Any aRet = xInvocation->invoke( rtl::OUString::createFromAscii( "search" ),
205                         aParamsSeq, aOutParamIndex, aOutParam );
206 
207                     Sequence< float > aScoreSeq;
208                     int nScoreCount = 0;
209                     int nOutParamCount = aOutParam.getLength();
210                     if( nOutParamCount == 1 )
211                     {
212                         const uno::Any* pScoreAnySeq = aOutParam.getConstArray();
213                         if( pScoreAnySeq[0] >>= aScoreSeq )
214                             nScoreCount = aScoreSeq.getLength();
215                     }
216 
217                     Sequence<rtl::OUString> aRetSeq;
218                     if( aRet >>= aRetSeq )
219                     {
220                         if( nQueryListSize > 1 )
221                             aSet.clear();
222 
223                         const rtl::OUString* pRetSeq = aRetSeq.getConstArray();
224                         int nCount = aRetSeq.getLength();
225                         if( nCount > hitCount )
226                             nCount = hitCount;
227                         for( int j = 0 ; j < nCount ; ++j )
228                         {
229                             float fScore = 0.0;
230                             if( j < nScoreCount )
231                                 fScore = aScoreSeq[j];
232 
233                             rtl::OUString aURL = pRetSeq[j];
234                             pQueryResultVector->push_back( HitItem( aURL, fScore ) );
235                             if( nQueryListSize > 1 )
236                                 aSet.insert( aURL );
237 
238 #ifdef LOGGING
239                             if( pFile )
240                             {
241                                 rtl::OString tmp(rtl::OUStringToOString( aURL, RTL_TEXTENCODING_UTF8));
242                                 fprintf( pFile, "Dir %d, Query %d, Item: score=%f, URL=%s\n", iDir, i, fScore, tmp.getStr() );
243                             }
244 #endif
245                         }
246                     }
247 
248                     // intersect
249                     if( nQueryListSize > 1 )
250                     {
251                         if( i == 0 )
252                         {
253                             aResultSet = aSet;
254                         }
255                         else
256                         {
257                             aCurrent = aResultSet;
258                             aResultSet.clear();
259                             set_intersection( aSet.begin(),aSet.end(),
260                                               aCurrent.begin(),aCurrent.end(),
261                                               inserter(aResultSet,aResultSet.begin()));
262                         }
263                     }
264                 }
265 
266                 // Combine results in aIndexFolderResultVector
267                 if( nQueryListSize > 1 )
268                 {
269                     for( int n = 0 ; n < nQueryListSize ; ++n )
270                     {
271                         vector<HitItem>* pQueryResultVector = aQueryListResultVectorVector[n];
272                         vector<HitItem>& rQueryResultVector = *pQueryResultVector;
273 
274                         int nItemCount = rQueryResultVector.size();
275                         for( int i = 0 ; i < nItemCount ; ++i )
276                         {
277                             const HitItem& rItem = rQueryResultVector[ i ];
278                             set< rtl::OUString >::iterator it;
279                             if( (it = aResultSet.find( rItem.m_aURL )) != aResultSet.end() )
280                             {
281                                 HitItem aItemCopy( rItem );
282                                 aItemCopy.m_fScore /= nQueryListSize;   // To get average score
283                                 if( n == 0 )
284                                 {
285                                     // Use first pass to create entry
286                                     aIndexFolderResultVector.push_back( aItemCopy );
287 
288 #ifdef LOGGING
289                                     if( pFile )
290                                     {
291                                         rtl::OString tmp(rtl::OUStringToOString( aItemCopy.m_aURL, RTL_TEXTENCODING_UTF8));
292                                         fprintf( pFile, "Combine: Query %d (first pass), Item %d: score=%f (%f), URL=%s\n", n, i, aItemCopy.m_fScore, rItem.m_fScore, tmp.getStr() );
293                                     }
294 #endif
295                                 }
296                                 else
297                                 {
298                                     // Find entry in vector
299                                     int nCount = aIndexFolderResultVector.size();
300                                     for( int j = 0 ; j < nCount ; ++j )
301                                     {
302                                         HitItem& rFindItem = aIndexFolderResultVector[ j ];
303                                         if( rFindItem.m_aURL.equals( aItemCopy.m_aURL ) )
304                                         {
305 #ifdef LOGGING
306                                             if( pFile )
307                                             {
308                                                 rtl::OString tmp(rtl::OUStringToOString( aItemCopy.m_aURL, RTL_TEXTENCODING_UTF8));
309                                                 fprintf( pFile, "Combine: Query %d, Item %d: score=%f + %f = %f, URL=%s\n", n, i,
310                                                     rFindItem.m_fScore, aItemCopy.m_fScore, rFindItem.m_fScore + aItemCopy.m_fScore, tmp.getStr() );
311                                             }
312 #endif
313 
314                                             rFindItem.m_fScore += aItemCopy.m_fScore;
315                                             break;
316                                         }
317                                     }
318                                 }
319                             }
320                         }
321 
322                         delete pQueryResultVector;
323                     }
324 
325                     sort( aIndexFolderResultVector.begin(), aIndexFolderResultVector.end() );
326                 }
327 
328                 vector<HitItem>* pIndexFolderHitItemVector = new vector<HitItem>( aIndexFolderResultVector );
329                 aIndexFolderResultVectorVector.push_back( pIndexFolderHitItemVector );
330                 aIndexFolderResultVector.clear();
331             }
332             catch( const Exception& )
333             {
334             }
335 
336             ++iDir;
337 
338             if( bTemporary )
339                 aIndexFolderIt.deleteTempIndexFolder( idxDir );
340 
341         }   // Iterator
342 
343 
344         int nVectorCount = aIndexFolderResultVectorVector.size();
345         vector<HitItem>::size_type* pCurrentVectorIndex = new vector<HitItem>::size_type[nVectorCount];
346         for( int j = 0 ; j < nVectorCount ; ++j )
347             pCurrentVectorIndex[j] = 0;
348 
349 #ifdef LOGGING
350         if( pFile )
351         {
352             for( int k = 0 ; k < nVectorCount ; ++k )
353             {
354                 vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[k];
355                 int nItemCount = rIndexFolderVector.size();
356 
357                 fprintf( pFile, "Vector %d, %d elements\n", k, nItemCount );
358 
359                 for( int i = 0 ; i < nItemCount ; ++i )
360                 {
361                     const HitItem& rItem = rIndexFolderVector[ i ];
362                     rtl::OString tmp(rtl::OUStringToOString(rItem.m_aURL, RTL_TEXTENCODING_UTF8));
363                     fprintf( pFile, "    Item_vector%d, %d/%d: score=%f, URL=%s\n", k, i, nItemCount, rItem.m_fScore, tmp.getStr() );
364                 }
365             }
366         }
367 #endif
368 
369         sal_Int32 nTotalHitCount = m_aURLParameter.get_hitCount();
370         sal_Int32 nHitCount = 0;
371         while( nHitCount < nTotalHitCount )
372         {
373             int iVectorWithBestScore = -1;
374             float fBestScore = 0.0;
375             for( int k = 0 ; k < nVectorCount ; ++k )
376             {
377                 vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[k];
378                 if( pCurrentVectorIndex[k] < rIndexFolderVector.size() )
379                 {
380                     const HitItem& rItem = rIndexFolderVector[ pCurrentVectorIndex[k] ];
381 
382                     if( fBestScore < rItem.m_fScore )
383                     {
384                         fBestScore = rItem.m_fScore;
385                         iVectorWithBestScore = k;
386                     }
387                 }
388             }
389 
390             if( iVectorWithBestScore == -1 )    // No item left at all
391                 break;
392 
393             vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[iVectorWithBestScore];
394             const HitItem& rItem = rIndexFolderVector[ pCurrentVectorIndex[iVectorWithBestScore] ];
395 
396             pCurrentVectorIndex[iVectorWithBestScore]++;
397 
398             aCompleteResultVector.push_back( rItem.m_aURL );
399             ++nHitCount;
400         }
401 
402         delete[] pCurrentVectorIndex;
403         for( int n = 0 ; n < nVectorCount ; ++n )
404         {
405             vector<HitItem>* pIndexFolderVector = aIndexFolderResultVectorVector[n];
406             delete pIndexFolderVector;
407         }
408 
409 #ifdef LOGGING
410         fclose( pFile );
411 #endif
412     }
413 
414     sal_Int32 replIdx = rtl::OUString::createFromAscii( "#HLP#" ).getLength();
415     rtl::OUString replWith = rtl::OUString::createFromAscii( "vnd.sun.star.help://" );
416 
417     int nResultCount = aCompleteResultVector.size();
418     for( int r = 0 ; r < nResultCount ; ++r )
419     {
420         rtl::OUString aURL = aCompleteResultVector[r];
421         rtl::OUString aResultStr = replWith + aURL.copy(replIdx);
422         m_aPath.push_back( aResultStr );
423     }
424 
425     m_aItems.resize( m_aPath.size() );
426     m_aIdents.resize( m_aPath.size() );
427 
428     Command aCommand;
429     aCommand.Name = rtl::OUString::createFromAscii( "getPropertyValues" );
430     aCommand.Argument <<= m_sProperty;
431 
432     for( m_nRow = 0; sal::static_int_cast<sal_uInt32>( m_nRow ) < m_aPath.size(); ++m_nRow )
433     {
434         m_aPath[m_nRow] =
435             m_aPath[m_nRow]                                          +
436             rtl::OUString::createFromAscii( "?Language=" )           +
437             m_aURLParameter.get_language()                           +
438             rtl::OUString::createFromAscii( "&System=" )             +
439             m_aURLParameter.get_system();
440 
441         uno::Reference< XContent > content = queryContent();
442         if( content.is() )
443         {
444             uno::Reference< XCommandProcessor > cmd( content,uno::UNO_QUERY );
445             cmd->execute( aCommand,0,uno::Reference< XCommandEnvironment >( 0 ) ) >>= m_aItems[m_nRow]; //TODO: check return value of operator >>=
446         }
447     }
448     m_nRow = 0xffffffff;
449 }
450