1 /***************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 #ifndef SIMPLEGUESSER_H
28 #define SIMPLEGUESSER_H
29 
30 #include <string.h>
31 #include <string>
32 #include <cstdlib>
33 #include <vector>
34 #include <guess.hxx>
35 
36 #define MAX_STRING_LENGTH_TO_ANALYSE 200
37 
38 using namespace std;
39 
40 /**
41 @author Jocelyn Merand
42 */
43 class SimpleGuesser{
44 public:
45     /**inits the object with conf file "./conf.txt"*/
46     SimpleGuesser();
47 
48     /** Compares the current Simpleguesser with an other
49      * @param SimpleGuesser& sg the other guesser to compare
50      */
51     void operator=(SimpleGuesser& sg);
52 
53     /**
54      * destroy the object
55      */
56     ~SimpleGuesser();
57 
58     /**
59      * Analyze a text and return the most probable languages of the text
60      * @param char* text is the text to analyze
61      * @return the list of guess
62      */
63     vector<Guess> GuessLanguage(char* text);
64 
65     /**
66      * Analyze a text and return the most probable language of the text
67      * @param char* text is the text to analyze
68      * @return the guess (containing language)
69      */
70     Guess GuessPrimaryLanguage(char* text);
71 
72     /**
73      * List all available languages (possibly to be in guesses)
74      * @return the list of languages
75      */
76     vector<Guess> GetAvailableLanguages();
77 
78     /**
79      * List all languages (possibly in guesses or not)
80      * @return the list of languages
81      */
82     vector<Guess> GetAllManagedLanguages();
83 
84     /**
85      * List all Unavailable languages (disable for any reason)
86      * @return the list of languages
87      */
88     vector<Guess> GetUnavailableLanguages();
89 
90     /**
91      * Mark a language enabled
92      * @param string lang the language to enable (build like language-COUNTRY-encoding)
93      */
94     void EnableLanguage(string lang);
95 
96     /**
97      * Mark a language disabled
98      * @param string lang the language to disable (build like language-COUNTRY-encoding)
99      */
100     void DisableLanguage(string lang);
101 
102     /**
103      * Load a new DB of fingerprints
104      * @param const char* thePathOfConfFile self explaining
105      * @param const char* prefix is the path where the directory witch contains fingerprint files is stored
106     */
107     void SetDBPath(const char* thePathOfConfFile, const char* prefix);
108 
109 protected:
110 
111     //Where typical fingerprints (n-gram tables) are stored
112     void* h;
113 
114     //Is used to select languages into the fingerprints DB, the mask is used to indicate if we want enabled disabled or both
115     vector<Guess> GetManagedLanguages(const char mask);
116 
117     //Like getManagedLanguages, this function enable or disable a language and it depends of the mask
118     void XableLanguage(string lang, char mask);
119 };
120 
121 #endif
122