/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/


23cdf0e10cSrcweir #ifndef SIMPLEGUESSER_H
24cdf0e10cSrcweir #define SIMPLEGUESSER_H
25cdf0e10cSrcweir 
26cdf0e10cSrcweir #include <string.h>
27cdf0e10cSrcweir #include <string>
28cdf0e10cSrcweir #include <cstdlib>
29cdf0e10cSrcweir #include <vector>
30cdf0e10cSrcweir #include <guess.hxx>
31cdf0e10cSrcweir 
// Upper bound (200) on the length of a string to analyse.
// NOTE(review): the macro is not used anywhere in this header; presumably the
// implementation file truncates the input text to this many characters --
// confirm against the callers.
#define MAX_STRING_LENGTH_TO_ANALYSE 200
33cdf0e10cSrcweir 
// NOTE(review): a using-directive at header scope leaks every std name into
// each translation unit that includes this file. It is kept only for backward
// compatibility, because code including this header may rely on unqualified
// std names; new code should spell out std:: explicitly.
using namespace std;
35cdf0e10cSrcweir 
36cdf0e10cSrcweir /**
37cdf0e10cSrcweir @author Jocelyn Merand
38cdf0e10cSrcweir */
39cdf0e10cSrcweir class SimpleGuesser{
40cdf0e10cSrcweir public:
41cdf0e10cSrcweir     /**inits the object with conf file "./conf.txt"*/
42cdf0e10cSrcweir     SimpleGuesser();
43cdf0e10cSrcweir 
44cdf0e10cSrcweir     /** Compares the current Simpleguesser with an other
45cdf0e10cSrcweir      * @param SimpleGuesser& sg the other guesser to compare
46cdf0e10cSrcweir      */
47cdf0e10cSrcweir     void operator=(SimpleGuesser& sg);
48cdf0e10cSrcweir 
49cdf0e10cSrcweir     /**
50cdf0e10cSrcweir      * destroy the object
51cdf0e10cSrcweir      */
52cdf0e10cSrcweir     ~SimpleGuesser();
53cdf0e10cSrcweir 
54cdf0e10cSrcweir     /**
55cdf0e10cSrcweir      * Analyze a text and return the most probable languages of the text
56cdf0e10cSrcweir      * @param char* text is the text to analyze
57cdf0e10cSrcweir      * @return the list of guess
58cdf0e10cSrcweir      */
59cdf0e10cSrcweir     vector<Guess> GuessLanguage(char* text);
60cdf0e10cSrcweir 
61cdf0e10cSrcweir     /**
62cdf0e10cSrcweir      * Analyze a text and return the most probable language of the text
63cdf0e10cSrcweir      * @param char* text is the text to analyze
64cdf0e10cSrcweir      * @return the guess (containing language)
65cdf0e10cSrcweir      */
66cdf0e10cSrcweir     Guess GuessPrimaryLanguage(char* text);
67cdf0e10cSrcweir 
68cdf0e10cSrcweir     /**
69cdf0e10cSrcweir      * List all available languages (possibly to be in guesses)
70cdf0e10cSrcweir      * @return the list of languages
71cdf0e10cSrcweir      */
72cdf0e10cSrcweir     vector<Guess> GetAvailableLanguages();
73cdf0e10cSrcweir 
74cdf0e10cSrcweir     /**
75cdf0e10cSrcweir      * List all languages (possibly in guesses or not)
76cdf0e10cSrcweir      * @return the list of languages
77cdf0e10cSrcweir      */
78cdf0e10cSrcweir     vector<Guess> GetAllManagedLanguages();
79cdf0e10cSrcweir 
80cdf0e10cSrcweir     /**
81cdf0e10cSrcweir      * List all Unavailable languages (disable for any reason)
82cdf0e10cSrcweir      * @return the list of languages
83cdf0e10cSrcweir      */
84cdf0e10cSrcweir     vector<Guess> GetUnavailableLanguages();
85cdf0e10cSrcweir 
86cdf0e10cSrcweir     /**
87cdf0e10cSrcweir      * Mark a language enabled
88cdf0e10cSrcweir      * @param string lang the language to enable (build like language-COUNTRY-encoding)
89cdf0e10cSrcweir      */
90cdf0e10cSrcweir     void EnableLanguage(string lang);
91cdf0e10cSrcweir 
92cdf0e10cSrcweir     /**
93cdf0e10cSrcweir      * Mark a language disabled
94cdf0e10cSrcweir      * @param string lang the language to disable (build like language-COUNTRY-encoding)
95cdf0e10cSrcweir      */
96cdf0e10cSrcweir     void DisableLanguage(string lang);
97cdf0e10cSrcweir 
98cdf0e10cSrcweir     /**
99cdf0e10cSrcweir      * Load a new DB of fingerprints
100cdf0e10cSrcweir      * @param const char* thePathOfConfFile self explaining
101cdf0e10cSrcweir      * @param const char* prefix is the path where the directory witch contains fingerprint files is stored
102cdf0e10cSrcweir     */
103cdf0e10cSrcweir     void SetDBPath(const char* thePathOfConfFile, const char* prefix);
104cdf0e10cSrcweir 
105cdf0e10cSrcweir protected:
106cdf0e10cSrcweir 
107cdf0e10cSrcweir     //Where typical fingerprints (n-gram tables) are stored
108cdf0e10cSrcweir     void* h;
109cdf0e10cSrcweir 
110cdf0e10cSrcweir     //Is used to select languages into the fingerprints DB, the mask is used to indicate if we want enabled disabled or both
111cdf0e10cSrcweir     vector<Guess> GetManagedLanguages(const char mask);
112cdf0e10cSrcweir 
113cdf0e10cSrcweir     //Like getManagedLanguages, this function enable or disable a language and it depends of the mask
114cdf0e10cSrcweir     void XableLanguage(string lang, char mask);
115cdf0e10cSrcweir };
116cdf0e10cSrcweir 
117cdf0e10cSrcweir #endif
118