1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 #ifndef SIMPLEGUESSER_H
24 #define SIMPLEGUESSER_H
25 
26 #include <string.h>
27 #include <string>
28 #include <cstdlib>
29 #include <vector>
30 #include <guess.hxx>
31 
32 #define MAX_STRING_LENGTH_TO_ANALYSE 200
33 
34 using namespace std;
35 
36 /**
37 @author Jocelyn Merand
38 */
39 class SimpleGuesser{
40 public:
41     /**inits the object with conf file "./conf.txt"*/
42     SimpleGuesser();
43 
44     /** Compares the current Simpleguesser with an other
45      * @param SimpleGuesser& sg the other guesser to compare
46      */
47     void operator=(SimpleGuesser& sg);
48 
49     /**
50      * destroy the object
51      */
52     ~SimpleGuesser();
53 
54     /**
55      * Analyze a text and return the most probable languages of the text
56      * @param char* text is the text to analyze
57      * @return the list of guess
58      */
59     vector<Guess> GuessLanguage(char* text);
60 
61     /**
62      * Analyze a text and return the most probable language of the text
63      * @param char* text is the text to analyze
64      * @return the guess (containing language)
65      */
66     Guess GuessPrimaryLanguage(char* text);
67 
68     /**
69      * List all available languages (possibly to be in guesses)
70      * @return the list of languages
71      */
72     vector<Guess> GetAvailableLanguages();
73 
74     /**
75      * List all languages (possibly in guesses or not)
76      * @return the list of languages
77      */
78     vector<Guess> GetAllManagedLanguages();
79 
80     /**
81      * List all Unavailable languages (disable for any reason)
82      * @return the list of languages
83      */
84     vector<Guess> GetUnavailableLanguages();
85 
86     /**
87      * Mark a language enabled
88      * @param string lang the language to enable (build like language-COUNTRY-encoding)
89      */
90     void EnableLanguage(string lang);
91 
92     /**
93      * Mark a language disabled
94      * @param string lang the language to disable (build like language-COUNTRY-encoding)
95      */
96     void DisableLanguage(string lang);
97 
98     /**
99      * Load a new DB of fingerprints
100      * @param const char* thePathOfConfFile self explaining
101      * @param const char* prefix is the path where the directory witch contains fingerprint files is stored
102     */
103     void SetDBPath(const char* thePathOfConfFile, const char* prefix);
104 
105 protected:
106 
107     //Where typical fingerprints (n-gram tables) are stored
108     void* h;
109 
110     //Is used to select languages into the fingerprints DB, the mask is used to indicate if we want enabled disabled or both
111     vector<Guess> GetManagedLanguages(const char mask);
112 
113     //Like getManagedLanguages, this function enable or disable a language and it depends of the mask
114     void XableLanguage(string lang, char mask);
115 };
116 
117 #endif
118