1*cdf0e10cSrcweir /***************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir #ifndef SIMPLEGUESSER_H
28*cdf0e10cSrcweir #define SIMPLEGUESSER_H
29*cdf0e10cSrcweir 
30*cdf0e10cSrcweir #include <string.h>
31*cdf0e10cSrcweir #include <string>
32*cdf0e10cSrcweir #include <cstdlib>
33*cdf0e10cSrcweir #include <vector>
34*cdf0e10cSrcweir #include <guess.hxx>
35*cdf0e10cSrcweir 
36*cdf0e10cSrcweir #define MAX_STRING_LENGTH_TO_ANALYSE 200
37*cdf0e10cSrcweir 
38*cdf0e10cSrcweir using namespace std;
39*cdf0e10cSrcweir 
40*cdf0e10cSrcweir /** Guesses the language of a text; going by the member comments below, it relies on a database of language fingerprints (n-gram tables).
41*cdf0e10cSrcweir @author Jocelyn Merand
42*cdf0e10cSrcweir */
43*cdf0e10cSrcweir class SimpleGuesser{
44*cdf0e10cSrcweir public:
45*cdf0e10cSrcweir     /** Initializes the object; per the original note it uses the conf file "./conf.txt" (TODO confirm against the implementation) */
46*cdf0e10cSrcweir     SimpleGuesser();
47*cdf0e10cSrcweir 
48*cdf0e10cSrcweir     /** Assignment operator taking another SimpleGuesser (this is operator=, not a comparison, and it returns void so it cannot be chained)
49*cdf0e10cSrcweir      * @param SimpleGuesser& sg the other guesser, taken by non-const reference; NOTE(review): what exactly is copied is defined in the implementation - confirm there
50*cdf0e10cSrcweir      */
51*cdf0e10cSrcweir     void operator=(SimpleGuesser& sg);
52*cdf0e10cSrcweir 
53*cdf0e10cSrcweir     /**
54*cdf0e10cSrcweir      * Destroys the object
55*cdf0e10cSrcweir      */
56*cdf0e10cSrcweir     ~SimpleGuesser();
57*cdf0e10cSrcweir 
58*cdf0e10cSrcweir     /**
59*cdf0e10cSrcweir      * Analyzes a text and returns the most probable languages of the text
60*cdf0e10cSrcweir      * @param char* text is the text to analyze (presumably NUL-terminated - TODO confirm)
61*cdf0e10cSrcweir      * @return the list of guesses
62*cdf0e10cSrcweir      */
63*cdf0e10cSrcweir     vector<Guess> GuessLanguage(char* text);
64*cdf0e10cSrcweir 
65*cdf0e10cSrcweir     /**
66*cdf0e10cSrcweir      * Analyzes a text and returns the single most probable language of the text
67*cdf0e10cSrcweir      * @param char* text is the text to analyze (presumably NUL-terminated - TODO confirm)
68*cdf0e10cSrcweir      * @return the guess (containing the language)
69*cdf0e10cSrcweir      */
70*cdf0e10cSrcweir     Guess GuessPrimaryLanguage(char* text);
71*cdf0e10cSrcweir 
72*cdf0e10cSrcweir     /**
73*cdf0e10cSrcweir      * Lists all available languages (those that may appear in guesses)
74*cdf0e10cSrcweir      * @return the list of languages
75*cdf0e10cSrcweir      */
76*cdf0e10cSrcweir     vector<Guess> GetAvailableLanguages();
77*cdf0e10cSrcweir 
78*cdf0e10cSrcweir     /**
79*cdf0e10cSrcweir      * Lists all managed languages (whether or not they may appear in guesses)
80*cdf0e10cSrcweir      * @return the list of languages
81*cdf0e10cSrcweir      */
82*cdf0e10cSrcweir     vector<Guess> GetAllManagedLanguages();
83*cdf0e10cSrcweir 
84*cdf0e10cSrcweir     /**
85*cdf0e10cSrcweir      * Lists all unavailable languages (disabled for any reason)
86*cdf0e10cSrcweir      * @return the list of languages
87*cdf0e10cSrcweir      */
88*cdf0e10cSrcweir     vector<Guess> GetUnavailableLanguages();
89*cdf0e10cSrcweir 
90*cdf0e10cSrcweir     /**
91*cdf0e10cSrcweir      * Marks a language as enabled
92*cdf0e10cSrcweir      * @param string lang the language to enable (formatted as language-COUNTRY-encoding)
93*cdf0e10cSrcweir      */
94*cdf0e10cSrcweir     void EnableLanguage(string lang);
95*cdf0e10cSrcweir 
96*cdf0e10cSrcweir     /**
97*cdf0e10cSrcweir      * Marks a language as disabled
98*cdf0e10cSrcweir      * @param string lang the language to disable (formatted as language-COUNTRY-encoding)
99*cdf0e10cSrcweir      */
100*cdf0e10cSrcweir     void DisableLanguage(string lang);
101*cdf0e10cSrcweir 
102*cdf0e10cSrcweir     /**
103*cdf0e10cSrcweir      * Loads a new DB of fingerprints
104*cdf0e10cSrcweir      * @param const char* thePathOfConfFile the path of the configuration file
105*cdf0e10cSrcweir      * @param const char* prefix is the path where the directory which contains the fingerprint files is stored
106*cdf0e10cSrcweir     */
107*cdf0e10cSrcweir     void SetDBPath(const char* thePathOfConfFile, const char* prefix);
108*cdf0e10cSrcweir 
109*cdf0e10cSrcweir protected:
110*cdf0e10cSrcweir 
111*cdf0e10cSrcweir     //Opaque handle to where the typical fingerprints (n-gram tables) are stored; NOTE(review): ownership and lifetime are not visible in this header
112*cdf0e10cSrcweir     void* h;
113*cdf0e10cSrcweir 
114*cdf0e10cSrcweir     //Selects languages from the fingerprints DB; the mask indicates whether enabled languages, disabled languages, or both are wanted
115*cdf0e10cSrcweir     vector<Guess> GetManagedLanguages(const char mask);
116*cdf0e10cSrcweir 
117*cdf0e10cSrcweir     //Like GetManagedLanguages, this function is driven by the mask: it enables or disables the given language depending on it
118*cdf0e10cSrcweir     void XableLanguage(string lang, char mask);
119*cdf0e10cSrcweir };
120*cdf0e10cSrcweir 
121*cdf0e10cSrcweir #endif
122