1*cdf0e10cSrcweir /*************************************************************************** 2*cdf0e10cSrcweir * 3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4*cdf0e10cSrcweir * 5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates. 6*cdf0e10cSrcweir * 7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite 8*cdf0e10cSrcweir * 9*cdf0e10cSrcweir * This file is part of OpenOffice.org. 10*cdf0e10cSrcweir * 11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify 12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3 13*cdf0e10cSrcweir * only, as published by the Free Software Foundation. 14*cdf0e10cSrcweir * 15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful, 16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of 17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details 19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code). 20*cdf0e10cSrcweir * 21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License 22*cdf0e10cSrcweir * version 3 along with OpenOffice.org. If not, see 23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html> 24*cdf0e10cSrcweir * for a copy of the LGPLv3 License. 25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir #ifndef SIMPLEGUESSER_H 28*cdf0e10cSrcweir #define SIMPLEGUESSER_H 29*cdf0e10cSrcweir 30*cdf0e10cSrcweir #include <string.h> 31*cdf0e10cSrcweir #include <string> 32*cdf0e10cSrcweir #include <cstdlib> 33*cdf0e10cSrcweir #include <vector> 34*cdf0e10cSrcweir #include <guess.hxx> 35*cdf0e10cSrcweir 36*cdf0e10cSrcweir #define MAX_STRING_LENGTH_TO_ANALYSE 200 37*cdf0e10cSrcweir 38*cdf0e10cSrcweir using namespace std; 39*cdf0e10cSrcweir 40*cdf0e10cSrcweir /** 41*cdf0e10cSrcweir @author Jocelyn Merand 42*cdf0e10cSrcweir */ 43*cdf0e10cSrcweir class SimpleGuesser{ 44*cdf0e10cSrcweir public: 45*cdf0e10cSrcweir /**inits the object with conf file "./conf.txt"*/ 46*cdf0e10cSrcweir SimpleGuesser(); 47*cdf0e10cSrcweir 48*cdf0e10cSrcweir /** Compares the current Simpleguesser with an other 49*cdf0e10cSrcweir * @param SimpleGuesser& sg the other guesser to compare 50*cdf0e10cSrcweir */ 51*cdf0e10cSrcweir void operator=(SimpleGuesser& sg); 52*cdf0e10cSrcweir 53*cdf0e10cSrcweir /** 54*cdf0e10cSrcweir * destroy the object 55*cdf0e10cSrcweir */ 56*cdf0e10cSrcweir ~SimpleGuesser(); 57*cdf0e10cSrcweir 58*cdf0e10cSrcweir /** 59*cdf0e10cSrcweir * Analyze a text and return the most probable languages of the text 60*cdf0e10cSrcweir * @param char* text is the text to analyze 61*cdf0e10cSrcweir * @return the list of guess 62*cdf0e10cSrcweir */ 63*cdf0e10cSrcweir vector<Guess> GuessLanguage(char* text); 64*cdf0e10cSrcweir 65*cdf0e10cSrcweir /** 66*cdf0e10cSrcweir * Analyze a text and return the most probable language of the text 67*cdf0e10cSrcweir * @param char* text is the text to analyze 68*cdf0e10cSrcweir * @return the guess (containing language) 69*cdf0e10cSrcweir */ 70*cdf0e10cSrcweir Guess GuessPrimaryLanguage(char* text); 71*cdf0e10cSrcweir 72*cdf0e10cSrcweir /** 73*cdf0e10cSrcweir * List all available languages (possibly to be in guesses) 74*cdf0e10cSrcweir * @return the list of languages 75*cdf0e10cSrcweir */ 76*cdf0e10cSrcweir vector<Guess> GetAvailableLanguages(); 77*cdf0e10cSrcweir 78*cdf0e10cSrcweir /** 79*cdf0e10cSrcweir * List all languages (possibly in guesses or not) 80*cdf0e10cSrcweir * @return the list of languages 81*cdf0e10cSrcweir */ 82*cdf0e10cSrcweir vector<Guess> GetAllManagedLanguages(); 83*cdf0e10cSrcweir 84*cdf0e10cSrcweir /** 85*cdf0e10cSrcweir * List all Unavailable languages (disable for any reason) 86*cdf0e10cSrcweir * @return the list of languages 87*cdf0e10cSrcweir */ 88*cdf0e10cSrcweir vector<Guess> GetUnavailableLanguages(); 89*cdf0e10cSrcweir 90*cdf0e10cSrcweir /** 91*cdf0e10cSrcweir * Mark a language enabled 92*cdf0e10cSrcweir * @param string lang the language to enable (build like language-COUNTRY-encoding) 93*cdf0e10cSrcweir */ 94*cdf0e10cSrcweir void EnableLanguage(string lang); 95*cdf0e10cSrcweir 96*cdf0e10cSrcweir /** 97*cdf0e10cSrcweir * Mark a language disabled 98*cdf0e10cSrcweir * @param string lang the language to disable (build like language-COUNTRY-encoding) 99*cdf0e10cSrcweir */ 100*cdf0e10cSrcweir void DisableLanguage(string lang); 101*cdf0e10cSrcweir 102*cdf0e10cSrcweir /** 103*cdf0e10cSrcweir * Load a new DB of fingerprints 104*cdf0e10cSrcweir * @param const char* thePathOfConfFile self explaining 105*cdf0e10cSrcweir * @param const char* prefix is the path where the directory witch contains fingerprint files is stored 106*cdf0e10cSrcweir */ 107*cdf0e10cSrcweir void SetDBPath(const char* thePathOfConfFile, const char* prefix); 108*cdf0e10cSrcweir 109*cdf0e10cSrcweir protected: 110*cdf0e10cSrcweir 111*cdf0e10cSrcweir //Where typical fingerprints (n-gram tables) are stored 112*cdf0e10cSrcweir void* h; 113*cdf0e10cSrcweir 114*cdf0e10cSrcweir //Is used to select languages into the fingerprints DB, the mask is used to indicate if we want enabled disabled or both 115*cdf0e10cSrcweir vector<Guess> GetManagedLanguages(const char mask); 116*cdf0e10cSrcweir 117*cdf0e10cSrcweir //Like getManagedLanguages, this function enable or disable a language and it depends of the mask 118*cdf0e10cSrcweir void XableLanguage(string lang, char mask); 119*cdf0e10cSrcweir }; 120*cdf0e10cSrcweir 121*cdf0e10cSrcweir #endif 122