/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
19*e7675e54SAndrew Rist * 20*e7675e54SAndrew Rist *************************************************************/ 21*e7675e54SAndrew Rist 22*e7675e54SAndrew Rist 23cdf0e10cSrcweir #ifndef SIMPLEGUESSER_H 24cdf0e10cSrcweir #define SIMPLEGUESSER_H 25cdf0e10cSrcweir 26cdf0e10cSrcweir #include <string.h> 27cdf0e10cSrcweir #include <string> 28cdf0e10cSrcweir #include <cstdlib> 29cdf0e10cSrcweir #include <vector> 30cdf0e10cSrcweir #include <guess.hxx> 31cdf0e10cSrcweir 32cdf0e10cSrcweir #define MAX_STRING_LENGTH_TO_ANALYSE 200 33cdf0e10cSrcweir 34cdf0e10cSrcweir using namespace std; 35cdf0e10cSrcweir 36cdf0e10cSrcweir /** 37cdf0e10cSrcweir @author Jocelyn Merand 38cdf0e10cSrcweir */ 39cdf0e10cSrcweir class SimpleGuesser{ 40cdf0e10cSrcweir public: 41cdf0e10cSrcweir /**inits the object with conf file "./conf.txt"*/ 42cdf0e10cSrcweir SimpleGuesser(); 43cdf0e10cSrcweir 44cdf0e10cSrcweir /** Compares the current Simpleguesser with an other 45cdf0e10cSrcweir * @param SimpleGuesser& sg the other guesser to compare 46cdf0e10cSrcweir */ 47cdf0e10cSrcweir void operator=(SimpleGuesser& sg); 48cdf0e10cSrcweir 49cdf0e10cSrcweir /** 50cdf0e10cSrcweir * destroy the object 51cdf0e10cSrcweir */ 52cdf0e10cSrcweir ~SimpleGuesser(); 53cdf0e10cSrcweir 54cdf0e10cSrcweir /** 55cdf0e10cSrcweir * Analyze a text and return the most probable languages of the text 56cdf0e10cSrcweir * @param char* text is the text to analyze 57cdf0e10cSrcweir * @return the list of guess 58cdf0e10cSrcweir */ 59cdf0e10cSrcweir vector<Guess> GuessLanguage(char* text); 60cdf0e10cSrcweir 61cdf0e10cSrcweir /** 62cdf0e10cSrcweir * Analyze a text and return the most probable language of the text 63cdf0e10cSrcweir * @param char* text is the text to analyze 64cdf0e10cSrcweir * @return the guess (containing language) 65cdf0e10cSrcweir */ 66cdf0e10cSrcweir Guess GuessPrimaryLanguage(char* text); 67cdf0e10cSrcweir 68cdf0e10cSrcweir /** 69cdf0e10cSrcweir * List all available languages (possibly to be in guesses) 70cdf0e10cSrcweir 
* @return the list of languages 71cdf0e10cSrcweir */ 72cdf0e10cSrcweir vector<Guess> GetAvailableLanguages(); 73cdf0e10cSrcweir 74cdf0e10cSrcweir /** 75cdf0e10cSrcweir * List all languages (possibly in guesses or not) 76cdf0e10cSrcweir * @return the list of languages 77cdf0e10cSrcweir */ 78cdf0e10cSrcweir vector<Guess> GetAllManagedLanguages(); 79cdf0e10cSrcweir 80cdf0e10cSrcweir /** 81cdf0e10cSrcweir * List all Unavailable languages (disable for any reason) 82cdf0e10cSrcweir * @return the list of languages 83cdf0e10cSrcweir */ 84cdf0e10cSrcweir vector<Guess> GetUnavailableLanguages(); 85cdf0e10cSrcweir 86cdf0e10cSrcweir /** 87cdf0e10cSrcweir * Mark a language enabled 88cdf0e10cSrcweir * @param string lang the language to enable (build like language-COUNTRY-encoding) 89cdf0e10cSrcweir */ 90cdf0e10cSrcweir void EnableLanguage(string lang); 91cdf0e10cSrcweir 92cdf0e10cSrcweir /** 93cdf0e10cSrcweir * Mark a language disabled 94cdf0e10cSrcweir * @param string lang the language to disable (build like language-COUNTRY-encoding) 95cdf0e10cSrcweir */ 96cdf0e10cSrcweir void DisableLanguage(string lang); 97cdf0e10cSrcweir 98cdf0e10cSrcweir /** 99cdf0e10cSrcweir * Load a new DB of fingerprints 100cdf0e10cSrcweir * @param const char* thePathOfConfFile self explaining 101cdf0e10cSrcweir * @param const char* prefix is the path where the directory witch contains fingerprint files is stored 102cdf0e10cSrcweir */ 103cdf0e10cSrcweir void SetDBPath(const char* thePathOfConfFile, const char* prefix); 104cdf0e10cSrcweir 105cdf0e10cSrcweir protected: 106cdf0e10cSrcweir 107cdf0e10cSrcweir //Where typical fingerprints (n-gram tables) are stored 108cdf0e10cSrcweir void* h; 109cdf0e10cSrcweir 110cdf0e10cSrcweir //Is used to select languages into the fingerprints DB, the mask is used to indicate if we want enabled disabled or both 111cdf0e10cSrcweir vector<Guess> GetManagedLanguages(const char mask); 112cdf0e10cSrcweir 113cdf0e10cSrcweir //Like getManagedLanguages, this function enable or 
disable a language and it depends of the mask 114cdf0e10cSrcweir void XableLanguage(string lang, char mask); 115cdf0e10cSrcweir }; 116cdf0e10cSrcweir 117cdf0e10cSrcweir #endif 118