#
# A sample config file for the language models
# provided with Gertjan van Noord's language guesser
# (http://odur.let.rug.nl/~vannoord/TextCat/)
#
# Notes:
# - You may consider eliminating a couple of small languages from this
#   list because they cause false positives with big languages and are
#   bad for performance. (Do you really want to recognize Drents?)
# - Putting the most probable languages at the top of the list
#   improves performance, because this will raise the threshold for
#   likely candidates more quickly.
#

# This file has been modified (for OOo, by Jocelyn MERAND joc.mer@gmail.com) to include country and encoding.
# Guess strings are formed as follows: language-country-encoding

afrikaans.lm af--utf8
albanian.lm sq--utf8
amharic_utf.lm am--utf8
arabic.lm ar--utf8
basque.lm eu--utf8
belarus.lm be--utf8
bosnian.lm bs--utf8
breton.lm br--utf8
catalan.lm ca--utf8
chinese_simplified.lm zh-CN-utf8
chinese_traditional.lm zh-TW-utf8
croatian.lm hr--utf8
czech.lm cs--utf8
danish.lm da--utf8
dutch.lm nl--utf8
english.lm en--utf8
esperanto.lm eo--utf8
estonian.lm et--utf8
finnish.lm fi--utf8
french.lm fr--utf8
frisian.lm fy--utf8
georgian.lm ka--utf8
german.lm de--utf8
greek.lm el--utf8
hebrew.lm he--utf8
hindi.lm hi--utf8
hungarian.lm hu--utf8
icelandic.lm is--utf8
indonesian.lm id--utf8
irish_gaelic.lm ga--utf8
italian.lm it--utf8
japanese.lm ja--utf8
korean.lm ko--utf8
latin.lm la--utf8
latvian.lm lv--utf8
lithuanian.lm lt--utf8
luxembourgish.lm lb--utf8
malay.lm ms--utf8
manx_gaelic.lm gv--utf8
marathi.lm mr--utf8
mongolian_cyrillic.lm mn--utf8
nepali.lm ne--utf8
norwegian.lm nb--utf8 # Norwegian (Bokmal)
persian.lm fa--utf8 # Farsi
polish.lm pl--utf8
portuguese.lm pt-PT-utf8
quechua.lm qu--utf8
romanian.lm ro--utf8
romansh.lm rm--utf8
russian.lm ru--utf8
sanskrit.lm sa--utf8
scots.lm sco--utf8
scots_gaelic.lm gd--utf8
serbian.lm sr--utf8
serbian-latin.lm sh--utf8
slovak_ascii.lm sk-SK-utf8
slovenian.lm sl--utf8
spanish.lm es--utf8
swahili.lm sw--utf8
swedish.lm sv--utf8
tagalog.lm tl--utf8
tamil.lm ta--utf8
thai.lm th--utf8
turkish.lm tr--utf8
ukrainian.lm uk--utf8
vietnamese.lm vi--utf8
welsh.lm cy--utf8
yiddish_utf.lm yi--utf8
zulu.lm zu--utf8