1*cdf0e10cSrcweir# 2*cdf0e10cSrcweir# A sample config file for the language models 3*cdf0e10cSrcweir# provided with Gertjan van Noord's language guesser 4*cdf0e10cSrcweir# (http://odur.let.rug.nl/~vannoord/TextCat/) 5*cdf0e10cSrcweir# 6*cdf0e10cSrcweir# Notes: 7*cdf0e10cSrcweir# - You may consider eliminating a couple of small languages from this 8*cdf0e10cSrcweir# list because they cause false positives with big languages and are 9*cdf0e10cSrcweir# bad for performance. (Do you really want to recognize Drents?) 10*cdf0e10cSrcweir# - Putting the most probable languages at the top of the list 11*cdf0e10cSrcweir# improves performance, because this will raise the threshold for 12*cdf0e10cSrcweir# likely candidates more quickly. 13*cdf0e10cSrcweir# 14*cdf0e10cSrcweir 15*cdf0e10cSrcweir# this file has been modified (for OOo by Jocelyn MERAND joc.mer@gmail.com) to include country and encoding 16*cdf0e10cSrcweir# guess strings are formed as follows: language-country-encoding 17*cdf0e10cSrcweir 18*cdf0e10cSrcweirafrikaans.lm af--utf8 19*cdf0e10cSrcweiralbanian.lm sq--utf8 20*cdf0e10cSrcweiramharic_utf.lm am--utf8 21*cdf0e10cSrcweirarabic.lm ar--utf8 22*cdf0e10cSrcweirbasque.lm eu--utf8 23*cdf0e10cSrcweirbelarus.lm be--utf8 24*cdf0e10cSrcweirbosnian.lm bs--utf8 25*cdf0e10cSrcweirbreton.lm br--utf8 26*cdf0e10cSrcweircatalan.lm ca--utf8 27*cdf0e10cSrcweirchinese_simplified.lm zh-CN-utf8 28*cdf0e10cSrcweirchinese_traditional.lm zh-TW-utf8 29*cdf0e10cSrcweircroatian.lm hr--utf8 30*cdf0e10cSrcweirczech.lm cs--utf8 31*cdf0e10cSrcweirdanish.lm da--utf8 32*cdf0e10cSrcweirdutch.lm nl--utf8 33*cdf0e10cSrcweirenglish.lm en--utf8 34*cdf0e10cSrcweiresperanto.lm eo--utf8 35*cdf0e10cSrcweirestonian.lm et--utf8 36*cdf0e10cSrcweirfinnish.lm fi--utf8 37*cdf0e10cSrcweirfrench.lm fr--utf8 38*cdf0e10cSrcweirfrisian.lm fy--utf8 39*cdf0e10cSrcweirgeorgian.lm ka--utf8 40*cdf0e10cSrcweirgerman.lm de--utf8 41*cdf0e10cSrcweirgreek.lm el--utf8 42*cdf0e10cSrcweirhebrew.lm he--utf8 
43*cdf0e10cSrcweirhindi.lm hi--utf8 44*cdf0e10cSrcweirhungarian.lm hu--utf8 45*cdf0e10cSrcweiricelandic.lm is--utf8 46*cdf0e10cSrcweirindonesian.lm id--utf8 47*cdf0e10cSrcweirirish_gaelic.lm ga--utf8 48*cdf0e10cSrcweiritalian.lm it--utf8 49*cdf0e10cSrcweirjapanese.lm ja--utf8 50*cdf0e10cSrcweirkorean.lm ko--utf8 51*cdf0e10cSrcweirlatin.lm la--utf8 52*cdf0e10cSrcweirlatvian.lm lv--utf8 53*cdf0e10cSrcweirlithuanian.lm lt--utf8 54*cdf0e10cSrcweirluxembourgish.lm lb--utf8 55*cdf0e10cSrcweirmalay.lm ms--utf8 56*cdf0e10cSrcweirmanx_gaelic.lm gv--utf8 57*cdf0e10cSrcweirmarathi.lm mr--utf8 58*cdf0e10cSrcweirmongolian_cyrillic.lm mn--utf8 59*cdf0e10cSrcweirnepali.lm ne--utf8 60*cdf0e10cSrcweirnorwegian.lm nb--utf8 # Norwegian (Bokmal) 61*cdf0e10cSrcweirpersian.lm fa--utf8 # Farsi 62*cdf0e10cSrcweirpolish.lm pl--utf8 63*cdf0e10cSrcweirportuguese.lm pt-PT-utf8 64*cdf0e10cSrcweirquechua.lm qu--utf8 65*cdf0e10cSrcweirromanian.lm ro--utf8 66*cdf0e10cSrcweirromansh.lm rm--utf8 67*cdf0e10cSrcweirrussian.lm ru--utf8 68*cdf0e10cSrcweirsanskrit.lm sa--utf8 69*cdf0e10cSrcweirscots.lm sco--utf8 70*cdf0e10cSrcweirscots_gaelic.lm gd--utf8 71*cdf0e10cSrcweirserbian.lm sr--utf8 72*cdf0e10cSrcweirserbian-latin.lm sh--utf8 73*cdf0e10cSrcweirslovak_ascii.lm sk-SK-utf8 74*cdf0e10cSrcweirslovenian.lm sl--utf8 75*cdf0e10cSrcweirspanish.lm es--utf8 76*cdf0e10cSrcweirswahili.lm sw--utf8 77*cdf0e10cSrcweirswedish.lm sv--utf8 78*cdf0e10cSrcweirtagalog.lm tl--utf8 79*cdf0e10cSrcweirtamil.lm ta--utf8 80*cdf0e10cSrcweirthai.lm th--utf8 81*cdf0e10cSrcweirturkish.lm tr--utf8 82*cdf0e10cSrcweirukrainian.lm uk--utf8 83*cdf0e10cSrcweirvietnamese.lm vi--utf8 84*cdf0e10cSrcweirwelsh.lm cy--utf8 85*cdf0e10cSrcweiryiddish_utf.lm yi--utf8 86*cdf0e10cSrcweirzulu.lm zu--utf8 87