1b3f7bbdeSPedro Giffuni# Copyright (c) 2003, WiseGuys Internet B.V. 2b3f7bbdeSPedro Giffuni# 3b3f7bbdeSPedro Giffuni# All rights reserved. 4b3f7bbdeSPedro Giffuni# 5b3f7bbdeSPedro Giffuni# Redistribution and use in source and binary forms, with or without 6b3f7bbdeSPedro Giffuni# modification, are permitted provided that the following conditions are 7b3f7bbdeSPedro Giffuni# met: 8b3f7bbdeSPedro Giffuni# 9b3f7bbdeSPedro Giffuni# - Redistributions of source code must retain the above copyright 10b3f7bbdeSPedro Giffuni# notice, this list of conditions and the following disclaimer. 11b3f7bbdeSPedro Giffuni# 12b3f7bbdeSPedro Giffuni# - Redistributions in binary form must reproduce the above copyright 13b3f7bbdeSPedro Giffuni# notice, this list of conditions and the following disclaimer in the 14b3f7bbdeSPedro Giffuni# documentation and/or other materials provided with the distribution. 15b3f7bbdeSPedro Giffuni# 16b3f7bbdeSPedro Giffuni# - Neither the name of the WiseGuys Internet B.V. nor the names of its 17b3f7bbdeSPedro Giffuni# contributors may be used to endorse or promote products derived from 18b3f7bbdeSPedro Giffuni# this software without specific prior written permission. 19b3f7bbdeSPedro Giffuni# 20b3f7bbdeSPedro Giffuni# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21b3f7bbdeSPedro Giffuni# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22b3f7bbdeSPedro Giffuni# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23b3f7bbdeSPedro Giffuni# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 24b3f7bbdeSPedro Giffuni# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25b3f7bbdeSPedro Giffuni# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26b3f7bbdeSPedro Giffuni# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27b3f7bbdeSPedro Giffuni# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28b3f7bbdeSPedro Giffuni# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29b3f7bbdeSPedro Giffuni# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30b3f7bbdeSPedro Giffuni# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31cdf0e10cSrcweir# 32*100a770dSPedro Giffuni 33cdf0e10cSrcweir# A sample config file for the language models 34cdf0e10cSrcweir# provided with Gertjan van Noord's language guesser 35cdf0e10cSrcweir# (http://odur.let.rug.nl/~vannoord/TextCat/) 36cdf0e10cSrcweir# 37cdf0e10cSrcweir# Notes: 38cdf0e10cSrcweir# - You may consider eliminating a couple of small languages from this 39cdf0e10cSrcweir# list because they cause false positives with big languages and are 40cdf0e10cSrcweir# bad for performance. (Do you really want to recognize Drents?) 41cdf0e10cSrcweir# - Putting the most probable languages at the top of the list 42cdf0e10cSrcweir# improves performance, because this will raise the threshold for 43cdf0e10cSrcweir# likely candidates more quickly. 
44cdf0e10cSrcweir# 45cdf0e10cSrcweir 46*100a770dSPedro Giffuni# this file has been modified (for OOo by Jocelyn MERAND 47*100a770dSPedro Giffuni# joc.merATgmail.com) to include country and encoding 48cdf0e10cSrcweir# guess strings are formed as follows: language-country-encoding 49cdf0e10cSrcweir 50cdf0e10cSrcweirafrikaans.lm af--utf8 51cdf0e10cSrcweiralbanian.lm sq--utf8 52cdf0e10cSrcweiramharic_utf.lm am--utf8 53cdf0e10cSrcweirarabic.lm ar--utf8 54cdf0e10cSrcweirbasque.lm eu--utf8 55cdf0e10cSrcweirbelarus.lm be--utf8 56cdf0e10cSrcweirbosnian.lm bs--utf8 57cdf0e10cSrcweirbreton.lm br--utf8 58cdf0e10cSrcweircatalan.lm ca--utf8 59cdf0e10cSrcweirchinese_simplified.lm zh-CN-utf8 60cdf0e10cSrcweirchinese_traditional.lm zh-TW-utf8 61cdf0e10cSrcweircroatian.lm hr--utf8 62cdf0e10cSrcweirczech.lm cs--utf8 63cdf0e10cSrcweirdanish.lm da--utf8 64cdf0e10cSrcweirdutch.lm nl--utf8 65cdf0e10cSrcweirenglish.lm en--utf8 66cdf0e10cSrcweiresperanto.lm eo--utf8 67cdf0e10cSrcweirestonian.lm et--utf8 68cdf0e10cSrcweirfinnish.lm fi--utf8 69cdf0e10cSrcweirfrench.lm fr--utf8 70cdf0e10cSrcweirfrisian.lm fy--utf8 71cdf0e10cSrcweirgeorgian.lm ka--utf8 72cdf0e10cSrcweirgerman.lm de--utf8 73cdf0e10cSrcweirgreek.lm el--utf8 74cdf0e10cSrcweirhebrew.lm he--utf8 75cdf0e10cSrcweirhindi.lm hi--utf8 76cdf0e10cSrcweirhungarian.lm hu--utf8 77cdf0e10cSrcweiricelandic.lm is--utf8 78cdf0e10cSrcweirindonesian.lm id--utf8 79cdf0e10cSrcweirirish_gaelic.lm ga--utf8 80cdf0e10cSrcweiritalian.lm it--utf8 81cdf0e10cSrcweirjapanese.lm ja--utf8 82cdf0e10cSrcweirkorean.lm ko--utf8 83cdf0e10cSrcweirlatin.lm la--utf8 84cdf0e10cSrcweirlatvian.lm lv--utf8 85cdf0e10cSrcweirlithuanian.lm lt--utf8 86cdf0e10cSrcweirluxembourgish.lm lb--utf8 87cdf0e10cSrcweirmalay.lm ms--utf8 88cdf0e10cSrcweirmanx_gaelic.lm gv--utf8 89cdf0e10cSrcweirmarathi.lm mr--utf8 90cdf0e10cSrcweirmongolian_cyrillic.lm mn--utf8 91cdf0e10cSrcweirnepali.lm ne--utf8 92cdf0e10cSrcweirnorwegian.lm nb--utf8 # Norwegian (Bokmal) 93cdf0e10cSrcweirpersian.lm 
fa--utf8 # Farsi 94cdf0e10cSrcweirpolish.lm pl--utf8 95cdf0e10cSrcweirportuguese.lm pt-PT-utf8 96cdf0e10cSrcweirquechua.lm qu--utf8 97cdf0e10cSrcweirromanian.lm ro--utf8 98cdf0e10cSrcweirromansh.lm rm--utf8 99cdf0e10cSrcweirrussian.lm ru--utf8 100cdf0e10cSrcweirsanskrit.lm sa--utf8 101cdf0e10cSrcweirscots.lm sco--utf8 102cdf0e10cSrcweirscots_gaelic.lm gd--utf8 103cdf0e10cSrcweirserbian.lm sr--utf-8 104cdf0e10cSrcweirserbian-latin.lm sh--utf-8 105cdf0e10cSrcweirslovak_ascii.lm sk-SK-utf8 106cdf0e10cSrcweirslovenian.lm sl--utf8 107cdf0e10cSrcweirspanish.lm es--utf8 108cdf0e10cSrcweirswahili.lm sw--utf8 109cdf0e10cSrcweirswedish.lm sv--utf8 110cdf0e10cSrcweirtagalog.lm tl--utf8 111cdf0e10cSrcweirtamil.lm ta--utf8 112cdf0e10cSrcweirthai.lm th--utf8 113cdf0e10cSrcweirturkish.lm tr--utf8 114cdf0e10cSrcweirukrainian.lm uk--utf8 115cdf0e10cSrcweirvietnamese.lm vi--utf8 116cdf0e10cSrcweirwelsh.lm cy--utf8 117cdf0e10cSrcweiryiddish_utf.lm yi--utf8 118cdf0e10cSrcweirzulu.lm zu--utf8 119