1b3f7bbdeSPedro Giffuni# Copyright (c) 2003, WiseGuys Internet B.V.
2b3f7bbdeSPedro Giffuni#
3b3f7bbdeSPedro Giffuni# All rights reserved.
4b3f7bbdeSPedro Giffuni#
5b3f7bbdeSPedro Giffuni# Redistribution and use in source and binary forms, with or without
6b3f7bbdeSPedro Giffuni# modification, are permitted provided that the following conditions are
7b3f7bbdeSPedro Giffuni# met:
8b3f7bbdeSPedro Giffuni#
9b3f7bbdeSPedro Giffuni# - Redistributions of source code must retain the above copyright
10b3f7bbdeSPedro Giffuni# notice, this list of conditions and the following disclaimer.
11b3f7bbdeSPedro Giffuni#
12b3f7bbdeSPedro Giffuni# - Redistributions in binary form must reproduce the above copyright
13b3f7bbdeSPedro Giffuni# notice, this list of conditions and the following disclaimer in the
14b3f7bbdeSPedro Giffuni# documentation and/or other materials provided with the distribution.
15b3f7bbdeSPedro Giffuni#
16b3f7bbdeSPedro Giffuni# - Neither the name of the WiseGuys Internet B.V. nor the names of its
17b3f7bbdeSPedro Giffuni# contributors may be used to endorse or promote products derived from
18b3f7bbdeSPedro Giffuni# this software without specific prior written permission.
19b3f7bbdeSPedro Giffuni#
20b3f7bbdeSPedro Giffuni# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21b3f7bbdeSPedro Giffuni# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22b3f7bbdeSPedro Giffuni# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23b3f7bbdeSPedro Giffuni# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24b3f7bbdeSPedro Giffuni# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25b3f7bbdeSPedro Giffuni# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26b3f7bbdeSPedro Giffuni# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27b3f7bbdeSPedro Giffuni# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28b3f7bbdeSPedro Giffuni# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29b3f7bbdeSPedro Giffuni# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30b3f7bbdeSPedro Giffuni# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31cdf0e10cSrcweir#
32*100a770dSPedro Giffuni
33cdf0e10cSrcweir# A sample config file for the language models
34cdf0e10cSrcweir# provided with Gertjan van Noord's language guesser
35cdf0e10cSrcweir# (http://odur.let.rug.nl/~vannoord/TextCat/)
36cdf0e10cSrcweir#
37cdf0e10cSrcweir# Notes:
38cdf0e10cSrcweir# - You may consider eliminating a couple of small languages from this
39cdf0e10cSrcweir# list because they cause false positives with big languages and are
40cdf0e10cSrcweir# bad for performance. (Do you really want to recognize Drents?)
41cdf0e10cSrcweir# - Putting the most probable languages at the top of the list
42cdf0e10cSrcweir# improves performance, because this will raise the threshold for
43cdf0e10cSrcweir# likely candidates more quickly.
44cdf0e10cSrcweir#
45cdf0e10cSrcweir
46*100a770dSPedro Giffuni# This file has been modified (for OOo, by Jocelyn MERAND
47*100a770dSPedro Giffuni# joc.merATgmail.com) to include country and encoding.
48cdf0e10cSrcweir# Guess strings are formed as follows: language-country-encoding (country may be empty)
49cdf0e10cSrcweir
50cdf0e10cSrcweirafrikaans.lm                         af--utf8
51cdf0e10cSrcweiralbanian.lm                          sq--utf8
52cdf0e10cSrcweiramharic_utf.lm                       am--utf8
53cdf0e10cSrcweirarabic.lm                            ar--utf8
54cdf0e10cSrcweirbasque.lm                            eu--utf8
55cdf0e10cSrcweirbelarus.lm                           be--utf8
56cdf0e10cSrcweirbosnian.lm                           bs--utf8
57cdf0e10cSrcweirbreton.lm                            br--utf8
58cdf0e10cSrcweircatalan.lm                           ca--utf8
59cdf0e10cSrcweirchinese_simplified.lm                zh-CN-utf8
60cdf0e10cSrcweirchinese_traditional.lm               zh-TW-utf8
61cdf0e10cSrcweircroatian.lm                          hr--utf8
62cdf0e10cSrcweirczech.lm                             cs--utf8
63cdf0e10cSrcweirdanish.lm                            da--utf8
64cdf0e10cSrcweirdutch.lm                             nl--utf8
65cdf0e10cSrcweirenglish.lm                           en--utf8
66cdf0e10cSrcweiresperanto.lm                         eo--utf8
67cdf0e10cSrcweirestonian.lm                          et--utf8
68cdf0e10cSrcweirfinnish.lm                           fi--utf8
69cdf0e10cSrcweirfrench.lm                            fr--utf8
70cdf0e10cSrcweirfrisian.lm                           fy--utf8
71cdf0e10cSrcweirgeorgian.lm                          ka--utf8
72cdf0e10cSrcweirgerman.lm                            de--utf8
73cdf0e10cSrcweirgreek.lm                             el--utf8
74cdf0e10cSrcweirhebrew.lm                            he--utf8
75cdf0e10cSrcweirhindi.lm                             hi--utf8
76cdf0e10cSrcweirhungarian.lm                         hu--utf8
77cdf0e10cSrcweiricelandic.lm                         is--utf8
78cdf0e10cSrcweirindonesian.lm                        id--utf8
79cdf0e10cSrcweirirish_gaelic.lm                      ga--utf8
80cdf0e10cSrcweiritalian.lm                           it--utf8
81cdf0e10cSrcweirjapanese.lm                          ja--utf8
82cdf0e10cSrcweirkorean.lm                            ko--utf8
83cdf0e10cSrcweirlatin.lm                             la--utf8
84cdf0e10cSrcweirlatvian.lm                           lv--utf8
85cdf0e10cSrcweirlithuanian.lm                        lt--utf8
86cdf0e10cSrcweirluxembourgish.lm                     lb--utf8
87cdf0e10cSrcweirmalay.lm                             ms--utf8
88cdf0e10cSrcweirmanx_gaelic.lm                       gv--utf8
89cdf0e10cSrcweirmarathi.lm                           mr--utf8
90cdf0e10cSrcweirmongolian_cyrillic.lm                mn--utf8
91cdf0e10cSrcweirnepali.lm                            ne--utf8
92cdf0e10cSrcweirnorwegian.lm                         nb--utf8       # Norwegian (Bokmal)
93cdf0e10cSrcweirpersian.lm                           fa--utf8       # Farsi
94cdf0e10cSrcweirpolish.lm                            pl--utf8
95cdf0e10cSrcweirportuguese.lm                        pt-PT-utf8
96cdf0e10cSrcweirquechua.lm                           qu--utf8
97cdf0e10cSrcweirromanian.lm                          ro--utf8
98cdf0e10cSrcweirromansh.lm                           rm--utf8
99cdf0e10cSrcweirrussian.lm                           ru--utf8
100cdf0e10cSrcweirsanskrit.lm                          sa--utf8
101cdf0e10cSrcweirscots.lm                             sco--utf8
102cdf0e10cSrcweirscots_gaelic.lm                      gd--utf8
103cdf0e10cSrcweirserbian.lm                           sr--utf-8
104cdf0e10cSrcweirserbian-latin.lm                     sh--utf-8
105cdf0e10cSrcweirslovak_ascii.lm                      sk-SK-utf8
106cdf0e10cSrcweirslovenian.lm                         sl--utf8
107cdf0e10cSrcweirspanish.lm                           es--utf8
108cdf0e10cSrcweirswahili.lm                           sw--utf8
109cdf0e10cSrcweirswedish.lm                           sv--utf8
110cdf0e10cSrcweirtagalog.lm                           tl--utf8
111cdf0e10cSrcweirtamil.lm                             ta--utf8
112cdf0e10cSrcweirthai.lm                              th--utf8
113cdf0e10cSrcweirturkish.lm                           tr--utf8
114cdf0e10cSrcweirukrainian.lm                         uk--utf8
115cdf0e10cSrcweirvietnamese.lm                        vi--utf8
116cdf0e10cSrcweirwelsh.lm                             cy--utf8
117cdf0e10cSrcweiryiddish_utf.lm                       yi--utf8
118cdf0e10cSrcweirzulu.lm                              zu--utf8
119