1# Copyright (c) 2003, WiseGuys Internet B.V.
2#
3# All rights reserved.
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9# - Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11#
12# - Redistributions in binary form must reproduce the above copyright
13# notice, this list of conditions and the following disclaimer in the
14# documentation and/or other materials provided with the distribution.
15#
16# - Neither the name of the WiseGuys Internet B.V. nor the names of its
17# contributors may be used to endorse or promote products derived from
18# this software without specific prior written permission.
19#
20# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31#
32
33# A sample config file for the language models
34# provided with Gertjan van Noord's language guesser
35# (http://odur.let.rug.nl/~vannoord/TextCat/)
36#
37# Notes:
38# - You may consider eliminating a couple of small languages from this
39# list because they cause false positives with big languages and are
40# bad for performance. (Do you really want to recognize Drents?)
41# - Putting the most probable languages at the top of the list
42# improves performance, because this will raise the threshold for
43# likely candidates more quickly.
44#
45
46# This file has been modified (for OOo, by Jocelyn MERAND,
47# joc.merATgmail.com) to include country and encoding.
48# Guess strings are formed as follows: language-country-encoding (the country part may be empty, e.g. en--utf8)
49
50afrikaans.lm                         af--utf8
51albanian.lm                          sq--utf8
52amharic_utf.lm                       am--utf8
53arabic.lm                            ar--utf8
54basque.lm                            eu--utf8
55belarus.lm                           be--utf8
56bosnian.lm                           bs--utf8
57breton.lm                            br--utf8
58catalan.lm                           ca--utf8
59chinese_simplified.lm                zh-CN-utf8
60chinese_traditional.lm               zh-TW-utf8
61croatian.lm                          hr--utf8
62czech.lm                             cs--utf8
63danish.lm                            da--utf8
64dutch.lm                             nl--utf8
65english.lm                           en--utf8
66esperanto.lm                         eo--utf8
67estonian.lm                          et--utf8
68finnish.lm                           fi--utf8
69french.lm                            fr--utf8
70frisian.lm                           fy--utf8
71georgian.lm                          ka--utf8
72german.lm                            de--utf8
73greek.lm                             el--utf8
74hebrew.lm                            he--utf8
75hindi.lm                             hi--utf8
76hungarian.lm                         hu--utf8
77icelandic.lm                         is--utf8
78indonesian.lm                        id--utf8
79irish_gaelic.lm                      ga--utf8
80italian.lm                           it--utf8
81japanese.lm                          ja--utf8
82korean.lm                            ko--utf8
83latin.lm                             la--utf8
84latvian.lm                           lv--utf8
85lithuanian.lm                        lt--utf8
86luxembourgish.lm                     lb--utf8
87malay.lm                             ms--utf8
88manx_gaelic.lm                       gv--utf8
89marathi.lm                           mr--utf8
90mongolian_cyrillic.lm                mn--utf8
91nepali.lm                            ne--utf8
92norwegian.lm                         nb--utf8       # Norwegian (Bokmal)
93persian.lm                           fa--utf8       # Farsi
94polish.lm                            pl--utf8
95portuguese.lm                        pt-PT-utf8
96quechua.lm                           qu--utf8
97romanian.lm                          ro--utf8
98romansh.lm                           rm--utf8
99russian.lm                           ru--utf8
100sanskrit.lm                          sa--utf8
101scots.lm                             sco--utf8
102scots_gaelic.lm                      gd--utf8
103serbian.lm                           sr--utf8
104serbian-latin.lm                     sh--utf8
105slovak_ascii.lm                      sk-SK-utf8
106slovenian.lm                         sl--utf8
107spanish.lm                           es--utf8
108swahili.lm                           sw--utf8
109swedish.lm                           sv--utf8
110tagalog.lm                           tl--utf8
111tamil.lm                             ta--utf8
112thai.lm                              th--utf8
113turkish.lm                           tr--utf8
114ukrainian.lm                         uk--utf8
115vietnamese.lm                        vi--utf8
116welsh.lm                             cy--utf8
117yiddish_utf.lm                       yi--utf8
118zulu.lm                              zu--utf8
119