1# Copyright (c) 2003, WiseGuys Internet B.V.
2#
3# All rights reserved.
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9# - Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11#
12# - Redistributions in binary form must reproduce the above copyright
13# notice, this list of conditions and the following disclaimer in the
14# documentation and/or other materials provided with the distribution.
15#
16# - Neither the name of the WiseGuys Internet B.V. nor the names of its
17# contributors may be used to endorse or promote products derived from
18# this software without specific prior written permission.
19#
20# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31#
32# A sample config file for the language models
33# provided with Gertjan van Noord's language guesser
34# (http://odur.let.rug.nl/~vannoord/TextCat/)
35#
36# Notes:
37# - You may consider eliminating a couple of small languages from this
38# list because they cause false positives with big languages and are
39# bad for performance. (Do you really want to recognize Drents?)
40# - Putting the most probable languages at the top of the list
41# improves performance, because this will raise the threshold for
42# likely candidates more quickly.
43#
44
45# This file has been modified (for OOo, by Jocelyn MERAND joc.mer@gmail.com) to include country and encoding.
46# Guess strings are formed as follows: language-country-encoding (the country may be left empty, e.g. af--utf8)
47
48afrikaans.lm                         af--utf8
49albanian.lm                          sq--utf8
50amharic_utf.lm                       am--utf8
51arabic.lm                            ar--utf8
52basque.lm                            eu--utf8
53belarus.lm                           be--utf8
54bosnian.lm                           bs--utf8
55breton.lm                            br--utf8
56catalan.lm                           ca--utf8
57chinese_simplified.lm                zh-CN-utf8
58chinese_traditional.lm               zh-TW-utf8
59croatian.lm                          hr--utf8
60czech.lm                             cs--utf8
61danish.lm                            da--utf8
62dutch.lm                             nl--utf8
63english.lm                           en--utf8
64esperanto.lm                         eo--utf8
65estonian.lm                          et--utf8
66finnish.lm                           fi--utf8
67french.lm                            fr--utf8
68frisian.lm                           fy--utf8
69georgian.lm                          ka--utf8
70german.lm                            de--utf8
71greek.lm                             el--utf8
72hebrew.lm                            he--utf8
73hindi.lm                             hi--utf8
74hungarian.lm                         hu--utf8
75icelandic.lm                         is--utf8
76indonesian.lm                        id--utf8
77irish_gaelic.lm                      ga--utf8
78italian.lm                           it--utf8
79japanese.lm                          ja--utf8
80korean.lm                            ko--utf8
81latin.lm                             la--utf8
82latvian.lm                           lv--utf8
83lithuanian.lm                        lt--utf8
84luxembourgish.lm                     lb--utf8
85malay.lm                             ms--utf8
86manx_gaelic.lm                       gv--utf8
87marathi.lm                           mr--utf8
88mongolian_cyrillic.lm                mn--utf8
89nepali.lm                            ne--utf8
90norwegian.lm                         nb--utf8       # Norwegian (Bokmal)
91persian.lm                           fa--utf8       # Farsi
92polish.lm                            pl--utf8
93portuguese.lm                        pt-PT-utf8
94quechua.lm                           qu--utf8
95romanian.lm                          ro--utf8
96romansh.lm                           rm--utf8
97russian.lm                           ru--utf8
98sanskrit.lm                          sa--utf8
99scots.lm                             sco--utf8
100scots_gaelic.lm                      gd--utf8
101serbian.lm                           sr--utf8
102serbian-latin.lm                     sh--utf8
103slovak_ascii.lm                      sk-SK-utf8
104slovenian.lm                         sl--utf8
105spanish.lm                           es--utf8
106swahili.lm                           sw--utf8
107swedish.lm                           sv--utf8
108tagalog.lm                           tl--utf8
109tamil.lm                             ta--utf8
110thai.lm                              th--utf8
111turkish.lm                           tr--utf8
112ukrainian.lm                         uk--utf8
113vietnamese.lm                        vi--utf8
114welsh.lm                             cy--utf8
115yiddish_utf.lm                       yi--utf8
116zulu.lm                              zu--utf8
117