1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """This module stores information and functionality that relates to plurals."""
23
24 import unicodedata
25
26 from translate.storage.placeables import StringElem
27
28
languages = {
    'af': ('Afrikaans', 2, '(n != 1)'),
    'ak': ('Akan', 2, 'n > 1'),
    'am': ('Amharic', 2, 'n > 1'),
    'ar': ('Arabic', 6, 'n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5'),
    'arn': ('Mapudungun; Mapuche', 2, 'n > 1'),
    'az': ('Azerbaijani', 2, '(n != 1)'),
    'be': ('Belarusian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'bg': ('Bulgarian', 2, '(n != 1)'),
    'bn': ('Bengali', 2, '(n != 1)'),
    'bn_IN': ('Bengali (India)', 2, '(n != 1)'),
    'bo': ('Tibetan', 1, '0'),
    'br': ('Breton', 2, 'n > 1'),
    'bs': ('Bosnian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'ca': ('Catalan; Valencian', 2, '(n != 1)'),
    'cs': ('Czech', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'),
    'csb': ('Kashubian', 3, 'n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'cy': ('Welsh', 2, '(n==2) ? 1 : 0'),
    'da': ('Danish', 2, '(n != 1)'),
    'de': ('German', 2, '(n != 1)'),
    'dz': ('Dzongkha', 1, '0'),
    'el': ('Greek', 2, '(n != 1)'),
    'en': ('English', 2, '(n != 1)'),
    'en_GB': ('English (United Kingdom)', 2, '(n != 1)'),
    'en_ZA': ('English (South Africa)', 2, '(n != 1)'),
    'eo': ('Esperanto', 2, '(n != 1)'),
    'es': ('Spanish; Castilian', 2, '(n != 1)'),
    'et': ('Estonian', 2, '(n != 1)'),
    'eu': ('Basque', 2, '(n != 1)'),
    'fa': ('Persian', 1, '0'),
    'fi': ('Finnish', 2, '(n != 1)'),
    'fil': ('Filipino; Pilipino', 2, '(n > 1)'),
    'fo': ('Faroese', 2, '(n != 1)'),
    'fr': ('French', 2, '(n > 1)'),
    'fur': ('Friulian', 2, '(n != 1)'),
    'fy': ('Frisian', 2, '(n != 1)'),
    'ga': ('Irish', 3, 'n==1 ? 0 : n==2 ? 1 : 2'),
    'gl': ('Galician', 2, '(n != 1)'),
    'gu': ('Gujarati', 2, '(n != 1)'),
    'gun': ('Gun', 2, '(n > 1)'),
    'ha': ('Hausa', 2, '(n != 1)'),
    'he': ('Hebrew', 2, '(n != 1)'),
    'hi': ('Hindi', 2, '(n != 1)'),
    'hy': ('Armenian', 1, '0'),
    'hr': ('Croatian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'hu': ('Hungarian', 2, '(n != 1)'),
    'id': ('Indonesian', 1, '0'),
    'is': ('Icelandic', 2, '(n != 1)'),
    'it': ('Italian', 2, '(n != 1)'),
    'ja': ('Japanese', 1, '0'),
    'jv': ('Javanese', 2, '(n != 1)'),
    'ka': ('Georgian', 1, '0'),
    'km': ('Khmer', 1, '0'),
    'kn': ('Kannada', 2, '(n != 1)'),
    'ko': ('Korean', 1, '0'),
    'ku': ('Kurdish', 2, '(n != 1)'),
    'kw': ('Cornish', 4, '(n==1) ? 0 : (n==2) ? 1 : (n == 3) ? 2 : 3'),
    'ky': ('Kirghiz; Kyrgyz', 1, '0'),
    'lb': ('Luxembourgish; Letzeburgesch', 2, '(n != 1)'),
    'ln': ('Lingala', 2, '(n > 1)'),
    'lo': ('Lao', 1, '0'),
    'lt': ('Lithuanian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'lv': ('Latvian', 3, '(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2)'),
    'mg': ('Malagasy', 2, '(n > 1)'),
    'mi': ('Maori', 2, '(n > 1)'),
    'mk': ('Macedonian', 2, 'n==1 || n%10==1 ? 0 : 1'),
    'ml': ('Malayalam', 2, '(n != 1)'),
    'mn': ('Mongolian', 2, '(n != 1)'),
    'mr': ('Marathi', 2, '(n != 1)'),
    'ms': ('Malay', 1, '0'),
    'mt': ('Maltese', 4, '(n==1 ? 0 : n==0 || ( n%100>1 && n%100<11) ? 1 : (n%100>10 && n%100<20 ) ? 2 : 3)'),
    'nah': ('Nahuatl languages', 2, '(n != 1)'),
    'nap': ('Neapolitan', 2, '(n != 1)'),
    'nb': ('Norwegian Bokmal', 2, '(n != 1)'),
    'ne': ('Nepali', 2, '(n != 1)'),
    'nl': ('Dutch; Flemish', 2, '(n != 1)'),
    'nn': ('Norwegian Nynorsk', 2, '(n != 1)'),
    'nso': ('Pedi; Sepedi; Northern Sotho', 2, '(n > 1)'),
    'or': ('Oriya', 2, '(n != 1)'),
    'pa': ('Panjabi; Punjabi', 2, '(n != 1)'),
    'pap': ('Papiamento', 2, '(n != 1)'),
    'pl': ('Polish', 3, '(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'pms': ('Piemontese', 2, '(n != 1)'),
    'ps': ('Pushto; Pashto', 2, '(n != 1)'),
    'pt': ('Portuguese', 2, '(n != 1)'),
    'pt_BR': ('Portuguese (Brazil)', 2, '(n > 1)'),
    # The trailing ';' that used to end this equation was dropped: no other
    # entry carries a statement terminator, so a caller that appends its own
    # ';' would have produced ';;' for Romanian only.
    'ro': ('Romanian', 3, '(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2)'),
    'ru': ('Russian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'sco': ('Scots', 2, '(n != 1)'),
    'sk': ('Slovak', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'),
    'sl': ('Slovenian', 4, '(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3)'),
    'so': ('Somali', 2, '(n != 1)'),
    'sq': ('Albanian', 2, '(n != 1)'),
    'sr': ('Serbian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'su': ('Sundanese', 1, '0'),
    'sv': ('Swedish', 2, '(n != 1)'),
    'ta': ('Tamil', 2, '(n != 1)'),
    'te': ('Telugu', 2, '(n != 1)'),
    'tg': ('Tajik', 2, '(n != 1)'),
    'ti': ('Tigrinya', 2, '(n > 1)'),
    'th': ('Thai', 1, '0'),
    'tk': ('Turkmen', 2, '(n != 1)'),
    'tr': ('Turkish', 1, '0'),
    'uk': ('Ukrainian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'vi': ('Vietnamese', 1, '0'),
    'wa': ('Walloon', 2, '(n > 1)'),

    'zh_CN': ('Chinese (China)', 1, '0'),
    'zh_HK': ('Chinese (Hong Kong)', 1, '0'),
    'zh_TW': ('Chinese (Taiwan)', 1, '0'),
}
"""Dictionary of language data.
The language code is the dictionary key (which may contain country codes and modifiers).
The value is a tuple: (Full name in English, nplurals, plural equation).
The plural equation is a C-style expression in C{n}, written without a trailing semicolon."""
145
# NOTE(review): the ``def`` line was missing from the listing this file was
# recovered from; the name is taken from the call in simplify_to_common() and
# the parameter from the body -- confirm against the upstream file.
def simplercode(code):
    """This attempts to simplify the given language code by ignoring country
    codes, for example.

    The last component of the code (everything from the final separator
    found in its normalized form) is removed.

    @param code: The language code to simplify
    @return: C{code} unchanged if it is empty or C{None}, the code without
        its last component if it has one, otherwise C{""}

    @see:
      - U{http://www.rfc-editor.org/rfc/bcp/bcp47.txt}
      - U{http://www.rfc-editor.org/rfc/rfc4646.txt}
      - U{http://www.rfc-editor.org/rfc/rfc4647.txt}
      - U{http://www.w3.org/International/articles/language-tags/}
    """
    if not code:
        return code

    # The separator is located in the normalized form but the slice is taken
    # from the original string.  Presumably normalize_code() only rewrites
    # separators/case and keeps offsets aligned -- verify against its
    # definition.
    normalized = normalize_code(code)
    separator = normalized.rfind('-')
    if separator >= 0:
        return code[:separator]
    return ""
165
166
expansion_factors = {
    'af': 0.1,
    'ar': -0.09,
    'es': 0.21,
    'fr': 0.28,
    'it': 0.2,
}
"""Source to target string length expansion factors, keyed by language code.
A negative factor (as for C{'ar'}) means the target tends to be shorter."""
175
176 import gettext
177 import locale
178 import re
179 import os
180
iso639 = {}
"""Cache of gettext functions for ISO 639 language names, keyed by language
code (filled lazily by gettext_lang())"""
iso3166 = {}
"""Cache of gettext functions for ISO 3166 country names, keyed by language
code (filled lazily by gettext_country())"""
185
# A bare language code: 2-3 lowercase letters, an optional region introduced
# by '_' or '-' (2-3 uppercase letters) and an optional '@modifier'.
langcode_re = re.compile(r"^[a-z]{2,3}([_-][A-Z]{2,3}|)(@[a-zA-Z0-9]+|)$")
# Only the part that may follow the language: a region and optional modifier.
variant_re = re.compile(r"^[_-][A-Z]{2,3}(@[a-zA-Z0-9]+|)$")


# NOTE(review): the ``def`` line was missing from the listing this file was
# recovered from; the parameter names come from the body, the function name
# from the upstream project -- confirm before merging.
def languagematch(languagecode, otherlanguagecode):
    """matches a languagecode to another, ignoring regions in the second

    @param languagecode: The code to look for, or C{None} to accept any
        well-formed language code
    @param otherlanguagecode: The code being tested; it may carry a region
        and/or modifier that C{languagecode} lacks
    @return: A truthy value (C{True} or a regex match object) on a match,
        a falsy value (C{False} or C{None}) otherwise
    """
    if languagecode is None:
        return langcode_re.match(otherlanguagecode)
    if languagecode == otherlanguagecode:
        return True
    # A prefix match only counts when the remainder is a region/modifier
    # suffix, so "af" matches "af_ZA" but not "afr".
    return otherlanguagecode.startswith(languagecode) and \
        variant_re.match(otherlanguagecode[len(languagecode):])
195
# Splits a name of the form "Language (Country)" into its two parts.
dialect_name_re = re.compile(r"(.+)\s\(([^)]+)\)$")
197
# NOTE(review): the ``def`` line was missing from the listing this file was
# recovered from; the parameter comes from the body, the function name and
# the None default from the upstream project -- confirm before merging.
def tr_lang(langcode=None):
    """Gives a function that can translate a language name, even in the form C{"language (country)"},
    into the language with iso code langcode, or the system language if no language is specified.

    @param langcode: Code of the language to translate names into
    @return: A function taking a language name and returning its translation
    """
    langfunc = gettext_lang(langcode)
    countryfunc = gettext_country(langcode)

    def handlelanguage(name):
        """Translate C{name}, handling the language and country separately."""
        match = dialect_name_re.match(name)
        if not match:
            return langfunc(name)
        # "Language (Country)": each half lives in a different catalogue.
        language, country = match.groups()
        return u"%s (%s)" % (langfunc(language), countryfunc(country))

    return handlelanguage
213
def gettext_lang(langcode=None):
    """Returns a gettext function to translate language names into the given
    language, or the system language if no language is specified.

    The function is created once per language code and then served from the
    C{iso639} cache.

    @param langcode: Code of the language to translate into; any false value
        selects the system language
    @return: A function taking a language name and returning its translation
    """
    # Fold every "no language" value into the key the cache actually stores.
    # Testing the raw argument first meant None never hit the cache and the
    # catalogue was looked up again on every default call.
    langcode = langcode or ""
    if langcode not in iso639:
        if langcode:
            t = gettext.translation('iso_639', languages=[langcode], fallback=True)
        elif os.name == "nt":
            # On Windows the default locale is passed explicitly --
            # presumably because gettext does not pick it up by itself there.
            t = gettext.translation('iso_639', languages=[locale.getdefaultlocale()[0]], fallback=True)
        else:
            t = gettext.translation('iso_639', fallback=True)
        # ugettext() only exists on Python 2 translation objects; on
        # Python 3 gettext() already returns text.
        iso639[langcode] = getattr(t, "ugettext", t.gettext)
    return iso639[langcode]
229
def gettext_country(langcode=None):
    """Returns a gettext function to translate country names into the given
    language, or the system language if no language is specified.

    The function is created once per language code and then served from the
    C{iso3166} cache.

    @param langcode: Code of the language to translate into; any false value
        selects the system language
    @return: A function taking a country name and returning its translation
    """
    # Fold every "no language" value into the key the cache actually stores.
    # Testing the raw argument first meant None never hit the cache and the
    # catalogue was looked up again on every default call.
    langcode = langcode or ""
    if langcode not in iso3166:
        if langcode:
            t = gettext.translation('iso_3166', languages=[langcode], fallback=True)
        elif os.name == "nt":
            # On Windows the default locale is passed explicitly --
            # presumably because gettext does not pick it up by itself there.
            t = gettext.translation('iso_3166', languages=[locale.getdefaultlocale()[0]], fallback=True)
        else:
            t = gettext.translation('iso_3166', fallback=True)
        # ugettext() only exists on Python 2 translation objects; on
        # Python 3 gettext() already returns text.
        iso3166[langcode] = getattr(t, "ugettext", t.gettext)
    return iso3166[langcode]
245
# NOTE(review): the ``def`` line was missing from the listing this file was
# recovered from; the name comes from the call in the function that combines
# it with forceunicode(), the "NFC" default from the docstring -- confirm.
def normalize(string, normal_form="NFC"):
    """Return a unicode string in its normalized form

    @param string: The string to be normalized
    @param normal_form: NFC (default), NFD, NFKC, NFKD
    @return: Normalized string, or C{None} if C{string} is C{None}
    """
    if string is None:
        return None
    return unicodedata.normalize(normal_form, string)
257
# NOTE(review): the ``def`` line was missing from the listing this file was
# recovered from; the name comes from its call site, the parameter from the
# body -- confirm against the upstream file.
def forceunicode(string):
    """Ensures that the string is in unicode.

    Byte strings are decoded using their C{encoding} attribute if they have
    one, otherwise as UTF-8.  Anything else is returned untouched, except
    C{StringElem} objects, which are converted to text.

    @param string: A text string
    @type string: Unicode, String
    @return: String converted to Unicode and normalized as needed.
    @rtype: Unicode
    """
    if string is None:
        return None
    # ``bytes`` is ``str`` on Python 2, so this is the same check there, while
    # on Python 3 it no longer tries to call .decode() on text.
    if isinstance(string, bytes):
        encoding = getattr(string, "encoding", "utf-8")
        string = string.decode(encoding)
    elif isinstance(string, StringElem):
        # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3.
        text_type = type(u"")
        string = text_type(string)
    return string
274
# NOTE(review): the ``def`` line was missing from the listing this file was
# recovered from; the parameter comes from the body, the function name from
# the upstream project -- confirm before merging.
def normalized_unicode(string):
    """Forces the string to unicode and does normalization.

    @param string: A text or byte string, or C{None}
    @return: The result of normalize() applied to forceunicode(string)
    """
    return normalize(forceunicode(string))
278
281
# NOTE(review): the ``def`` line was missing from the listing this file was
# recovered from.  The name comes from the recursive call; ``languages`` is
# exposed as an optional parameter defaulting to the module table so that
# both one- and two-argument callers keep working -- confirm upstream.
def simplify_to_common(language_code, languages=languages):
    """Simplify language code to the most commonly used form for the
    language, stripping country information for languages that tend
    not to be localized differently for different countries

    @param language_code: The language code to simplify
    @param languages: Mapping whose keys are the known language codes
    @return: C{language_code} itself if it is a known code or cannot be
        simplified any further, otherwise the result of simplifying its
        shorter form
    """
    simpler = simplercode(language_code)
    wanted = normalize_code(language_code)
    # any() stops at the first hit instead of normalizing every key into a
    # throwaway list on each (recursive) call.
    known = any(normalize_code(key) == wanted for key in languages)
    if known or simpler == "":
        return language_code
    # Keep using the caller's table while walking up to shorter codes.
    return simplify_to_common(simpler, languages)
291