Package translate :: Package lang :: Module data
[hide private]
[frames | no frames]

Source Code for Module translate.lang.data

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2007-2009 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """This module stores information and functionality that relates to plurals.""" 
 23   
 24  import unicodedata 
 25   
 26  from translate.storage.placeables import StringElem 
 27   
 28   
 29  languages = { 
 30  'af': ('Afrikaans', 2, '(n != 1)'), 
 31  'ak': ('Akan', 2, 'n > 1'), 
 32  'am': ('Amharic', 2, 'n > 1'), 
 33  'ar': ('Arabic', 6, 'n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5'), 
 34  'arn': ('Mapudungun; Mapuche', 2, 'n > 1'), 
 35  'az': ('Azerbaijani', 2, '(n != 1)'), 
 36  'be': ('Belarusian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'), 
 37  'bg': ('Bulgarian', 2, '(n != 1)'), 
 38  'bn': ('Bengali', 2, '(n != 1)'), 
 39  'bn_IN': ('Bengali (India)', 2, '(n != 1)'), 
 40  'bo': ('Tibetan', 1, '0'), 
 41  'br': ('Breton', 2, 'n > 1'), 
 42  'bs': ('Bosnian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'), 
 43  'ca': ('Catalan; Valencian', 2, '(n != 1)'), 
 44  'cs': ('Czech', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'), 
 45  'csb': ('Kashubian', 3, 'n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'), 
 46  'cy': ('Welsh', 2, '(n==2) ? 1 : 0'), 
 47  'da': ('Danish', 2, '(n != 1)'), 
 48  'de': ('German', 2, '(n != 1)'), 
 49  'dz': ('Dzongkha', 1, '0'), 
 50  'el': ('Greek', 2, '(n != 1)'), 
 51  'en': ('English', 2, '(n != 1)'), 
 52  'en_GB': ('English (United Kingdom)', 2, '(n != 1)'), 
 53  'en_ZA': ('English (South Africa)', 2, '(n != 1)'), 
 54  'eo': ('Esperanto', 2, '(n != 1)'), 
 55  'es': ('Spanish; Castilian', 2, '(n != 1)'), 
 56  'et': ('Estonian', 2, '(n != 1)'), 
 57  'eu': ('Basque', 2, '(n != 1)'), 
 58  'fa': ('Persian', 1, '0'), 
 59  'fi': ('Finnish', 2, '(n != 1)'), 
 60  'fil': ('Filipino; Pilipino', 2, '(n > 1)'), 
 61  'fo': ('Faroese', 2, '(n != 1)'), 
 62  'fr': ('French', 2, '(n > 1)'), 
 63  'fur': ('Friulian', 2, '(n != 1)'), 
 64  'fy': ('Frisian', 2, '(n != 1)'), 
 65  'ga': ('Irish', 3, 'n==1 ? 0 : n==2 ? 1 : 2'), 
 66  'gl': ('Galician', 2, '(n != 1)'), 
 67  'gu': ('Gujarati', 2, '(n != 1)'), 
 68  'gun': ('Gun', 2, '(n > 1)'), 
 69  'ha': ('Hausa', 2, '(n != 1)'), 
 70  'he': ('Hebrew', 2, '(n != 1)'), 
 71  'hi': ('Hindi', 2, '(n != 1)'), 
 72  'hy': ('Armenian', 1, '0'), 
 73  'hr': ('Croatian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
 74  'hu': ('Hungarian', 2, '(n != 1)'), 
 75  'id': ('Indonesian', 1, '0'), 
 76  'is': ('Icelandic', 2, '(n != 1)'), 
 77  'it': ('Italian', 2, '(n != 1)'), 
 78  'ja': ('Japanese', 1, '0'), 
 79  'jv': ('Javanese', 2, '(n != 1)'), 
 80  'ka': ('Georgian', 1, '0'), 
 81  'km': ('Khmer', 1, '0'), 
 82  'kn': ('Kannada', 2, '(n != 1)'), 
 83  'ko': ('Korean', 1, '0'), 
 84  'ku': ('Kurdish', 2, '(n != 1)'), 
 85  'kw': ('Cornish', 4, '(n==1) ? 0 : (n==2) ? 1 : (n == 3) ? 2 : 3'), 
 86  'ky': ('Kirghiz; Kyrgyz', 1, '0'), 
 87  'lb': ('Luxembourgish; Letzeburgesch', 2, '(n != 1)'), 
 88  'ln': ('Lingala', 2, '(n > 1)'), 
 89  'lo': ('Lao', 1, '0'), 
 90  'lt': ('Lithuanian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
 91  'lv': ('Latvian', 3, '(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2)'), 
 92  'mg': ('Malagasy', 2, '(n > 1)'), 
 93  'mi': ('Maori', 2, '(n > 1)'), 
 94  'mk': ('Macedonian', 2, 'n==1 || n%10==1 ? 0 : 1'), 
 95  'ml': ('Malayalam', 2, '(n != 1)'), 
 96  'mn': ('Mongolian', 2, '(n != 1)'), 
 97  'mr': ('Marathi', 2, '(n != 1)'), 
 98  'ms': ('Malay', 1, '0'), 
 99  'mt': ('Maltese', 4, '(n==1 ? 0 : n==0 || ( n%100>1 && n%100<11) ? 1 : (n%100>10 && n%100<20 ) ? 2 : 3)'), 
100  'nah': ('Nahuatl languages', 2, '(n != 1)'), 
101  'nap': ('Neapolitan', 2, '(n != 1)'), 
102  'nb': ('Norwegian Bokmal', 2, '(n != 1)'), 
103  'ne': ('Nepali', 2, '(n != 1)'), 
104  'nl': ('Dutch; Flemish', 2, '(n != 1)'), 
105  'nn': ('Norwegian Nynorsk', 2, '(n != 1)'), 
106  'nso': ('Pedi; Sepedi; Northern Sotho', 2, '(n > 1)'), 
107  'or': ('Oriya', 2, '(n != 1)'), 
108  'pa': ('Panjabi; Punjabi', 2, '(n != 1)'), 
109  'pap': ('Papiamento', 2, '(n != 1)'), 
110  'pl': ('Polish', 3, '(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
111  'pms': ('Piemontese', 2, '(n != 1)'), 
112  'ps': ('Pushto; Pashto', 2, '(n != 1)'), 
113  'pt': ('Portuguese', 2, '(n != 1)'), 
114  'pt_BR': ('Portuguese (Brazil)', 2, '(n > 1)'), 
115  'ro': ('Romanian', 3, '(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2);'), 
116  'ru': ('Russian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
117  'sco': ('Scots', 2, '(n != 1)'), 
118  'sk': ('Slovak', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'), 
119  'sl': ('Slovenian', 4, '(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3)'), 
120  'so': ('Somali', 2, '(n != 1)'), 
121  'sq': ('Albanian', 2, '(n != 1)'), 
122  'sr': ('Serbian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
123  'st': ('Sotho, Southern', 2, '(n != 1)'), 
124  'su': ('Sundanese', 1, '0'), 
125  'sv': ('Swedish', 2, '(n != 1)'), 
126  'sw': ('Swahili', 2, '(n != 1)'), 
127  'ta': ('Tamil', 2, '(n != 1)'), 
128  'te': ('Telugu', 2, '(n != 1)'), 
129  'tg': ('Tajik', 2, '(n != 1)'), 
130  'ti': ('Tigrinya', 2, '(n > 1)'), 
131  'th': ('Thai', 1, '0'), 
132  'tk': ('Turkmen', 2, '(n != 1)'), 
133  'tr': ('Turkish', 1, '0'), 
134  'uk': ('Ukrainian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
135  'vi': ('Vietnamese', 1, '0'), 
136  'wa': ('Walloon', 2, '(n > 1)'), 
137  # Chinese is difficult because the main divide is on script, not really  
138  # country. Simplified Chinese is used mostly in China, Singapore and Malaysia. 
139  # Traditional Chinese is used mostly in Hong Kong, Taiwan and Macau. 
140  'zh_CN': ('Chinese (China)', 1, '0'), 
141  'zh_HK': ('Chinese (Hong Kong)', 1, '0'), 
142  'zh_TW': ('Chinese (Taiwan)', 1, '0'), 
143  'zu': ('Zulu', 2, '(n != 1)'), 
144  } 
145  """Dictionary of language data. 
146  The language code is the dictionary key (which may contain country codes and modifiers). 
147  The value is a tuple: (Full name in English, nplurals, plural equation)""" 
148   
def simplercode(code):
    """Return a less specific form of the given language code by dropping
    its last component (a country code or modifier, for example).

    A false value (None or an empty string) is handed back unchanged.  When
    the code has no component left to drop, an empty string is returned.

    @see:
        - U{http://www.rfc-editor.org/rfc/bcp/bcp47.txt}
        - U{http://www.rfc-editor.org/rfc/rfc4646.txt}
        - U{http://www.rfc-editor.org/rfc/rfc4647.txt}
        - U{http://www.w3.org/International/articles/language-tags/}
    """
    if code:
        # The normalised code is only used to locate the last separator; the
        # slice is taken from the code as it was passed in.
        cut = normalize_code(code).rfind('-')
        return code[:cut] if cut >= 0 else ""
    return code
# Per-language factors, keyed by language code.
expansion_factors = {
    'af': 0.1,
    'ar': -0.09,
    'es': 0.21,
    'fr': 0.28,
    'it': 0.2,
}
"""Source to target string length expansion factors."""

import gettext
import locale
import re
import os

# Filled lazily by gettext_lang(): language code -> gettext function.
iso639 = {}
"""ISO 639 language codes"""
# Filled lazily by gettext_country(): language code -> gettext function.
iso3166 = {}
"""ISO 3166 country codes"""

# A full code: 2-3 lower-case letters, an optional "_XX"/"-XX" part of 2-3
# upper-case letters and an optional "@modifier".
langcode_re = re.compile("^[a-z]{2,3}([_-][A-Z]{2,3}|)(@[a-zA-Z0-9]+|)$")
# Only the part that may follow the leading letters of a code.
variant_re = re.compile("^[_-][A-Z]{2,3}(@[a-zA-Z0-9]+|)$")
def languagematch(languagecode, otherlanguagecode):
    """Match a language code against another one, ignoring any region in
    the second.

    When languagecode is None the result tells whether otherlanguagecode has
    the shape of a language code at all.  The value returned is truthy on a
    match and falsy otherwise; it may be a bool, a regex match object or
    None.
    """
    if languagecode is None:
        return langcode_re.match(otherlanguagecode)
    if languagecode == otherlanguagecode:
        return True
    if not otherlanguagecode.startswith(languagecode):
        return False
    # Whatever follows the shared prefix has to look like a region/modifier.
    remainder = otherlanguagecode[len(languagecode):]
    return variant_re.match(remainder)
# Splits a name such as "English (United Kingdom)" into the text before the
# final parenthesised part and the text inside the parentheses.
dialect_name_re = re.compile(r"(.+)\s\(([^)]+)\)$")
def tr_lang(langcode=None):
    """Build a function that translates a language name.

    The function returned also accepts names of the form
    C{"language (country)"}.  Names are translated into the language with iso
    code langcode, or into the system language if no language is specified.
    """
    translate_language = gettext_lang(langcode)
    translate_country = gettext_country(langcode)

    def handlelanguage(name):
        dialect = dialect_name_re.match(name)
        if not dialect:
            return translate_language(name)
        # Language and country are translated separately and put back together.
        language, country = dialect.groups()
        return u"%s (%s)" % (translate_language(language), translate_country(country))

    return handlelanguage
def gettext_lang(langcode=None):
    """Returns a gettext function to translate language names into the given
    language, or the system language if no language is specified.

    The function is looked up in, and stored into, the module-level iso639
    cache.  The system-language entry lives under the key C{""}.

    @param langcode: Code of the language to translate into; a false value
        (None or an empty string) selects the system language.
    @return: The C{ugettext} method of the 'iso_639' translation (a fallback
        translation is used when no catalogue is found).
    """
    # Fold None (and any other false value) to "" BEFORE probing the cache.
    # The entry is stored under "", so probing with None missed every time
    # and rebuilt the translation on each default call.
    if not langcode:
        langcode = ""
    if langcode not in iso639:
        if not langcode:
            if os.name == "nt":
                # On Windows the default locale is not used for some reason
                t = gettext.translation('iso_639', languages=[locale.getdefaultlocale()[0]], fallback=True)
            else:
                t = gettext.translation('iso_639', fallback=True)
        else:
            t = gettext.translation('iso_639', languages=[langcode], fallback=True)
        iso639[langcode] = t.ugettext
    return iso639[langcode]
232
def gettext_country(langcode=None):
    """Returns a gettext function to translate country names into the given
    language, or the system language if no language is specified.

    The function is looked up in, and stored into, the module-level iso3166
    cache.  The system-language entry lives under the key C{""}.

    @param langcode: Code of the language to translate into; a false value
        (None or an empty string) selects the system language.
    @return: The C{ugettext} method of the 'iso_3166' translation (a fallback
        translation is used when no catalogue is found).
    """
    # Fold None (and any other false value) to "" BEFORE probing the cache.
    # The entry is stored under "", so probing with None missed every time
    # and rebuilt the translation on each default call.
    if not langcode:
        langcode = ""
    if langcode not in iso3166:
        if not langcode:
            if os.name == "nt":
                # On Windows the default locale is not used for some reason
                t = gettext.translation('iso_3166', languages=[locale.getdefaultlocale()[0]], fallback=True)
            else:
                t = gettext.translation('iso_3166', fallback=True)
        else:
            t = gettext.translation('iso_3166', languages=[langcode], fallback=True)
        iso3166[langcode] = t.ugettext
    return iso3166[langcode]
248
def normalize(string, normal_form="NFC"):
    """Return a unicode string in its normalized form

    @param string: The string to be normalized; None is passed through
    @param normal_form: NFC (default), NFD, NFKC, NFKD
    @return: Normalized string, or None when string is None
    """
    if string is not None:
        return unicodedata.normalize(normal_form, string)
    return None
260
def forceunicode(string):
    """Ensures that the string is in unicode.

    @param string: A text string
    @type string: Unicode, String
    @return: String converted to Unicode; None and values of any other type
        are returned as they are.
    @rtype: Unicode
    """
    if string is None:
        return None
    if isinstance(string, str):
        # Decode with the string's own "encoding" attribute when it has one,
        # with utf-8 otherwise.
        return string.decode(getattr(string, "encoding", "utf-8"))
    if isinstance(string, StringElem):
        return unicode(string)
    return string
277
def normalized_unicode(string):
    """Forces the string to unicode and then normalizes it, using the
    default normal form of normalize()."""
    as_unicode = forceunicode(string)
    return normalize(as_unicode)
281
def normalize_code(code):
    """Return the language code in lower case with every C{"_"} and C{"@"}
    replaced by C{"-"}."""
    hyphenated = code
    for separator in ("_", "@"):
        hyphenated = hyphenated.replace(separator, "-")
    return hyphenated.lower()
284
def simplify_to_common(language_code, languages=languages):
    """Simplify language code to the most commonly used form for the
    language, stripping country information for languages that tend
    not to be localized differently for different countries

    The code is simplified one component at a time with simplercode() until
    it matches a key of languages (compared after normalize_code()) or cannot
    be simplified any further.

    @param language_code: The language code to simplify; a false value (None
        or an empty string) is returned unchanged.
    @param languages: Mapping whose keys are the known language codes.
    @return: The first form of the code that is a known key, or the last
        form reached when nothing more can be stripped.
    """
    if not language_code:
        return language_code

    # Built once and used for every simplification step.  The recursive call
    # this loop replaces dropped the languages argument, so a caller-supplied
    # mapping was only consulted on the first step.
    known_codes = set(normalize_code(key) for key in languages.keys())

    candidate = language_code
    while True:
        if normalize_code(candidate) in known_codes:
            return candidate
        simpler = simplercode(candidate)
        if simpler == "":
            return candidate
        candidate = simpler
294