Package translate :: Package lang :: Module data
[hide private]
[frames] | [no frames]

Source Code for Module translate.lang.data

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2007-2009 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """This module stores information and functionality that relates to plurals.""" 
 23   
 24  import unicodedata 
 25   
 26  from translate.storage.placeables import StringElem 
 27   
 28   
 29  languages = { 
 30  'af': ('Afrikaans', 2, '(n != 1)'), 
 31  'ak': ('Akan', 2, 'n > 1'), 
 32  'am': ('Amharic', 2, 'n > 1'), 
 33  'ar': ('Arabic', 6, 'n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5'), 
 34  'arn': ('Mapudungun; Mapuche', 2, 'n > 1'), 
 35  'az': ('Azerbaijani', 2, '(n != 1)'), 
 36  'be': ('Belarusian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'), 
 37  'bg': ('Bulgarian', 2, '(n != 1)'), 
 38  'bn': ('Bengali', 2, '(n != 1)'), 
 39  'bn_IN': ('Bengali (India)', 2, '(n != 1)'), 
 40  'bo': ('Tibetan', 1, '0'), 
 41  'br': ('Breton', 2, 'n > 1'), 
 42  'bs': ('Bosnian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'), 
 43  'ca': ('Catalan; Valencian', 2, '(n != 1)'), 
 44  'cs': ('Czech', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'), 
 45  'csb': ('Kashubian', 3, 'n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'), 
 46  'cy': ('Welsh', 2, '(n==2) ? 1 : 0'), 
 47  'da': ('Danish', 2, '(n != 1)'), 
 48  'de': ('German', 2, '(n != 1)'), 
 49  'dz': ('Dzongkha', 1, '0'), 
 50  'el': ('Greek', 2, '(n != 1)'), 
 51  'en': ('English', 2, '(n != 1)'), 
 52  'en_GB': ('English (United Kingdom)', 2, '(n != 1)'), 
 53  'en_ZA': ('English (South Africa)', 2, '(n != 1)'), 
 54  'eo': ('Esperanto', 2, '(n != 1)'), 
 55  'es': ('Spanish; Castilian', 2, '(n != 1)'), 
 56  'et': ('Estonian', 2, '(n != 1)'), 
 57  'eu': ('Basque', 2, '(n != 1)'), 
 58  'fa': ('Persian', 1, '0'), 
 59  'fi': ('Finnish', 2, '(n != 1)'), 
 60  'fil': ('Filipino; Pilipino', 2, '(n > 1)'), 
 61  'fo': ('Faroese', 2, '(n != 1)'), 
 62  'fr': ('French', 2, '(n > 1)'), 
 63  'fur': ('Friulian', 2, '(n != 1)'), 
 64  'fy': ('Frisian', 2, '(n != 1)'), 
 65  'ga': ('Irish', 3, 'n==1 ? 0 : n==2 ? 1 : 2'), 
 66  'gl': ('Galician', 2, '(n != 1)'), 
 67  'gu': ('Gujarati', 2, '(n != 1)'), 
 68  'gun': ('Gun', 2, '(n > 1)'), 
 69  'ha': ('Hausa', 2, '(n != 1)'), 
 70  'he': ('Hebrew', 2, '(n != 1)'), 
 71  'hi': ('Hindi', 2, '(n != 1)'), 
 72  'hy': ('Armenian', 1, '0'), 
 73  'hr': ('Croatian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
 74  'hu': ('Hungarian', 2, '(n != 1)'), 
 75  'id': ('Indonesian', 1, '0'), 
 76  'is': ('Icelandic', 2, '(n != 1)'), 
 77  'it': ('Italian', 2, '(n != 1)'), 
 78  'ja': ('Japanese', 1, '0'), 
 79  'jv': ('Javanese', 2, '(n != 1)'), 
 80  'ka': ('Georgian', 1, '0'), 
 81  'km': ('Khmer', 1, '0'), 
 82  'kn': ('Kannada', 2, '(n != 1)'), 
 83  'ko': ('Korean', 1, '0'), 
 84  'ku': ('Kurdish', 2, '(n != 1)'), 
 85  'kw': ('Cornish', 4, '(n==1) ? 0 : (n==2) ? 1 : (n == 3) ? 2 : 3'), 
 86  'ky': ('Kirghiz; Kyrgyz', 1, '0'), 
 87  'lb': ('Luxembourgish; Letzeburgesch', 2, '(n != 1)'), 
 88  'ln': ('Lingala', 2, '(n > 1)'), 
 89  'lo': ('Lao', 1, '0'), 
 90  'lt': ('Lithuanian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
 91  'lv': ('Latvian', 3, '(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2)'), 
 92  'mg': ('Malagasy', 2, '(n > 1)'), 
 93  'mi': ('Maori', 2, '(n > 1)'), 
 94  'mk': ('Macedonian', 2, 'n==1 || n%10==1 ? 0 : 1'), 
 95  'ml': ('Malayalam', 2, '(n != 1)'), 
 96  'mn': ('Mongolian', 2, '(n != 1)'), 
 97  'mr': ('Marathi', 2, '(n != 1)'), 
 98  'ms': ('Malay', 1, '0'), 
 99  'mt': ('Maltese', 4, '(n==1 ? 0 : n==0 || ( n%100>1 && n%100<11) ? 1 : (n%100>10 && n%100<20 ) ? 2 : 3)'), 
100  'nah': ('Nahuatl languages', 2, '(n != 1)'), 
101  'nap': ('Neapolitan', 2, '(n != 1)'), 
102  'nb': ('Norwegian Bokmal', 2, '(n != 1)'), 
103  'ne': ('Nepali', 2, '(n != 1)'), 
104  'nl': ('Dutch; Flemish', 2, '(n != 1)'), 
105  'nn': ('Norwegian Nynorsk', 2, '(n != 1)'), 
106  'nso': ('Pedi; Sepedi; Northern Sotho', 2, '(n > 1)'), 
107  'or': ('Oriya', 2, '(n != 1)'), 
108  'pa': ('Panjabi; Punjabi', 2, '(n != 1)'), 
109  'pap': ('Papiamento', 2, '(n != 1)'), 
110  'pl': ('Polish', 3, '(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
111  'pms': ('Piemontese', 2, '(n != 1)'), 
112  'ps': ('Pushto; Pashto', 2, '(n != 1)'), 
113  'pt': ('Portuguese', 2, '(n != 1)'), 
114  'pt_BR': ('Portuguese (Brazil)', 2, '(n > 1)'), 
115  'ro': ('Romanian', 3, '(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2);'), 
116  'ru': ('Russian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
117  'sco': ('Scots', 2, '(n != 1)'), 
118  'sk': ('Slovak', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'), 
119  'sl': ('Slovenian', 4, '(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3)'), 
120  'so': ('Somali', 2, '(n != 1)'), 
121  'sq': ('Albanian', 2, '(n != 1)'), 
122  'sr': ('Serbian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
123  'su': ('Sundanese', 1, '0'), 
124  'sv': ('Swedish', 2, '(n != 1)'), 
125  'ta': ('Tamil', 2, '(n != 1)'), 
126  'te': ('Telugu', 2, '(n != 1)'), 
127  'tg': ('Tajik', 2, '(n != 1)'), 
128  'ti': ('Tigrinya', 2, '(n > 1)'), 
129  'th': ('Thai', 1, '0'), 
130  'tk': ('Turkmen', 2, '(n != 1)'), 
131  'tr': ('Turkish', 1, '0'), 
132  'uk': ('Ukrainian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
133  'vi': ('Vietnamese', 1, '0'), 
134  'wa': ('Walloon', 2, '(n > 1)'), 
135  # Chinese is difficult because the main divide is on script, not really  
136  # country. Simplified Chinese is used mostly in China, Singapore and Malaysia. 
137  # Traditional Chinese is used mostly in Hong Kong, Taiwan and Macau. 
138  'zh_CN': ('Chinese (China)', 1, '0'), 
139  'zh_HK': ('Chinese (Hong Kong)', 1, '0'), 
140  'zh_TW': ('Chinese (Taiwan)', 1, '0'), 
141  } 
142  """Dictionary of language data. 
143  The language code is the dictionary key (which may contain country codes and modifiers). 
144  The value is a tuple: (Full name in English, nplurals, plural equation)""" 
145   
def simplercode(code):
    """Strip the last component (e.g. the country code) from a language code.

    The position of the last separator is located in the normalized form of
    the code (see C{normalize_code}), and the original, un-normalized code is
    cut at that position.

    @see:
      - U{http://www.rfc-editor.org/rfc/bcp/bcp47.txt}
      - U{http://www.rfc-editor.org/rfc/rfc4646.txt}
      - U{http://www.rfc-editor.org/rfc/rfc4647.txt}
      - U{http://www.w3.org/International/articles/language-tags/}

    @param code: The language code to simplify
    @return: C{code} itself if it is empty or C{None}; the code up to (not
        including) its last separator; or C{""} if it has no separator.
    """
    if code:
        cut = normalize_code(code).rfind('-')
        # No separator left means there is nothing simpler to fall back to.
        return code[:cut] if cut >= 0 else ""
    # Empty / None input is handed back untouched.
    return code
expansion_factors = dict(
    af=0.1,
    ar=-0.09,
    es=0.21,
    fr=0.28,
    it=0.2,
)
"""Source to target string length expansion factors."""

import gettext
import locale
import re
import os

# Filled lazily by gettext_lang(): language code -> translation function.
iso639 = {}
"""ISO 639 language codes"""
# Filled lazily by gettext_country(): language code -> translation function.
iso3166 = {}
"""ISO 3166 country codes"""

langcode_re = re.compile(
    "^[a-z]{2,3}"          # 2-3 lowercase letters
    "([_-][A-Z]{2,3}|)"    # optional '_' or '-' followed by 2-3 uppercase letters
    "(@[a-zA-Z0-9]+|)$"    # optional '@' followed by alphanumerics
)
variant_re = re.compile(
    "^[_-][A-Z]{2,3}"      # '_' or '-' followed by 2-3 uppercase letters
    "(@[a-zA-Z0-9]+|)$"    # optional '@' followed by alphanumerics
)
def languagematch(languagecode, otherlanguagecode):
    """Match a language code against another, ignoring regions in the second.

    @param languagecode: The code to look for; if C{None}, the other code is
        only checked for being a well-formed language code (C{langcode_re}).
    @param otherlanguagecode: The code being tested
    @return: A true value if the codes are equal, or if C{otherlanguagecode}
        is C{languagecode} followed by a suffix accepted by C{variant_re};
        a false value otherwise.
    """
    if languagecode is None:
        return langcode_re.match(otherlanguagecode)

    identical = languagecode == otherlanguagecode
    if identical:
        return identical

    has_prefix = otherlanguagecode.startswith(languagecode)
    if not has_prefix:
        return has_prefix

    # What follows the shared prefix must look like a region/variant suffix.
    remainder = otherlanguagecode[len(languagecode):]
    return variant_re.match(remainder)
# Splits a name of the form "language (country)" into its two parts.
dialect_name_re = re.compile(
    r"(.+)"          # group 1: everything before the final parenthesised part
    r"\s"            # a single whitespace character
    r"\(([^)]+)\)$"  # group 2: the parenthesised text that ends the name
)
def tr_lang(langcode=None):
    """Build a function that translates language names into a given language.

    The returned function also handles names of the form
    C{"language (country)"}: the language and the country are translated
    separately and put back together in the same form.

    @param langcode: ISO code of the language to translate into; passed on
        unchanged to C{gettext_lang} and C{gettext_country}.
    @return: A function taking a language name and returning its translation
    """
    translate_language = gettext_lang(langcode)
    translate_country = gettext_country(langcode)

    def handlelanguage(name):
        parsed = dialect_name_re.match(name)
        if not parsed:
            # Plain language name without a "(country)" suffix.
            return translate_language(name)
        language, country = parsed.groups()
        return u"%s (%s)" % (translate_language(language), translate_country(country))

    return handlelanguage
def gettext_lang(langcode=None):
    """Returns a gettext function to translate language names into the given
    language, or the system language if no language is specified.

    The function is cached in C{iso639}, keyed on the language code; the
    system-language function is cached under the empty string.

    @param langcode: Code of the language to translate into. Any false value
        (C{None}, C{""}) selects the system language.
    @return: The C{ugettext} method of the C{iso_639} translation object
    """
    # Normalise the key *before* the cache lookup. The system-language entry
    # is stored under "", so looking up None would miss the cache on every
    # call and rebuild the translation each time.
    if not langcode:
        langcode = ""
    if langcode not in iso639:
        if not langcode:
            if os.name == "nt":
                # On Windows the default locale is not used for some reason
                t = gettext.translation('iso_639', languages=[locale.getdefaultlocale()[0]], fallback=True)
            else:
                t = gettext.translation('iso_639', fallback=True)
        else:
            t = gettext.translation('iso_639', languages=[langcode], fallback=True)
        iso639[langcode] = t.ugettext
    return iso639[langcode]
229
def gettext_country(langcode=None):
    """Returns a gettext function to translate country names into the given
    language, or the system language if no language is specified.

    The function is cached in C{iso3166}, keyed on the language code; the
    system-language function is cached under the empty string.

    @param langcode: Code of the language to translate into. Any false value
        (C{None}, C{""}) selects the system language.
    @return: The C{ugettext} method of the C{iso_3166} translation object
    """
    # Normalise the key *before* the cache lookup. The system-language entry
    # is stored under "", so looking up None would miss the cache on every
    # call and rebuild the translation each time.
    if not langcode:
        langcode = ""
    if langcode not in iso3166:
        if not langcode:
            if os.name == "nt":
                # On Windows the default locale is not used for some reason
                t = gettext.translation('iso_3166', languages=[locale.getdefaultlocale()[0]], fallback=True)
            else:
                t = gettext.translation('iso_3166', fallback=True)
        else:
            t = gettext.translation('iso_3166', languages=[langcode], fallback=True)
        iso3166[langcode] = t.ugettext
    return iso3166[langcode]
245
def normalize(string, normal_form="NFC"):
    """Return a unicode string in its normalized form.

    @param string: The string to be normalized; C{None} is passed through
    @param normal_form: NFC (default), NFD, NFKC, NFKD
    @return: Normalized string, or C{None} if C{string} is C{None}
    """
    if string is None:
        return None
    return unicodedata.normalize(normal_form, string)
257
def forceunicode(string):
    """Ensures that the string is in unicode.

    C{str} instances are decoded, using their own C{encoding} attribute when
    they have one and UTF-8 otherwise. C{StringElem} instances are converted
    with C{unicode()}. Anything else, including C{None}, is returned as is.

    @param string: A text string
    @type string: Unicode, String
    @return: String converted to Unicode.
    @rtype: Unicode
    """
    if string is None:
        return None
    if isinstance(string, str):
        # NOTE(review): relies on str having .decode(), i.e. Python 2 byte
        # strings.
        return string.decode(getattr(string, "encoding", "utf-8"))
    if isinstance(string, StringElem):
        return unicode(string)
    return string
274
def normalized_unicode(string):
    """Convert the string with C{forceunicode}, then apply C{normalize}
    with its default normal form."""
    as_unicode = forceunicode(string)
    return normalize(as_unicode)
278
def normalize_code(code):
    """Return the language code in a canonical form for comparison.

    The C{"_"} and C{"@"} separators are both replaced by C{"-"} and the
    result is lower-cased, e.g. C{"pt_BR"} becomes C{"pt-br"}.

    @param code: The language code to normalize
    @return: The normalized language code
    """
    dashed = code
    for separator in ("_", "@"):
        dashed = dashed.replace(separator, "-")
    return dashed.lower()
281
def simplify_to_common(language_code, languages=languages):
    """Simplify language code to the most commonly used form for the
    language, stripping country information for languages that tend
    not to be localized differently for different countries.

    The code is shortened one component at a time (see C{simplercode}) until
    its normalized form matches a normalized key of C{languages}, or until
    it cannot be shortened any further.

    @param language_code: The language code to simplify
    @param languages: Mapping whose keys are the known language codes;
        defaults to the module-level C{languages} table. The same mapping is
        used for every simplification step.
    @return: The first (longest) form of the code found in C{languages}, or
        the shortest form of the code if none matches.
    """
    # Normalise the known codes once instead of once per simplification step.
    known_codes = set(normalize_code(key) for key in languages)
    while True:
        simpler = simplercode(language_code)
        if simpler == "" or normalize_code(language_code) in known_codes:
            return language_code
        # Retry with the shorter code against the SAME table; the previous
        # recursive call dropped the caller's `languages` argument.
        language_code = simpler
291