Package translate :: Package storage :: Module properties
[hide private]
[frames | no frames]

Source Code for Module translate.storage.properties

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2004-2006 Zuza Software Foundation 
  5  # 
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """Classes that hold units of .properties, and similar, files that are used in 
 23     translating Java, Mozilla, MacOS and other software. 
 24   
 25     The L{propfile} class is a monolingual class with L{propunit} providing unit 
 26     level access. 
 27   
 28     The .properties store has become a general key value pair class with 
 29     L{Dialect} providing the ability to change the behaviour of the parsing 
 30     and handling of the various dialects. 
 31   
 32     Currently we support:: 
 33       * Java .properties 
 34       * Mozilla .properties 
 35       * Adobe Flex files 
 36       * MacOS X .strings files 
 37       * Skype .lang files 
 38   
 39   
 40     Dialects 
 41     ======== 
 42     The following provides references and descriptions of the various dialects supported:: 
 43   
 44     Java 
 45     ---- 
 46     Java .properties are supported completely except for the ability to drop 
 47     pairs that are not translated. 
 48   
 49     The following U{.properties file 
 50     description<http://java.sun.com/j2se/1.4.2/docs/api/java/util/Properties.html#load(java.io.InputStream)>} 
 51     and U{example <http://www.exampledepot.com/egs/java.util/Props.html>} give 
 52     some good references to the .properties specification. 
 53   
 54     Properties file may also hold Java 
 55     U{MessageFormat<http://java.sun.com/j2se/1.4.2/docs/api/java/text/MessageFormat.html>} 
 56     messages.  No special handling is provided in this storage class for 
 57     MessageFormat, but this may be implemented in future. 
 58   
 59     All delimiter types, comments, line continuations and spaces handling in 
 60     delimiters are supported. 
 61   
 62     Mozilla 
 63     ------- 
 64     Mozilla files use '=' as a delimiter, are UTF-8 encoded and thus don't need \\u 
 65     escaping.  Any \\U values will be converted to correct Unicode characters. 
 66   
 67     Strings 
 68     ------- 
 69     Mac OS X strings files are implemented using 
 70     U{these<http://developer.apple.com/mac/library/documentation/MacOSX/Conceptual/BPInternational/Articles/StringsFiles.html>} 
 71     U{two<http://developer.apple.com/mac/library/documentation/Cocoa/Conceptual/LoadingResources/Strings/Strings.html>} 
 72     articles as references. 
 73   
 74     Flex 
 75     ---- 
 76     Adobe Flex files seem to be normal .properties files but in UTF-8 just like 
 77     Mozilla files. This 
 78     U{page<http://livedocs.adobe.com/flex/3/html/help.html?content=l10n_3.html>} 
 79     provides the information used to implement the dialect. 
 80   
 81     Skype 
 82     ----- 
 83     Skype .lang files seem to be UTF-16 encoded .properties files. 
 84   
 85     Implementation 
 86     ============== 
 87   
 88     A simple summary of what is permissible follows. 
 89   
 90     Comments supported:: 
 91       # a comment 
 92       ! a comment 
 93       // a comment (only at the beginning of a line) 
 94       /* a comment (not across multiple lines) */ 
 95   
 96     Name and Value pairs:: 
 97       # Delimiters 
 98       key = value 
 99       key : value 
100       key value 
101   
102       # Space in key and around value 
103       \ key\ = \ value 
104   
105       # Note that the b and c are escaped for epydoc rendering 
106       b = a string with escape sequences \\t \\n \\r \\\\ \\" \\' \\ (space) \u0123 
107       c = a string with a continuation line \\ 
108           continuation line 
109   
110       # Special cases 
111       # key with no value 
112       key 
113       # value no key (extractable in prop2po but not mergeable in po2prop) 
114       =value 
115   
116       # .strings specific 
117       "key" = "value"; 
118  '" 
119  """ 
120   
121  import re 
122  import warnings 
123  import logging 
124   
125  from translate.lang import data 
126  from translate.misc import quote 
127  from translate.misc.typecheck import accepts, returns, IsOneOf 
128  from translate.storage import base 
129   
130  # the rstripeols convert dos <-> unix nicely as well 
131  # output will be appropriate for the platform 
132   
133  eol = "\n" 
@accepts(unicode, [unicode])
@returns(IsOneOf(type(None), unicode), int)
def _find_delimiter(line, delimiters):
    """Find the type and position of the delimiter in a property line.

    Property files can be delimited by "=", ":" or whitespace (space for now).
    We find the position of the first unescaped occurrence of each delimiter,
    then pick the one that appears first.  A space only wins over "=" or ":"
    when non-whitespace text separates it from that delimiter, so that
    C{key = value} is split on "=" and not on the space before it.

    @param line: A properties line
    @type line: unicode
    @param delimiters: valid delimiters
    @type delimiters: list
    @return: delimiter character and offset within L{line}; C{(None, -1)}
        when no delimiter is present
    @rtype: Tuple (delimiter char, Offset Integer)
    """
    # Leading whitespace never acts as a delimiter, so start searching after it.
    search_start = len(line) - len(line.lstrip())

    # First unescaped position of every delimiter (-1 when absent).
    positions = {}
    for delimiter in delimiters:
        pos = line.find(delimiter, search_start)
        # Skip occurrences escaped by a preceding backslash.  The "pos > 0"
        # guard matters: at column 0 there is no preceding character, and
        # line[-1] would wrongly inspect the *last* character of the line.
        while pos > 0 and line[pos - 1] == u"\\":
            pos = line.find(delimiter, pos + 1)
        positions[delimiter] = pos

    # Earliest non-space delimiter; walk the caller's list so the result does
    # not depend on dictionary ordering.
    mindelimiter = None
    minpos = -1
    for delimiter in delimiters:
        pos = positions[delimiter]
        if pos == -1 or delimiter == u" ":
            continue
        if minpos == -1 or pos < minpos:
            minpos = pos
            mindelimiter = delimiter

    space_pos = positions.get(u" ", -1)
    if mindelimiter is None:
        if space_pos != -1:
            # Use space delimiter if we found nothing else
            return (u" ", space_pos)
        return (None, -1)

    if space_pos != -1 and space_pos < minpos:
        # If space delimiter occurs earlier than ":" or "=" then it is the
        # delimiter only if there are non-whitespace characters between it and
        # the other detected delimiter.
        if line[space_pos:minpos].strip():
            return (u" ", space_pos)
    return (mindelimiter, minpos)
184
def find_delimeter(line):
    """Spelling error that is kept around for in case someone relies on it.

    Deprecated: use L{Dialect.find_delimiter} instead.  This wrapper always
    searches with the Java dialect's delimiters.

    @param line: A properties line
    @return: whatever L{_find_delimiter} returns for the Java delimiters
    """
    # stacklevel=2 attributes the warning to the caller's line rather than to
    # this wrapper, which is what someone hunting deprecated calls needs.
    warnings.warn("deprecated use Dialect.find_delimiter instead",
                  DeprecationWarning, stacklevel=2)
    return _find_delimiter(line, DialectJava.delimiters)
192
@accepts(unicode)
@returns(bool)
def is_line_continuation(line):
    """Determine whether L{line} has a line continuation marker.

    A line continues onto the next one when it ends in a backslash that is
    not itself escaped, i.e. when the run of backslashes at the very end of
    the line has odd length (an even run is just N/2 literal backslashes).

    @param line: A properties line
    @type line: unicode
    @return: Does L{line} end with a line continuation
    @rtype: Boolean
    """
    # Length of the backslash run at the end of the line (0 for an empty line).
    trailing_slashes = len(line) - len(line.rstrip(u"\\"))
    return trailing_slashes % 2 == 1
219
@accepts(unicode)
@returns(unicode)
def _key_strip(key):
    """Cleanup whitespace found around a key

    Trailing and leading whitespace is removed.  When the key, once its
    trailing whitespace is gone, ends in a backslash, the single character
    that followed that backslash is kept, since it was escaped.

    @param key: A properties key
    @type key: unicode
    @return: Key without any unneeded whitespace
    @rtype: unicode
    """
    trimmed = key.rstrip()
    if trimmed.endswith(u"\\"):
        # Put back the one character that the trailing backslash escaped.
        cut = len(trimmed)
        trimmed = trimmed + key[cut:cut + 1]
    return trimmed.lstrip()
# Registry of dialect classes, keyed by each class's ``name`` attribute.
dialects = dict()
# Dialect name that get_dialect() uses when it is called without an argument.
default_dialect = "java"


def register_dialect(dialect):
    """Store L{dialect} in the module registry under its C{name} attribute.

    Registering a second dialect with the same name replaces the first.
    """
    key = dialect.name
    dialects[key] = dialect
243
def get_dialect(dialect=default_dialect):
    """Look up a registered dialect by name.

    @param dialect: name the dialect was registered under
    @return: the registered dialect, or None when the name is unknown
    """
    try:
        return dialects[dialect]
    except KeyError:
        return None
247
class Dialect(object):
    """Settings for the various behaviours in key=value files."""

    # Key under which register_dialect() files the dialect.
    name = None
    # Encoding used by propunit/propfile when none is given explicitly.
    default_encoding = 'iso-8859-1'
    # List of delimiter strings handed to _find_delimiter().
    delimiters = None
    # NOTE(review): the next three are declared here and overridden by
    # DialectStrings, but nothing in this module reads them - presumably
    # consumed by other code; confirm.
    pair_terminator = u""
    key_wrap_char = u""
    value_wrap_char = u""
    # Comment lines that propfile.parse() discards instead of keeping.
    drop_comments = []

    @classmethod
    def encode(cls, string, encoding=None):
        """Encode the string

        A false L{string} (None, empty) is treated as the empty string.  For
        any encoding other than "utf-8" the text is passed through
        C{quote.javapropertiesencode}; for "utf-8" it is returned as is.
        """
        #FIXME: dialects are a bad idea, not possible for subclasses to override key methods
        text = string or u""
        if encoding == "utf-8":
            return text
        return quote.javapropertiesencode(text)

    @classmethod
    def find_delimiter(cls, line):
        """Find the delimiter, using this dialect's C{delimiters}"""
        return _find_delimiter(line, cls.delimiters)

    @classmethod
    def key_strip(cls, key):
        """Strip unneeded characters from the key"""
        return _key_strip(key)

    @classmethod
    def value_strip(cls, value):
        """Strip unneeded characters from the value (leading whitespace only)"""
        return value.lstrip()
281
class DialectJava(Dialect):
    """Dialect registered as "java": ISO-8859-1 by default, with "=", ":"
    and space as delimiters."""

    name = "java"
    default_encoding = "iso-8859-1"
    delimiters = [u"=", u":", u" "]


register_dialect(DialectJava)
class DialectJavaUtf8(DialectJava):
    """Dialect registered as "java-utf8": the Java delimiters with UTF-8 as
    the default encoding."""

    name = "java-utf8"
    default_encoding = "utf-8"
    delimiters = [u"=", u":", u" "]

    @classmethod
    def encode(cls, string, encoding=None):
        """Encode with C{quote.mozillapropertiesencode}; L{encoding} is
        ignored and a false L{string} is treated as empty."""
        text = string or u""
        return quote.mozillapropertiesencode(text)


register_dialect(DialectJavaUtf8)
class DialectFlex(DialectJava):
    """Dialect registered as "flex": the Java dialect with UTF-8 as the
    default encoding."""

    name = "flex"
    default_encoding = "utf-8"


register_dialect(DialectFlex)
class DialectMozilla(DialectJavaUtf8):
    """Dialect registered as "mozilla": the java-utf8 dialect restricted to
    "=" as the only delimiter."""

    name = "mozilla"
    delimiters = [u"="]


register_dialect(DialectMozilla)
class DialectSkype(Dialect):
    """Dialect registered as "skype": UTF-16 by default, "=" delimited."""

    name = "skype"
    default_encoding = "utf-16"
    delimiters = [u"="]

    @classmethod
    def encode(cls, string, encoding=None):
        """Encode with C{quote.mozillapropertiesencode}; L{encoding} is
        ignored and a false L{string} is treated as empty."""
        text = string or u""
        return quote.mozillapropertiesencode(text)


register_dialect(DialectSkype)
class DialectStrings(Dialect):
    """Dialect registered as "strings": UTF-16 by default, "=" delimited,
    with double-quoted keys and values and a ";" after each pair."""

    name = "strings"
    default_encoding = "utf-16"
    delimiters = [u"="]
    pair_terminator = u";"
    key_wrap_char = u'"'
    value_wrap_char = u'"'
    # Placeholder comment that propfile.parse() drops rather than keeps.
    drop_comments = ["/* No comment provided by engineer. */"]

    @classmethod
    def key_strip(cls, key):
        """Strip unneeded characters (whitespace, wrapping quotes) from the key"""
        newkey = key.rstrip().rstrip('"')
        # If line now end in \ we put back the char that was escaped
        if newkey[-1:] == "\\":
            newkey += key[len(newkey):len(newkey)+1]
        return newkey.lstrip().lstrip('"')

    @classmethod
    def value_strip(cls, value):
        """Strip unneeded characters (whitespace, the trailing ";" and the
        wrapping quotes) from the value"""
        newvalue = value.rstrip().rstrip(';').rstrip('"')
        # If line now end in \ we put back the char that was escaped
        if newvalue[-1:] == "\\":
            newvalue += value[len(newvalue):len(newvalue)+1]
        return newvalue.lstrip().lstrip('"')

    @classmethod
    def encode(cls, string, encoding=None):
        """Escape double quotes, newlines and tabs; L{encoding} is ignored.

        A false L{string} (None, empty) is treated as the empty string, the
        same way the other dialects' C{encode} methods treat it, instead of
        failing on C{None.replace}.
        """
        text = string or u""
        return text.replace('"', '\\"').replace("\n", r"\n").replace("\t", r"\t")


register_dialect(DialectStrings)
class propunit(base.TranslationUnit):
    """an element of a properties file i.e. a name and value, and any comments
    associated"""

    def __init__(self, source="", personality="java"):
        """construct a blank propunit

        @param source: initial source text
        @param personality: name of a registered dialect
        """
        # The dialect has to be in place before the base constructor runs.
        self.personality = get_dialect(personality)
        super(propunit, self).__init__(source)
        self.name = u""
        self.value = u""
        self.translation = u""
        self.delimiter = u"="
        self.comments = []
        # Assigned last: the property setter stores the encoded text in
        # self.value, replacing the blank set just above.
        self.source = source

    def setsource(self, source):
        """Store L{source}, dialect-encoded, as the unit's raw value."""
        self._rich_source = None
        text = data.forceunicode(source) or u""
        self.value = self.personality.encode(text, self.encoding)

    def getsource(self):
        """Return the raw value run through C{quote.propertiesdecode}."""
        return quote.propertiesdecode(self.value)

    source = property(getsource, setsource)

    def settarget(self, target):
        """Store L{target}, dialect-encoded, as the unit's raw translation."""
        self._rich_target = None
        text = data.forceunicode(target) or u""
        self.translation = self.personality.encode(text, self.encoding)

    def gettarget(self):
        """Return the decoded translation with backslash-space pairs turned
        into plain spaces."""
        decoded = quote.propertiesdecode(self.translation)
        return re.sub(u"\\\\ ", u" ", decoded)

    target = property(gettarget, settarget)

    def _get_encoding(self):
        """Encoding of the owning store if there is one, otherwise the
        dialect's default encoding."""
        if not self._store:
            return self.personality.default_encoding
        return self._store.encoding

    encoding = property(_get_encoding)

    def __str__(self):
        """convert to a string. double check that unicode is handled somehow
        here"""
        output = self.getoutput()
        assert isinstance(output, unicode)
        return output.encode(self.encoding)

    def getoutput(self):
        """convert the element back into formatted lines for a .properties
        file"""
        notes = self.getnotes()
        if notes:
            notes += u"\n"
        if self.isblank():
            return notes + u"\n"
        # Re-encode both texts with the current dialect and encoding before
        # writing; the translation is emitted when there is one, otherwise
        # the source value.
        self.value = self.personality.encode(self.source, self.encoding)
        self.translation = self.personality.encode(self.target, self.encoding)
        fields = {
            "notes": notes,
            "key": self.name,
            "del": self.delimiter,
            "value": self.translation or self.value,
        }
        return u"%(notes)s%(key)s%(del)s%(value)s\n" % fields

    def getlocations(self):
        """The unit's only location is its key name."""
        return [self.name]

    def addnote(self, text, origin=None, position="append"):
        """Append a developer-origin note to the unit's comments; notes of
        any other origin are delegated to the base class."""
        if origin not in ('programmer', 'developer', 'source code', None):
            return super(propunit, self).addnote(text, origin=origin,
                                                 position=position)
        self.comments.append(data.forceunicode(text))

    def getnotes(self, origin=None):
        """Return the comments joined by newlines for developer origins;
        other origins are delegated to the base class."""
        if origin not in ('programmer', 'developer', 'source code', None):
            return super(propunit, self).getnotes(origin)
        return u'\n'.join(self.comments)

    def removenotes(self):
        """Discard all comments held by this unit."""
        self.comments = []

    def isblank(self):
        """returns whether this is a blank element, containing only
        comments."""
        return not self.name and not self.value

    def istranslatable(self):
        """A unit is translatable when it has a key name."""
        return bool(self.name)

    def getid(self):
        """The key name doubles as the unit id."""
        return self.name

    def setid(self, value):
        """Set the key name."""
        self.name = value
459
class propfile(base.TranslationStore):
    """this class represents a .properties file, made up of propunits"""
    UnitClass = propunit

    def __init__(self, inputfile=None, personality="java", encoding=None):
        """construct a propfile, optionally reading in from inputfile

        @param inputfile: open file-like object to read and then close, or
            None for an empty store
        @param personality: name of a registered dialect
        @param encoding: encoding to use; defaults to the dialect's
            default encoding
        """
        super(propfile, self).__init__(unitclass=self.UnitClass)
        self.personality = get_dialect(personality)
        self.encoding = encoding or self.personality.default_encoding
        self.filename = getattr(inputfile, 'name', '')
        if inputfile is not None:
            # Close the file even when reading it fails.
            try:
                propsrc = inputfile.read()
            finally:
                inputfile.close()
            self.parse(propsrc)

    def parse(self, propsrc):
        """read the source of a properties file in and include them as units

        @param propsrc: contents of the file; decoded via
            C{self.detect_encoding}, whose detected encoding replaces
            C{self.encoding}
        """
        text, encoding = self.detect_encoding(propsrc, default_encodings=[self.personality.default_encoding, 'utf-8', 'utf-16'])
        self.encoding = encoding
        propsrc = text

        newunit = propunit("", self.personality.name)
        inmultilinevalue = False

        for line in propsrc.split(u"\n"):
            # handle multiline value if we're in one
            line = quote.rstripeol(line)
            stripped = line.strip()
            if inmultilinevalue:
                newunit.value += line.lstrip()
                # see if there's more
                inmultilinevalue = is_line_continuation(newunit.value)
                if inmultilinevalue:
                    # still waiting for more: strip the backslash
                    newunit.value = newunit.value[:-1]
                else:
                    # we're finished, add it to the list...
                    self.addunit(newunit)
                    newunit = propunit("", self.personality.name)
            # otherwise, this could be a comment
            # FIXME handle /* */ in a more reliable way
            # FIXME handle // inline comments
            # The last test looks at the final two characters ([-2:]) so that
            # a line closing a comment is recognised; slicing with [:-2]
            # compared everything *but* the last two characters.
            elif (stripped[:1] in (u'#', u'!') or
                  stripped[:2] in (u"/*", u"//") or
                  stripped[-2:] == "*/"):
                # add a comment
                if line not in self.personality.drop_comments:
                    newunit.comments.append(line)
            elif not stripped:
                # this is a blank line: flush the pending unit when it would
                # produce any output
                if str(newunit).strip():
                    self.addunit(newunit)
                    newunit = propunit("", self.personality.name)
            else:
                newunit.delimiter, delimiter_pos = self.personality.find_delimiter(line)
                if delimiter_pos == -1:
                    # key with no value
                    newunit.name = self.personality.key_strip(line)
                    newunit.value = u""
                    self.addunit(newunit)
                    newunit = propunit("", self.personality.name)
                else:
                    newunit.name = self.personality.key_strip(line[:delimiter_pos])
                    rawvalue = line[delimiter_pos+1:]
                    if is_line_continuation(rawvalue.lstrip()):
                        # value continues on the next line: keep the unit
                        # open and drop the trailing backslash
                        inmultilinevalue = True
                        newunit.value = rawvalue.lstrip()[:-1]
                    else:
                        newunit.value = self.personality.value_strip(rawvalue)
                        self.addunit(newunit)
                        newunit = propunit("", self.personality.name)
        # see if there is a leftover one...
        if inmultilinevalue or len(newunit.comments) > 0:
            self.addunit(newunit)

    def __str__(self):
        """convert the units back to lines"""
        return "".join([str(unit) for unit in self.units])
538
class javafile(propfile):
    """A propfile fixed to the "java" dialect with encoding "auto"."""

    Name = _("Java Properties")
    # Spelled like stringsfile.Extensions; the original attribute was
    # misspelled "Exensions", which is kept as an alias in case something
    # reads the old name.
    Extensions = ['properties']
    Exensions = Extensions

    def __init__(self, *args, **kwargs):
        """Construct the store; any caller-supplied C{personality} or
        C{encoding} keyword is overridden."""
        kwargs['personality'] = "java"
        kwargs['encoding'] = "auto"
        super(javafile, self).__init__(*args, **kwargs)
548
class javautf8file(propfile):
    """A propfile fixed to the "java-utf8" dialect with encoding "utf-8"."""

    Name = _("Java Properties (UTF-8)")
    # Spelled like stringsfile.Extensions; the original attribute was
    # misspelled "Exensions", which is kept as an alias in case something
    # reads the old name.
    Extensions = ['properties']
    Exensions = Extensions

    def __init__(self, *args, **kwargs):
        """Construct the store; any caller-supplied C{personality} or
        C{encoding} keyword is overridden."""
        kwargs['personality'] = "java-utf8"
        kwargs['encoding'] = "utf-8"
        super(javautf8file, self).__init__(*args, **kwargs)
558
class stringsfile(propfile):
    """A propfile fixed to the "strings" dialect."""

    Name = _("OS X Strings")
    Extensions = ['strings']

    def __init__(self, *args, **kwargs):
        """Construct the store; any caller-supplied C{personality} keyword
        is overridden."""
        kwargs.update(personality="strings")
        super(stringsfile, self).__init__(*args, **kwargs)
567