1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """Classes that hold units of .properties, and similar, files that are used in
23 translating Java, Mozilla, MacOS and other software.
24
25 The L{propfile} class is a monolingual class with L{propunit} providing unit
26 level access.
27
28 The .properties store has become a general key value pair class with
29 L{Dialect} providing the ability to change the behaviour of the parsing
30 and handling of the various dialects.
31
32 Currently we support::
33 * Java .properties
34 * Mozilla .properties
35 * Adobe Flex files
36 * MacOS X .strings files
37 * Skype .lang files
38
39
40 Dialects
41 ========
42 The following provides references and descriptions of the various dialects supported::
43
44 Java
45 ----
46 Java .properties are supported completely except for the ability to drop
47 pairs that are not translated.
48
49 The following U{.properties file
50 description<http://java.sun.com/j2se/1.4.2/docs/api/java/util/Properties.html#load(java.io.InputStream)>}
51 and U{example <http://www.exampledepot.com/egs/java.util/Props.html>} give
52 some good references to the .properties specification.
53
Properties files may also hold Java
55 U{MessageFormat<http://java.sun.com/j2se/1.4.2/docs/api/java/text/MessageFormat.html>}
56 messages. No special handling is provided in this storage class for
57 MessageFormat, but this may be implemented in future.
58
All delimiter types, comments, line continuations and space handling in
delimiters are supported.
61
62 Mozilla
63 -------
Mozilla files use '=' as a delimiter, are UTF-8 encoded and thus don't need \\u
escaping. Any \\U values will be converted to correct Unicode characters.

67 Strings
68 -------
69 Mac OS X strings files are implemented using
70 U{these<http://developer.apple.com/mac/library/documentation/MacOSX/Conceptual/BPInternational/Articles/StringsFiles.html>}
71 U{two<http://developer.apple.com/mac/library/documentation/Cocoa/Conceptual/LoadingResources/Strings/Strings.html>}
72 articles as references.
73
74 Flex
75 ----
76 Adobe Flex files seem to be normal .properties files but in UTF-8 just like
77 Mozilla files. This
78 U{page<http://livedocs.adobe.com/flex/3/html/help.html?content=l10n_3.html>}
79 provides the information used to implement the dialect.
80
81 Skype
82 -----
83 Skype .lang files seem to be UTF-16 encoded .properties files.
84
85 Implementation
86 ==============
87
88 A simple summary of what is permissible follows.
89
90 Comments supported::
91 # a comment
92 ! a comment
93 // a comment (only at the beginning of a line)
94 /* a comment (not across multiple lines) */
95
96 Name and Value pairs::
97 # Delimiters
98 key = value
99 key : value
100 key value
101
102 # Space in key and around value
103 \ key\ = \ value
104
105 # Note that the b and c are escaped for epydoc rendering
106 b = a string with escape sequences \\t \\n \\r \\\\ \\" \\' \\ (space) \u0123
107 c = a string with a continuation line \\
108 continuation line
109
110 # Special cases
111 # key with no value
112 key
113 # value no key (extractable in prop2po but not mergeable in po2prop)
114 =value
115
116 # .strings specific
117 "key" = "value";
118 """
119
120 from translate.storage import base
121 from translate.misc import quote
122 from translate.misc.typecheck import accepts, returns, IsOneOf
123 from translate.lang import data
124 import re
125 import warnings
126
127
128
129
# Newline string constant.
# NOTE(review): not referenced anywhere in the visible part of this module -
# confirm whether other code still relies on it before removing.
eol = "\n"
131
132
@accepts(unicode, [unicode])
@returns(IsOneOf(type(None), unicode), int)
def _find_delimiter(line, delimiters):
    """Find the type and position of the delimiter in a property line.

    Property files can be delimited by "=", ":" or whitespace (space for now).
    We find the position of the first unescaped occurrence of each delimiter,
    then pick the one that appears first.  A space only wins over another
    delimiter when there is non-whitespace text between the space and that
    delimiter (so C{key = value} is delimited by "=", not by the space).

    @param line: A properties line
    @type line: unicode
    @param delimiters: valid delimiters
    @type delimiters: list
    @return: delimiter character and offset within L{line}; C{(None, -1)}
             when no delimiter is found
    @rtype: Tuple (delimiter char, Offset Integer)
    """
    # Leading whitespace is never a delimiter, so searching starts after it.
    search_from = len(line) - len(line.lstrip())

    # First unescaped position of every delimiter, -1 when absent.
    positions = {}
    for delimiter in delimiters:
        positions[delimiter] = -1
        pos = line.find(delimiter, search_from)
        while pos != -1:
            # A delimiter at offset 0 has no preceding character.  Without
            # the explicit check, line[pos - 1] would wrap around to the last
            # character and a trailing backslash (line continuation) would
            # make the leading delimiter look escaped.
            if pos == 0 or line[pos - 1] != u"\\":
                positions[delimiter] = pos
                break
            pos = line.find(delimiter, pos + 1)

    # Earliest non-space delimiter.
    mindelimiter = None
    minpos = -1
    for delimiter, pos in positions.items():
        if pos == -1 or delimiter == u" ":
            continue
        if minpos == -1 or pos < minpos:
            minpos = pos
            mindelimiter = delimiter

    space_pos = positions.get(u" ", -1)
    if mindelimiter is None:
        if space_pos != -1:
            # Use space delimiter if we found nothing else
            return (u" ", space_pos)
        return (None, -1)

    if space_pos != -1 and space_pos < minpos:
        # If the space occurs earlier than ":" or "=" then it is the
        # delimiter only if there are non-whitespace characters between it
        # and the other detected delimiter.
        if line[space_pos:minpos].strip():
            return (u" ", space_pos)
    return (mindelimiter, minpos)
181
184 """Spelling error that is kept around for in case someone relies on it.
185
186 Deprecated."""
187 warnings.warn("deprecated use Dialect.find_delimiter instead", DeprecationWarning)
188 return _find_delimiter(line, DialectJava.delimiters)
189
194 """Determine whether L{line} has a line continuation marker.
195
196 .properties files can be terminated with a backslash (\\) indicating
197 that the 'value' continues on the next line. Continuation is only
198 valid if there are an odd number of backslashses (an even number
199 would result in a set of N/2 slashes not an escape)
200
201 @param line: A properties line
202 @type line: str
203 @return: Does L{line} end with a line continuation
204 @rtype: Boolean
205 """
206 pos = -1
207 count = 0
208 if len(line) == 0:
209 return False
210
211
212 while len(line) >= -pos and line[pos:][0] == "\\":
213 pos -= 1
214 count += 1
215 return (count % 2) == 1
216
217
@accepts(unicode)
@returns(unicode)
def _key_strip(key):
    """Remove the whitespace surrounding a key.

    Trailing whitespace is dropped first.  If what remains ends in a
    backslash, the single character that followed it in L{key} (the escaped
    whitespace, if any) is put back.  Leading whitespace is removed last.

    @param key: A properties key
    @type key: unicode
    @return: Key without any unneeded whitespace
    @rtype: unicode
    """
    trimmed = key.rstrip()
    if trimmed.endswith("\\"):
        # The backslash escaped the character after it; restore that one.
        cut = len(trimmed)
        trimmed = trimmed + key[cut:cut + 1]
    return trimmed.lstrip()
233
# Module-level dialect table, initially empty.
# NOTE(review): presumably filled by register_dialect() and read by
# get_dialect(); neither definition is visible here - confirm.
dialects = {}
# Name of the default dialect; matches the "java" default used for the
# personality argument of propunit and propfile.
default_dialect = "java"
240
244
274
280 register_dialect(DialectJava)
286 register_dialect(DialectFlex)
297 register_dialect(DialectMozilla)
308 register_dialect(DialectSkype)
312 name = "strings"
313 default_encoding = "utf-16"
314 delimiters = [u"="]
315 pair_terminator = u";"
316 key_wrap_char = u'"'
317 value_wrap_char = u'"'
318
320 """Strip uneeded characters from the key"""
321 newkey = key.rstrip().rstrip('"')
322
323 if newkey[-1:] == "\\":
324 newkey += key[len(newkey):len(newkey)+1]
325 return newkey.lstrip().lstrip('"')
326 key_strip = classmethod(key_strip)
327
329 """Strip uneeded characters from the value"""
330 newvalue = value.rstrip().rstrip(';').rstrip('"')
331
332 if newvalue[-1:] == "\\":
333 newvalue += value[len(newvalue):len(newvalue)+1]
334 return newvalue.lstrip().lstrip('"')
335 value_strip = classmethod(value_strip)
336
339 encode = classmethod(encode)
340 register_dialect(DialectStrings)
341
342
343 -class propunit(base.TranslationUnit):
344 """an element of a properties file i.e. a name and value, and any comments
345 associated"""
346
347 - def __init__(self, source="", personality="java"):
357
362
366
367 source = property(getsource, setsource)
368
373
375 translation = quote.propertiesdecode(self.translation)
376 translation = re.sub(u"\\\\ ", u" ", translation)
377 return translation
378
379 target = property(gettarget, settarget)
380
387
389 """convert the element back into formatted lines for a .properties
390 file"""
391 notes = self.getnotes()
392 if notes:
393 notes += u"\n"
394 if self.isblank():
395 return notes + u"\n"
396 else:
397 self.value = self.personality.encode(self.source)
398 self.translation = self.personality.encode(self.target)
399 value = self.translation or self.value
400 return u"%(notes)s%(key)s%(del)s%(value)s\n" % {"notes": notes,
401 "key": self.name,
402 "del": self.delimiter,
403 "value": value}
404
407
408 - def addnote(self, text, origin=None, position="append"):
409 if origin in ['programmer', 'developer', 'source code', None]:
410 text = data.forceunicode(text)
411 self.comments.append(text)
412 else:
413 return super(propunit, self).addnote(text, origin=origin,
414 position=position)
415
417 if origin in ['programmer', 'developer', 'source code', None]:
418 return u'\n'.join(self.comments)
419 else:
420 return super(propunit, self).getnotes(origin)
421
424
426 """returns whether this is a blank element, containing only
427 comments."""
428 return not (self.name or self.value)
429
431 return bool(self.name)
432
435
438
439
440 -class propfile(base.TranslationStore):
441 """this class represents a .properties file, made up of propunits"""
442 UnitClass = propunit
443
444 - def __init__(self, inputfile=None, personality="java", encoding=None):
445 """construct a propfile, optionally reading in from inputfile"""
446 super(propfile, self).__init__(unitclass=self.UnitClass)
447 self.personality = get_dialect(personality)
448 self.encoding = encoding
449 self.filename = getattr(inputfile, 'name', '')
450 if inputfile is not None:
451 propsrc = inputfile.read()
452 inputfile.close()
453 self.parse(propsrc)
454
455 - def parse(self, propsrc):
456 """read the source of a properties file in and include them as units"""
457 newunit = propunit("", self.personality.name)
458 inmultilinevalue = False
459 if self.encoding is not None:
460 propsrc = unicode(propsrc, self.encoding)
461 else:
462 propsrc = unicode(propsrc, self.personality.default_encoding)
463 for line in propsrc.split(u"\n"):
464
465 line = quote.rstripeol(line)
466 if inmultilinevalue:
467 newunit.value += line.lstrip()
468
469 inmultilinevalue = is_line_continuation(newunit.value)
470
471 if inmultilinevalue:
472
473 newunit.value = newunit.value[:-1]
474 if not inmultilinevalue:
475
476 self.addunit(newunit)
477 newunit = propunit("", self.personality.name)
478
479
480
481 elif line.strip()[:1] in (u'#', u'!') or line.strip()[:2] in (u"/*", u"//") or line.strip()[:-2] == "*/":
482
483 newunit.comments.append(line)
484 elif not line.strip():
485
486 if str(newunit).strip():
487 self.addunit(newunit)
488 newunit = propunit("", self.personality.name)
489 else:
490 newunit.delimiter, delimiter_pos = self.personality.find_delimiter(line)
491 if delimiter_pos == -1:
492 newunit.name = self.personality.key_strip(line)
493 newunit.value = u""
494 self.addunit(newunit)
495 newunit = propunit("", self.personality.name)
496 else:
497 newunit.name = self.personality.key_strip(line[:delimiter_pos])
498 if is_line_continuation(line[delimiter_pos+1:].lstrip()):
499 inmultilinevalue = True
500 newunit.value = line[delimiter_pos+1:].lstrip()[:-1]
501 else:
502 newunit.value = self.personality.value_strip(line[delimiter_pos+1:])
503 self.addunit(newunit)
504 newunit = propunit("", self.personality.name)
505
506 if inmultilinevalue or len(newunit.comments) > 0:
507 self.addunit(newunit)
508
510 """convert the units back to lines"""
511 lines = []
512 for unit in self.units:
513 lines.append(str(unit))
514 return "".join(lines)
515
523