1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """classes that hold units of .po files (pounit) or entire files (pofile)
22 gettext-style .po (or .pot) files are used in translations for KDE et al (see kbabel)"""
23
24 from __future__ import generators
25 from translate.misc.multistring import multistring
26 from translate.misc import quote
27 from translate.misc import textwrap
28 from translate.lang import data
29 from translate.storage import pocommon, base
30 import re
31 import copy
32 import cStringIO
33 import poparser
34
35 lsep = "\n#: "
"""Separator for #: entries"""
37
38
39
# Maps each escape sequence as written in a PO file to the character it stands for.
po_unescape_map = {"\\r": "\r", "\\t": "\t", '\\"': '"', '\\n': '\n', '\\\\': '\\'}
# The reverse mapping: special character -> escape sequence written to the PO file.
po_escape_map = dict([(value, key) for (key, value) in po_unescape_map.items()])


def escapeforpo(line):
    """Escape a single line of text for use inside a quoted PO string.

    Every character that is a key of po_escape_map (carriage return, tab,
    double quote, newline and backslash) is replaced by its backslash
    escape sequence; all other characters are copied unchanged.  Callers
    are expected to have split the text on newlines already.

    @param line: unescaped text
    @return: the escaped text
    """
    # All keys of po_escape_map are single characters, so one pass with a
    # per-character lookup replaces the earlier search/dedupe/sort of match
    # positions.  Joining on line[:0] keeps the result the same string type
    # as the input, even when the input is empty.
    return line[:0].join([po_escape_map.get(char, char) for char in line])
61
65
def wrapline(line):
    """Wrap a line of text for a PO file and return the list of pieces.

    The text is wrapped at 76 columns without replacing, expanding or
    dropping whitespace.  When a wrapped piece would begin with a space,
    that space is moved to the end of the preceding piece instead.
    """
    pieces = textwrap.wrap(line, 76, replace_whitespace=False, expand_tabs=False, drop_whitespace=False)
    for position in range(1, len(pieces)):
        piece = pieces[position]
        if piece.startswith(' '):
            # Shift the leading space back onto the end of the previous piece.
            pieces[position] = piece[1:]
            pieces[position - 1] += ' '
    return pieces
80
def quoteforpo(text):
    """Quote and escape text for a PO file, returning a list of quoted lines.

    None yields an empty list.  Text that contains a newline, or whose
    single line is longer than 71 characters, is written in multi-line
    form: every line except the last is wrapped, escaped and terminated
    with an escaped newline, and the list opens with an empty '""' entry
    unless the text is exactly one line followed by a trailing newline.
    A non-empty last line is wrapped and escaped without a newline marker.
    """
    if text is None:
        return []
    quoted = []
    lines = text.split("\n")
    if len(lines) > 1 or len(lines[0]) > 71:
        if len(lines) != 2 or lines[1]:
            quoted.append('""')
        for line in lines[:-1]:
            wrapped = wrapline(line)
            if not wrapped:
                # An empty source line is represented by a bare escaped newline.
                quoted.append('"\\n"')
                continue
            for piece in wrapped[:-1]:
                quoted.append('"' + escapeforpo(piece) + '"')
            if wrapped[-1]:
                quoted.append('"' + escapeforpo(wrapped[-1]) + '\\n"')
    final_line = lines[-1]
    if final_line:
        for piece in wrapline(final_line):
            quoted.append('"' + escapeforpo(piece) + '"')
    return quoted
103
105 """Remove quote and unescape line from po file.
106
107 @param line: a quoted line from a po file (msgid or msgstr)
108 """
109 extracted = quote.extractwithoutquotes(line, '"', '"', '\\', includeescapes=unescapehandler)[0]
110 return extracted
111
114
def encodingToUse(encoding):
    """Return the encoding to use, substituting utf-8 when none is given.

    None and the literal string "CHARSET" both map to 'utf-8'; any other
    value is returned unchanged.  No check is made that the Python runtime
    actually knows the encoding.

    @param encoding: an encoding name, "CHARSET", or None
    @return: 'utf-8' or the encoding that was passed in
    """
    if encoding is None or encoding == "CHARSET":
        return 'utf-8'
    return encoding
121
122
123
124
125
126
127
128
def is_null(lst):
    """Return True when lst is an empty list or holds only the empty quoted string '""'."""
    if lst == []:
        return True
    return len(lst) == 1 and lst[0] == '""'
131
133 left = string.find('"')
134 right = string.rfind('"')
135 if right > -1:
136 return string[left:right+1]
137 else:
138 return string[left:] + '"'
139
140 -class pounit(pocommon.pounit):
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155 __shallow__ = ['_store']
156
def __init__(self, source=None, encoding="UTF-8"):
    """Create an empty unit and then hand source to the base class initialiser.

    @param source: passed on to pocommon.pounit.__init__
    @param encoding: encoding name, normalised through encodingToUse
    """
    self._encoding = encodingToUse(encoding)
    self.obsolete = False
    self._initallcomments(blankall=True)

    # Current message parts, each held as a list of lines.
    self.msgctxt = []
    self.msgid = []
    self.msgid_pluralcomments = []
    self.msgid_plural = []
    self.msgstr = []

    # "Previous" message parts.
    self.prev_msgctxt = []
    self.prev_msgid = []
    self.prev_msgid_plural = []

    # Counterparts of the message parts used while the unit is obsolete.
    self.obsoletemsgctxt = []
    self.obsoletemsgid = []
    self.obsoletemsgid_pluralcomments = []
    self.obsoletemsgid_plural = []
    self.obsoletemsgstr = []

    # The base initialiser runs last, once every field above exists.
    pocommon.pounit.__init__(self, source)
175
185
193
194 allcomments = property(_get_all_comments)
195
204
222
226
def setsource(self, source):
    """Set msgid (and msgid_plural) from the given unescaped value.

    @param source: an unescaped source string.
    """
    msgid, msgid_plural = self._set_source_vars(source)
    self.msgid = msgid
    self.msgid_plural = msgid_plural
233 source = property(getsource, setsource)
234
def _get_prev_source(self):
    """Return the unescaped previous msgid, built from prev_msgid and prev_msgid_plural."""
    previous = self._get_source_vars(self.prev_msgid, self.prev_msgid_plural)
    return previous
238
def _set_prev_source(self, source):
    """Set the previous msgid (and previous msgid_plural) from the given unescaped value.

    @param source: an unescaped source string.
    """
    prev_msgid, prev_msgid_plural = self._set_source_vars(source)
    self.prev_msgid = prev_msgid
    self.prev_msgid_plural = prev_msgid_plural
245 prev_source = property(_get_prev_source, _set_prev_source)
246
254
def settarget(self, target):
    """Set the msgstr to the given (unescaped) value.

    Byte strings are first decoded with the unit's encoding.  For a unit
    with a plural, a multistring is expanded to its list of strings and a
    plain string becomes a one-item list.  For a unit without a plural, a
    list or dict must hold exactly one element, which is unwrapped.

    A list or dict target is stored as a dict of quoted line lists keyed by
    index (or by the dict's own keys); anything else is stored as a single
    quoted line list.

    @param target: a string, multistring, list or dict of target strings
    @raise ValueError: when the unit has no plural but a list or dict with
        a number of elements other than one is given
    """
    self._rich_target = None
    if isinstance(target, str):
        target = target.decode(self._encoding)
    if self.hasplural():
        # multistring is tested before the generic string check so that its
        # individual plural forms are used.
        if isinstance(target, multistring):
            target = target.strings
        elif isinstance(target, basestring):
            target = [target]
    elif isinstance(target, (dict, list)):
        if len(target) == 1:
            target = target[0]
        else:
            raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
    if isinstance(target, list):
        self.msgstr = dict([(index, quoteforpo(targetstring)) for index, targetstring in enumerate(target)])
    elif isinstance(target, dict):
        self.msgstr = dict([(index, quoteforpo(targetstring)) for index, targetstring in target.iteritems()])
    else:
        self.msgstr = quoteforpo(target)
279 target = property(gettarget, settarget)
280
282 """Return comments based on origin value (programmer, developer, source code and translator)"""
283 if origin == None:
284 comments = u"".join([comment[2:] for comment in self.othercomments])
285 comments += u"".join([comment[3:] for comment in self.automaticcomments])
286 elif origin == "translator":
287 comments = u"".join ([comment[2:] for comment in self.othercomments])
288 elif origin in ["programmer", "developer", "source code"]:
289 comments = u"".join([comment[3:] for comment in self.automaticcomments])
290 else:
291 raise ValueError("Comment type not valid")
292
293 return comments[:-1]
294
def addnote(self, text, origin=None, position="append"):
    """Add a note (comment) to the unit.

    This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote

    @param text: the note; empty or whitespace-only text is ignored
    @param origin: "programmer", "developer" or "source code" stores the
        note as an automatic comment ("#. "); any other value stores it
        as a translator comment ("# ")
    @param position: "append" adds the note after the existing comments;
        any other value puts it in front of them
    """
    # Ignore empty strings and strings without non-space characters.
    if not (text and text.strip()):
        return
    text = data.forceunicode(text)
    # Always defined, so that prepending a translator note no longer fails
    # on an unbound local variable.
    autocomments = origin in ["programmer", "developer", "source code"]
    if autocomments:
        commentlist = self.automaticcomments
        linestart = "#. "
    else:
        commentlist = self.othercomments
        linestart = "# "
    newcomments = [linestart + line + "\n" for line in text.split("\n")]
    if position == "append":
        # Extend in place so the unit's own list object is updated.
        commentlist.extend(newcomments)
    else:
        newcomments.extend(commentlist)
        if autocomments:
            self.automaticcomments = newcomments
        else:
            self.othercomments = newcomments
317
319 """Remove all the translator's notes (other comments)"""
320 self.othercomments = []
321
323
def __deepcopy__(self, memo=None):
    """Return a deep copy of this unit for copy.deepcopy.

    Attributes named in __shallow__ are shared with the original; every
    other instance attribute is deep-copied.

    @param memo: the memo dictionary supplied by copy.deepcopy
    """
    if memo is None:
        memo = {}
    new_unit = self.__class__()
    # Register the copy before descending, so that references back to this
    # unit found while copying its attributes resolve to the copy.
    memo[id(self)] = new_unit
    shallow = set(self.__shallow__)
    for key, value in self.__dict__.items():
        if key not in shallow:
            setattr(new_unit, key, copy.deepcopy(value, memo))
    for key in shallow:
        # A shallow attribute that was never set is simply not carried over.
        if hasattr(self, key):
            setattr(new_unit, key, getattr(self, key))
    return new_unit
340
342 return copy.deepcopy(self)
343
349
def _msgstrlen(self):
    """Return the length of the unquoted msgstr with surrounding whitespace stripped.

    When msgstr is a dict, each value is unquoted and stripped, the results
    are joined with newlines, and the length of the stripped combination is
    returned.
    """
    if not isinstance(self.msgstr, dict):
        return len(unquotefrompo(self.msgstr).strip())
    forms = [unquotefrompo(msgstr).strip() for msgstr in self.msgstr.itervalues()]
    return len("\n".join(forms).strip())
356
def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
    """Merge otherpo (a unit with the same msgid) into this unit.

    A non-blank self.msgstr is only overwritten when overwrite is True,
    and comments are only merged when comments is True.  Automatic, msgid
    and source comments are only merged when authoritative is False.
    A unit that is not a pounit is passed to the base class merge.
    """

    def mergelists(list1, list2, split=False):
        """Merge the items of list2 into list1, modifying list1 in place."""
        # As soon as either list holds a unicode item, decode the byte
        # strings of both lists (as UTF-8).
        if unicode in [type(item) for item in list2] + [type(item) for item in list1]:
            for current in (list1, list2):
                for position, item in enumerate(current):
                    if isinstance(item, str):
                        current[position] = item.decode("utf-8")

        # Take the line ending from the first item of list1 (the last
        # matching candidate wins); without a usable first item use "\n".
        if list1 and list1[0]:
            lineend = ""
            for candidate in ["\n", "\r", "\n\r"]:
                if list1[0].endswith(candidate):
                    lineend = candidate
        else:
            lineend = "\n"

        if split:
            # Compare whitespace-separated entries rather than whole lines;
            # the first word of the last line seen is reused as the prefix.
            known = []
            incoming = []
            prefix = "#"
            for item in list1:
                words = item.split()
                known.extend(words[1:])
                prefix = words[0]
            for item in list2:
                words = item.split()
                incoming.extend(words[1:])
                prefix = words[0]
            list1.extend(["%s %s%s" % (prefix, item, lineend) for item in incoming if not item in known])
            return

        if list1 == list2:
            return
        for item in list2:
            if lineend:
                # Conform to the line ending used by list1.
                item = item.rstrip() + lineend
            # Lines shorter than 5 characters are added even when duplicated.
            if item not in list1 or len(item) < 5:
                list1.append(item)

    if not isinstance(otherpo, pounit):
        super(pounit, self).merge(otherpo, overwrite, comments)
        return

    if comments:
        mergelists(self.othercomments, otherpo.othercomments)
        mergelists(self.typecomments, otherpo.typecomments)
        if not authoritative:
            mergelists(self.automaticcomments, otherpo.automaticcomments)
            mergelists(self.msgidcomments, otherpo.msgidcomments)
            mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)

    if not self.istranslated() or overwrite:
        # Drop a "_: ..." comment embedded in the other unit's target.
        if self._extract_msgidcomments(otherpo.target):
            otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments()+ '\n', '')
        self.target = otherpo.target
        if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
            self.markfuzzy()
        else:
            self.markfuzzy(otherpo.isfuzzy())
    elif not otherpo.istranslated():
        if self.source != otherpo.source:
            self.markfuzzy()
    elif self.target != otherpo.target:
        self.markfuzzy()
433
435
436
def isheader(self):
    """Return whether this unit is a header.

    True when msgid and msgctxt are null, msgstr is not null and there are
    no msgid comments.
    """
    if not is_null(self.msgid):
        return False
    if is_null(self.msgstr):
        return False
    if not self.msgidcomments == []:
        return False
    return is_null(self.msgctxt)
442
def isblank(self):
    """Return whether this unit is blank.

    A header, or a unit with msgid comments, is never blank.  Otherwise the
    unit is blank when msgid and msgstr both have zero length and msgctxt
    is null.
    """
    if self.isheader() or len(self.msgidcomments):
        return False
    return (self._msgidlen() == 0) and (self._msgstrlen() == 0) and (is_null(self.msgctxt))
449
450
451
452
457
465
475
478
481
484
486 """Makes this unit obsolete"""
487 self.obsolete = True
488 if self.msgctxt:
489 self.obsoletemsgctxt = self.msgctxt
490 if self.msgid:
491 self.obsoletemsgid = self.msgid
492 self.msgid = []
493 if self.msgidcomments:
494 self.obsoletemsgidcomments = self.msgidcomments
495 self.msgidcomments = []
496 if self.msgid_plural:
497 self.obsoletemsgid_plural = self.msgid_plural
498 self.msgid_plural = []
499 if self.msgstr:
500 self.obsoletemsgstr = self.msgstr
501 self.msgstr = []
502 self.sourcecomments = []
503 self.automaticcomments = []
504
def resurrect(self):
    """Make an obsolete unit normal again.

    Each non-empty obsolete field is moved back to its normal counterpart
    and the obsolete field is then emptied.
    """
    self.obsolete = False
    if self.obsoletemsgctxt:
        # The context goes back to msgctxt, not to msgid.
        self.msgctxt = self.obsoletemsgctxt
        self.obsoletemsgctxt = []
    if self.obsoletemsgid:
        self.msgid = self.obsoletemsgid
        self.obsoletemsgid = []
    if self.obsoletemsgidcomments:
        self.msgidcomments = self.obsoletemsgidcomments
        self.obsoletemsgidcomments = []
    if self.obsoletemsgid_plural:
        self.msgid_plural = self.obsoletemsgid_plural
        self.obsoletemsgid_plural = []
    if self.obsoletemsgstr:
        self.msgstr = self.obsoletemsgstr
        # Clear the real field (the old code set a misspelt attribute).
        self.obsoletemsgstr = []
523
def hasplural(self):
    """Return whether this unit has a plural, i.e. whether msgid_plural is non-empty."""
    plural_lines = self.msgid_plural
    return len(plural_lines) > 0
527
530
def _getmsgpartstr(self, partname, partlines, partcomments=""):
    """Return the text of one message part (e.g. msgid or msgstr).

    @param partname: the keyword that starts the part
    @param partlines: the quoted lines of the part, or a dict of such
        lists, in which case each entry is written as partname[key] in
        sorted key order
    @param partcomments: quoted comment lines written ahead of the part's
        own lines
    @return: the part as a string in which every line ends with a newline
    """
    if isinstance(partlines, dict):
        pieces = []
        for partkey in sorted(partlines.keys()):
            pieces.append(self._getmsgpartstr("%s[%d]" % (partname, partkey), partlines[partkey], partcomments))
        return "".join(pieces)

    partstr = partname + " "
    partstartline = 0
    has_lines = len(partlines) > 0
    if len(partcomments) == 0:
        if has_lines:
            # No comments: the first line shares the keyword's line.
            partstr += partlines[0]
            partstartline = 1
        else:
            partstr += '""'
    else:
        if has_lines and len(unquotefrompo(partlines[:1])) == 0:
            # A blank leading line goes before the comments ...
            partstr += partlines[0] + '\n'
            # ... and is only skipped below when more lines follow it.
            if len(partlines) > 1:
                partstartline += 1
        else:
            # The comments start on a line of their own.
            partstr += '""\n'
        if len(partcomments) > 1:
            # Fold several comments into one, dropping each one's "_:"
            # marker and its trailing escaped newline.
            combinedcomment = []
            for comment in partcomments:
                comment = unquotefrompo([comment])
                if comment.startswith("_:"):
                    comment = comment[len("_:"):]
                if comment.endswith("\\n"):
                    comment = comment[:-len("\\n")]
                combinedcomment.append(comment)
            partcomments = quoteforpo("_:%s" % "".join(combinedcomment))
        partstr += "\n".join(partcomments)
        partstr = quote.rstripeol(partstr)
    partstr += '\n'

    # Add whatever lines have not been written yet.
    for partline in partlines[partstartline:]:
        partstr += partline + '\n'
    return partstr
573
def _encodeifneccessary(self, output):
    """Encode unicode strings and return any other value unchanged.

    The encoding is read from the "encoding" attribute of the unit
    (defaulting to "UTF-8") and normalised through encodingToUse.
    """
    if not isinstance(output, unicode):
        return output
    encoding = encodingToUse(getattr(self, "encoding", "UTF-8"))
    return output.encode(encoding)
580
def __str__(self):
    """Convert the unit to a string: the output of _getoutput, encoded if it is unicode."""
    return self._encodeifneccessary(self._getoutput())
585
def _getoutput(self):
    """Return this PO unit as a string."""

    def previous_lines(keyword, quoted):
        # "#| " lines for one previous-message field; the keyword only
        # appears on the first line.
        if len(quoted) == 0:
            return []
        result = ["#| %s %s\n" % (keyword, quoted[0])]
        result.extend(["#| %s\n" % line for line in quoted[1:]])
        return result

    lines = list(self.othercomments)

    if self.isobsolete():
        lines.extend(self.typecomments)
        obsoleteparts = []
        if self.obsoletemsgctxt:
            obsoleteparts.append(self._getmsgpartstr("#~ msgctxt", self.obsoletemsgctxt))
        obsoleteparts.append(self._getmsgpartstr("#~ msgid", self.obsoletemsgid, self.obsoletemsgidcomments))
        if self.obsoletemsgid_plural or self.obsoletemsgid_pluralcomments:
            obsoleteparts.append(self._getmsgpartstr("#~ msgid_plural", self.obsoletemsgid_plural, self.obsoletemsgid_pluralcomments))
        obsoleteparts.append(self._getmsgpartstr("#~ msgstr", self.obsoletemsgstr))
        # Continuation lines of a multi-line part need the "#~ " marker too.
        lines.extend([part.replace('\n"', '\n#~ "') for part in obsoleteparts])
        return "".join([self._encodeifneccessary(line) for line in lines])

    # Without a msgid only the plain comments are written, unless the unit
    # is the header or has a context or source comments.
    if is_null(self.msgid) and not (self.isheader() or self.getcontext() or self.sourcecomments):
        return "".join(lines)

    lines.extend(self.automaticcomments)
    lines.extend(self.sourcecomments)
    lines.extend(self.typecomments)
    lines.extend(previous_lines('msgctxt', self.prev_msgctxt))
    lines.extend(previous_lines('msgid', self.prev_msgid))
    lines.extend(previous_lines('msgid_plural', self.prev_msgid_plural))
    if self.msgctxt:
        lines.append(self._getmsgpartstr("msgctxt", self.msgctxt))
    lines.append(self._getmsgpartstr("msgid", self.msgid, self.msgidcomments))
    if self.msgid_plural or self.msgid_pluralcomments:
        lines.append(self._getmsgpartstr("msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
    lines.append(self._getmsgpartstr("msgstr", self.msgstr))
    return "".join([self._encodeifneccessary(line) for line in lines])
633
def getlocations(self):
    """Get a list of locations from sourcecomments in the PO unit.

    Each source comment has its line ending and its first three characters
    removed; what remains is split on whitespace.

    rtype: List
    return: A list of the locations with '#: ' stripped
    """
    locations = []
    for sourcecomment in self.sourcecomments:
        stripped = quote.rstripeol(sourcecomment)
        locations.extend(stripped[3:].split())
    return locations
645
647 """Add a location to sourcecomments in the PO unit
648
649 @param location: Text location e.g. 'file.c:23' does not include #:
650 @type location: String
651
652 """
653 self.sourcecomments.append("#: %s\n" % location)
654
665
def getcontext(self):
    """Get the message context: the unquoted msgctxt followed by the extracted msgid comments."""
    context = unquotefrompo(self.msgctxt)
    return context + self._extract_msgidcomments()
669
def getid(self):
    """Return a unique identifier for this unit.

    With msgid comments the identifier is "_: <context>", a newline and the
    source; with only a context it is the context and the source separated
    by the character with code 4; otherwise it is the source itself.
    """
    context = self.getcontext()
    source = self.source
    if self.msgidcomments:
        return "_: %s\n%s" % (context, source)
    if context:
        return "%s\04%s" % (context, source)
    return source
684
685 -class pofile(pocommon.pofile):
686 """this represents a .po file containing various units"""
687 UnitClass = pounit
689 """construct a pofile, optionally reading in from inputfile.
690 encoding can be specified but otherwise will be read from the PO header"""
691 self.UnitClass = unitclass
692 pocommon.pofile.__init__(self, unitclass=unitclass)
693 self.units = []
694 self.filename = ''
695 self._encoding = encodingToUse(encoding)
696 if inputfile is not None:
697 self.parse(inputfile)
698
700 """Deprecated: changes the encoding on the file."""
701
702
703
704 raise DeprecationWarning
705
706 self._encoding = encodingToUse(newencoding)
707 if not self.units:
708 return
709 header = self.header()
710 if not header or header.isblank():
711 return
712 charsetline = None
713 headerstr = unquotefrompo(header.msgstr)
714 for line in headerstr.split("\n"):
715 if not ":" in line:
716 continue
717 key, value = line.strip().split(":", 1)
718 if key.strip() != "Content-Type":
719 continue
720 charsetline = line
721 if charsetline is None:
722 headerstr += "Content-Type: text/plain; charset=%s" % self._encoding
723 else:
724 charset = re.search("charset=([^ ]*)", charsetline)
725 if charset is None:
726 newcharsetline = charsetline
727 if not newcharsetline.strip().endswith(";"):
728 newcharsetline += ";"
729 newcharsetline += " charset=%s" % self._encoding
730 else:
731 charset = charset.group(1)
732 newcharsetline = charsetline.replace("charset=%s" % charset, "charset=%s" % self._encoding, 1)
733 headerstr = headerstr.replace(charsetline, newcharsetline, 1)
734 header.msgstr = quoteforpo(headerstr)
735
def parse(self, input):
    """Parse the given file or file source string into this store.

    @param input: a file-like object or a byte string holding PO content;
        when it has a "name" attribute, that becomes self.filename
    @raise base.ParseError: wrapping any exception raised while parsing
    """
    # Local import: sys is only needed to fetch the active exception in a
    # way that does not depend on the "except X, e" / "except X as e" syntax.
    import sys
    try:
        if hasattr(input, 'name'):
            self.filename = input.name
        elif not getattr(self, 'filename', ''):
            self.filename = ''
        if isinstance(input, str):
            # Wrap a source string so the parser can read it like a file.
            input = cStringIO.StringIO(input)
        poparser.parse_units(poparser.ParseState(input, pounit), self)
    except Exception:
        raise base.ParseError(sys.exc_info()[1])
748
def removeduplicates(self, duplicatestyle="merge"):
    """Make sure each msgid is unique; merge comments etc. from duplicates into the original.

    @param duplicatestyle: "merge" merges a duplicate into the first unit
        with the same id; "msgctxt" keeps duplicates and adds their
        locations as msgctxt to tell them apart.  With any other value a
        duplicate is dropped from the store.
    """
    id_dict = {}
    uniqueunits = []
    # Units that have already been given a distinguishing comment/context.
    markedpos = []

    def addcomment(thepo):
        thepo.msgidcomments.append('"_: %s\\n"' % " ".join(thepo.getlocations()))
        markedpos.append(thepo)

    for thepo in self.units:
        unit_id = thepo.getid()
        if thepo.isheader() and not thepo.getlocations():
            # A header without locations is always kept as it is.
            uniqueunits.append(thepo)
        elif unit_id in id_dict:
            if duplicatestyle == "merge":
                if unit_id:
                    id_dict[unit_id].merge(thepo)
                else:
                    addcomment(thepo)
                    uniqueunits.append(thepo)
            elif duplicatestyle == "msgctxt":
                origpo = id_dict[unit_id]
                if origpo not in markedpos:
                    origpo.msgctxt.append('"%s"' % escapeforpo(" ".join(origpo.getlocations())))
                    # Remember the original, so that it is not given another
                    # msgctxt line for every further duplicate.
                    markedpos.append(origpo)
                thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations())))
                uniqueunits.append(thepo)
        else:
            if not unit_id:
                if duplicatestyle == "merge":
                    addcomment(thepo)
                else:
                    thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations())))
            id_dict[unit_id] = thepo
            uniqueunits.append(thepo)
    self.units = uniqueunits
789
791 """convert to a string. double check that unicode is handled somehow here"""
792 output = self._getoutput()
793 if isinstance(output, unicode):
794 return output.encode(getattr(self, "encoding", "UTF-8"))
795 return output
796
def _getoutput(self):
    """Convert the units back to a single string.

    Each unit is converted with str() and followed by a newline.  Trailing
    whitespace is stripped from the joined result, and non-empty output
    ends with exactly one newline.
    """
    unit_texts = [str(unit) + "\n" for unit in self.units]
    output = "".join(self.encode(unit_texts)).rstrip()
    if output:
        output += "\n"
    return output
808
def encode(self, lines):
    """Return a list in which every unicode string in lines is encoded with self._encoding.

    UTF-8 is used when self._encoding is None or (in any letter case)
    "charset".  Items that are not unicode are passed through unchanged.
    """
    encoding = self._encoding
    if encoding is None or encoding.lower() == "charset":
        encoding = 'UTF-8'
    encoded = []
    for line in lines:
        if isinstance(line, unicode):
            encoded.append(line.encode(encoding))
        else:
            encoded.append(line)
    return encoded
820
822 """decode any non-unicode strings in lines with self._encoding"""
823 newlines = []
824 for line in lines:
825 if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset":
826 try:
827 line = line.decode(self._encoding)
828 except UnicodeError, e:
829 raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line))
830 newlines.append(line)
831 return newlines
832
837