1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """Classes that hold units of .po files (pounit) or entire files (pofile).
23
24 Gettext-style .po (or .pot) files are used in translations for KDE, GNOME and
25 many other projects.
26
27 This uses libgettextpo from the gettext package. Any version before 0.17 will
28 at least cause some subtle bugs or may not work at all. Developers might want
29 to have a look at gettext-tools/libgettextpo/gettext-po.h from the gettext
30 package for the public API of the library.
31 """
32
33 from translate.misc.multistring import multistring
34 from translate.storage import base, pocommon
35 from translate.storage import pypo
36 from translate.storage.pocommon import encodingToUse
37 from translate.lang import data
38 from ctypes import c_size_t, c_int, c_uint, c_char_p, c_long, CFUNCTYPE, POINTER
39 from ctypes import Structure, cdll
40 import ctypes.util
41 import os
42 import re
43 import sys
44 import tempfile
45 import urllib
46
47 lsep = " "
48 """Separator for #: entries"""
49
50 STRING = c_char_p
51
52
53
56
57
58 xerror_prototype = CFUNCTYPE(None, c_int, POINTER(po_message), STRING, c_uint, c_uint, c_int, STRING)
59 xerror2_prototype = CFUNCTYPE(None, c_int, POINTER(po_message), STRING, c_uint, c_uint, c_int, STRING, POINTER(po_message), STRING, c_uint, c_uint, c_int, STRING)
60
61
62
66
67
69 _fields_ = [
70 ('error', CFUNCTYPE(None, c_int, c_int, STRING)),
71 ('error_at_line', CFUNCTYPE(None, c_int, c_int, STRING, c_uint, STRING)),
72 ('multiline_warning', CFUNCTYPE(None, STRING, STRING)),
73 ('multiline_error', CFUNCTYPE(None, STRING, STRING)),
74 ]
75
76
77
def xerror_cb(severity, message, filename, lineno, column, multilint_p, message_text):
    """Error callback installed as the ``xerror`` member of the handler.

    Writes a one-line trace of every argument to stderr and then raises
    ``ValueError(message_text)`` when ``severity`` is 1 or greater.

    The parameters follow the ``xerror_prototype`` signature: an integer
    severity, a message pointer, a file name, line and column numbers, a
    multi-line flag and the error text.
    NOTE(review): the exact severity levels are defined by libgettextpo's
    gettext-po.h - confirm which level 1 corresponds to.
    """
    trace = ("xerror_cb", severity, message, filename, lineno, column,
             multilint_p, message_text)
    # Written through sys.stderr.write rather than the ``print >>`` statement
    # so the trace works whether or not ``print`` is a statement.
    sys.stderr.write(" ".join([str(item) for item in trace]) + "\n")
    if severity >= 1:
        raise ValueError(message_text)
82
83
def xerror2_cb(severity, message1, filename1, lineno1, column1, multiline_p1, message_text1, message2, filename2, lineno2, column2, multiline_p2, message_text2):
    """Error callback installed as the ``xerror2`` member of the handler.

    Takes a severity followed by two sets of (message, file name, line,
    column, multi-line flag, text) arguments, matching
    ``xerror2_prototype``. Writes a one-line trace of every argument to
    stderr and then raises ``ValueError(message_text1)`` when ``severity``
    is 1 or greater. Only the first text is used for the exception.
    """
    trace = ("xerror2_cb", severity,
             message1, filename1, lineno1, column1, multiline_p1, message_text1,
             message2, filename2, lineno2, column2, multiline_p2, message_text2)
    # Written through sys.stderr.write rather than the ``print >>`` statement
    # so the trace works whether or not ``print`` is a statement.
    sys.stderr.write(" ".join([str(item) for item in trace]) + "\n")
    if severity >= 1:
        raise ValueError(message_text1)
88
89
90
# Handle to the loaded libgettextpo shared library; None until one is found.
gpo = None

# First try the names that ctypes.util.find_library may be able to resolve.
names = ['gettextpo', 'libgettextpo']
for name in names:
    lib_location = ctypes.util.find_library(name)
    if lib_location:
        gpo = cdll.LoadLibrary(lib_location)
        if gpo:
            break
else:
    # for/else: reached only when the loop never hit ``break``, i.e. none of
    # the candidate names yielded a library. Fall back to the plain
    # shared-object file name and turn a load failure into ImportError.
    try:
        gpo = cdll.LoadLibrary('libgettextpo.so')
    except OSError:
        raise ImportError("gettext PO library not found")
108
109
110
# Declare argument and return types for the libgettextpo functions used below.
# NOTE(review): message/file handles are declared as c_int here; whether that
# is wide enough for the library's opaque handles on every platform should be
# confirmed against gettext-po.h.

# File access
gpo.po_file_read_v3.argtypes = [STRING, POINTER(po_xerror_handler)]
gpo.po_file_write_v2.argtypes = [c_int, STRING, POINTER(po_xerror_handler)]
# Previously misspelled as ``retype``, which only created an unused attribute
# and left the return type undeclared.
gpo.po_file_write_v2.restype = c_int

# Header
gpo.po_file_domain_header.restype = STRING
gpo.po_header_field.restype = STRING
gpo.po_header_field.argtypes = [STRING, STRING]

# Locations (filepos)
gpo.po_filepos_file.restype = STRING
gpo.po_message_filepos.restype = c_int
gpo.po_message_filepos.argtypes = [c_int, c_int]
gpo.po_message_add_filepos.argtypes = [c_int, STRING, c_size_t]

# Message getters
gpo.po_message_comments.restype = STRING
gpo.po_message_extracted_comments.restype = STRING
gpo.po_message_prev_msgctxt.restype = STRING
gpo.po_message_prev_msgid.restype = STRING
gpo.po_message_prev_msgid_plural.restype = STRING
gpo.po_message_is_format.restype = c_int
gpo.po_message_is_format.argtypes = [c_int, STRING]
gpo.po_message_set_format.argtypes = [c_int, STRING, c_int]
gpo.po_message_msgctxt.restype = STRING
gpo.po_message_msgid.restype = STRING
gpo.po_message_msgid_plural.restype = STRING
gpo.po_message_msgstr.restype = STRING
gpo.po_message_msgstr_plural.restype = STRING

# Message setters
gpo.po_message_set_comments.argtypes = [c_int, STRING]
gpo.po_message_set_extracted_comments.argtypes = [c_int, STRING]
gpo.po_message_set_fuzzy.argtypes = [c_int, c_int]
gpo.po_message_set_msgctxt.argtypes = [c_int, STRING]

# Module-wide error handler passed to po_file_read_v3 / po_file_write_v2,
# wired to the Python callbacks defined above.
xerror_handler = po_xerror_handler()
xerror_handler.xerror = xerror_prototype(xerror_cb)
xerror_handler.xerror2 = xerror2_prototype(xerror2_cb)
151
152
155
156
159
160
163
164
166 """Returns the libgettextpo version
167
168 @rtype: three-value tuple
169 @return: libgettextpo version in the following format::
170 (major version, minor version, subminor version)
171 """
172 libversion = c_long.in_dll(gpo, 'libgettextpo_version')
173 major = libversion.value >> 16
174 minor = libversion.value >> 8
175 subminor = libversion.value - (major << 16) - (minor << 8)
176 return major, minor, subminor
177
178
179 -class pounit(pocommon.pounit):
180
# Build a unit around a libgettextpo message.
#   source      -- optional source text; assigned through the ``source``
#                  property when it is truthy or the empty string.
#   encoding    -- stored as self._encoding (defaults to 'utf-8').
#   gpo_message -- an existing message handle to wrap; when it is falsy a
#                  new message is created with po_message_create().
# NOTE(review): indentation is lost in this listing, so it cannot be told
# from here whether ``elif gpo_message`` pairs with ``if not gpo_message`` or
# with ``if source or source == ""`` - confirm against the original file.
181 - def __init__(self, source=None, encoding='utf-8', gpo_message=None):
# Rich-text caches start out empty.
182 self._rich_source = None
183 self._rich_target = None
184 self._encoding = encoding
185 if not gpo_message:
186 self._gpo_message = gpo.po_message_create()
# An empty string is accepted as an explicit source, unlike None.
187 if source or source == "":
188 self.source = source
189 self.target = ""
190 elif gpo_message:
191 self._gpo_message = gpo_message
192 self.infer_state()
193
204
205
210 msgid_plural = property(None, setmsgid_plural)
211
213
def remove_msgid_comments(text):
    """Strip a leading ``_: ...`` msgid comment line from *text*.

    Falsy input (``None`` or the empty string) and text that does not
    start with ``_:`` are returned unchanged. Otherwise everything after
    the first ``_: <comment>`` line is returned, or ``u""`` when no such
    line followed by a newline is found.
    """
    if not text:
        return text
    if not text.startswith("_:"):
        return text
    # DOTALL lets the captured group span every remaining line; without it
    # only the first line after the comment survived and multi-line text
    # was silently truncated. ``[^\n]*`` keeps the comment part on one line.
    remainder = re.search(r"_: [^\n]*\n(.*)", text, re.DOTALL)
    if remainder:
        return remainder.group(1)
    return u""
225 singular = remove_msgid_comments((gpo.po_message_msgid(self._gpo_message) or "").decode(self._encoding))
226 if singular:
227 if self.hasplural():
228 multi = multistring(singular, self._encoding)
229 pluralform = (gpo.po_message_msgid_plural(self._gpo_message) or "").decode(self._encoding)
230 multi.strings.append(pluralform)
231 return multi
232 else:
233 return singular
234 else:
235 return u""
236
249 source = property(getsource, setsource)
250
252 if self.hasplural():
253 plurals = []
254 nplural = 0
255 plural = gpo.po_message_msgstr_plural(self._gpo_message, nplural)
256 while plural:
257 plurals.append(plural.decode(self._encoding))
258 nplural += 1
259 plural = gpo.po_message_msgstr_plural(self._gpo_message, nplural)
260 if plurals:
261 multi = multistring(plurals, encoding=self._encoding)
262 else:
263 multi = multistring(u"")
264 else:
265 multi = (gpo.po_message_msgstr(self._gpo_message) or "").decode(self._encoding)
266 return multi
267
269
270 if self.hasplural():
271 if isinstance(target, multistring):
272 target = target.strings
273 elif isinstance(target, basestring):
274 target = [target]
275
276 elif isinstance(target, (dict, list)):
277 if len(target) == 1:
278 target = target[0]
279 else:
280 raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
281
282
283
284
285
286 if isinstance(target, (dict, list)):
287 i = 0
288 message = gpo.po_message_msgstr_plural(self._gpo_message, i)
289 while message is not None:
290 gpo.po_message_set_msgstr_plural(self._gpo_message, i, None)
291 i += 1
292 message = gpo.po_message_msgstr_plural(self._gpo_message, i)
293
294 if isinstance(target, list):
295 for i in range(len(target)):
296 targetstring = target[i]
297 if isinstance(targetstring, unicode):
298 targetstring = targetstring.encode(self._encoding)
299 gpo.po_message_set_msgstr_plural(self._gpo_message, i, targetstring)
300
301 elif isinstance(target, dict):
302 for i, targetstring in enumerate(target.itervalues()):
303 gpo.po_message_set_msgstr_plural(self._gpo_message, i, targetstring)
304
305 else:
306 if isinstance(target, unicode):
307 target = target.encode(self._encoding)
308 if target is None:
309 gpo.po_message_set_msgstr(self._gpo_message, "")
310 else:
311 gpo.po_message_set_msgstr(self._gpo_message, target)
312 target = property(gettarget, settarget)
313
315 """The unique identifier for this unit according to the conventions in
316 .mo files."""
317 id = (gpo.po_message_msgid(self._gpo_message) or "").decode(self._encoding)
318
319
320
321
322
323
324
325 context = gpo.po_message_msgctxt(self._gpo_message)
326 if context:
327 id = u"%s\04%s" % (context.decode(self._encoding), id)
328 return id
329
331 if origin == None:
332 comments = gpo.po_message_comments(self._gpo_message) + \
333 gpo.po_message_extracted_comments(self._gpo_message)
334 elif origin == "translator":
335 comments = gpo.po_message_comments(self._gpo_message)
336 elif origin in ["programmer", "developer", "source code"]:
337 comments = gpo.po_message_extracted_comments(self._gpo_message)
338 else:
339 raise ValueError("Comment type not valid")
340
341 if comments and get_libgettextpo_version() < (0, 17, 0):
342 comments = "\n".join([line for line in comments.split("\n")])
343
344 return comments[:-1].decode(self._encoding)
345
def addnote(self, text, origin=None, position="append"):
    """Add *text* to the unit's comments.

    Empty or whitespace-only text is ignored. *origin* selects the comment
    kind: "programmer", "developer" and "source code" go to the extracted
    comments, anything else to the ordinary comments. When notes already
    exist, *position* decides how they are combined: "append" puts the new
    text after them, "merge" adds only lines not already contained in the
    existing notes (lines shorter than five characters are always added),
    and any other value puts the new text first.
    """
    if not text or not text.strip():
        return
    text = data.forceunicode(text)
    existing = self.getnotes(origin)
    combined = None
    if not existing:
        # Nothing to combine with: just drop stray carriage returns.
        combined = "\n".join([piece.rstrip("\r") for piece in text.split("\n")])
    elif position == "append":
        combined = existing + "\n" + text
    elif position == "merge":
        if existing != text:
            merged_lines = existing.split("\n")
            for candidate in text.split("\n"):
                candidate = candidate.rstrip("\r")
                # Substring test against the whole existing note text, so
                # very short lines are exempted and always added.
                if len(candidate) < 5 or candidate not in existing:
                    merged_lines.append(candidate)
            combined = "\n".join(merged_lines)
    else:
        combined = text + '\n' + existing

    if not combined:
        return

    # Libraries older than 0.17 get each non-empty line prefixed by a space.
    pad_lines = get_libgettextpo_version() < (0, 17, 0)
    padded = []
    for piece in combined.split("\n"):
        padded.append(" " + piece if (piece and pad_lines) else piece)
    encoded = "\n".join(padded).encode(self._encoding)
    if origin in ["programmer", "developer", "source code"]:
        gpo.po_message_set_extracted_comments(self._gpo_message, encoded)
    else:
        gpo.po_message_set_comments(self._gpo_message, encoded)
383
385 gpo.po_message_set_comments(self._gpo_message, "")
386
388 newpo = self.__class__()
389 newpo._gpo_message = self._gpo_message
390 return newpo
391
392 - def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
426
428
429
430 return self.getid() == "" and len(self.target) > 0
431
434
437
440
447
448
449
450
451
452
453
455 gpo.po_message_set_fuzzy(self._gpo_message, present)
456
458
459
460 gpo.po_message_set_obsolete(self._gpo_message, True)
461 self.infer_state()
462
464 gpo.po_message_set_obsolete(self._gpo_message, False)
465 self.infer_state()
466
468 return gpo.po_message_msgid_plural(self._gpo_message) is not None
469
481
485 msgidcomment = property(_extract_msgidcomments, setmsgidcomment)
486
488 pf = pofile(noheader=True)
489 pf.addunit(self)
490 return str(pf)
491
493 locations = []
494 i = 0
495 location = gpo.po_message_filepos(self._gpo_message, i)
496 while location:
497 locname = gpo.po_filepos_file(location)
498 locline = gpo.po_filepos_start_line(location)
499 if locline == -1:
500 locstring = locname
501 else:
502 locstring = locname + ":" + str(locline)
503 locations.append(urllib.unquote_plus(locstring))
504 i += 1
505 location = gpo.po_message_filepos(self._gpo_message, i)
506 return locations
507
509 if location.find(" ") != -1:
510 location = urllib.quote_plus(location)
511 parts = location.split(":")
512 file = parts[0]
513 if len(parts) == 2:
514 line = int(parts[1] or "0")
515 else:
516 line = -1
517 gpo.po_message_add_filepos(self._gpo_message, file, line)
518
def getcontext(self):
    """Return the unit's context.

    Uses the message's msgctxt, decoded with the unit's encoding, when the
    library reports one; otherwise falls back to whatever
    ``_extract_msgidcomments()`` returns.
    """
    raw_context = gpo.po_message_msgctxt(self._gpo_message)
    if not raw_context:
        return self._extract_msgidcomments()
    return raw_context.decode(self._encoding)
526
def setcontext(self, context):
    """Set the msgctxt of the underlying message.

    *context* is normalised with ``data.forceunicode`` and, when that
    yields a unicode string, encoded with the unit's encoding before it
    is handed to the library. ``po_message_set_msgctxt`` is declared with
    a ``c_char_p`` argument, so it needs an encoded byte string; the
    other setters in this class encode the same way.
    """
    context = data.forceunicode(context)
    # Anything that is not unicode (e.g. None) is passed through untouched.
    if isinstance(context, unicode):
        context = context.encode(self._encoding)
    gpo.po_message_set_msgctxt(self._gpo_message, context)
530
565 buildfromunit = classmethod(buildfromunit)
566
567
568 -class pofile(pocommon.pofile):
569 UnitClass = pounit
570
# Create a PO store.
#   inputfile -- when None, an empty libgettextpo file and a message iterator
#                over it are created; otherwise it is forwarded to the parent
#                class constructor together with ``encoding``.
#   encoding  -- only forwarded to the parent constructor; self._encoding
#                itself always starts as 'utf-8'.
#   unitclass -- accepted but not referenced in this method.
#   noheader  -- when false, init_headers() is called.
# NOTE(review): indentation is lost in this listing, so whether the
# ``if not noheader`` test is nested inside ``if inputfile is None`` (and
# which ``if`` the ``else`` belongs to) must be confirmed against the
# original file.
571 - def __init__(self, inputfile=None, encoding=None, unitclass=pounit, noheader=False):
# Native handles stay None until a file is created or parsed.
572 self._gpo_memory_file = None
573 self._gpo_message_iterator = None
574 self.units = []
575 self.sourcelanguage = None
576 self.targetlanguage = None
577 self._encoding = 'utf-8'
578 if inputfile is None:
579 self._gpo_memory_file = gpo.po_file_create()
580 self._gpo_message_iterator = gpo.po_message_iterator(self._gpo_memory_file, None)
581 if not noheader:
582 self.init_headers()
583 else:
584 super(pofile, self).__init__(inputfile=inputfile, encoding=encoding)
585
def addunit(self, unit, new=True):
    """Add *unit* to this store.

    When *new* is true, the unit's message is first inserted into the
    native file through the store's message iterator. The unit is then
    passed to the parent class's ``addunit`` in either case.
    """
    if new:
        native_message = unit._gpo_message
        gpo.po_message_insert(self._gpo_message_iterator, native_message)
    parent = super(pofile, self)
    parent.addunit(unit)
590
592 header._store = self
593 self.units.insert(0, header)
594 gpo.po_message_iterator_free(self._gpo_message_iterator)
595 self._gpo_message_iterator = gpo.po_message_iterator(self._gpo_memory_file, None)
596 gpo.po_message_insert(self._gpo_message_iterator, header._gpo_message)
597 while gpo.po_next_message(self._gpo_message_iterator):
598 pass
599
601 """make sure each msgid is unique ; merge comments etc from duplicates into original"""
602
603
604 id_dict = {}
605 uniqueunits = []
606
607
608 markedpos = []
609 def addcomment(thepo):
610 thepo.msgidcomment = " ".join(thepo.getlocations())
611 markedpos.append(thepo)
612 for thepo in self.units:
613 id = thepo.getid()
614 if thepo.isheader() and not thepo.getlocations():
615
616 uniqueunits.append(thepo)
617 elif id in id_dict:
618 if duplicatestyle == "merge":
619 if id:
620 id_dict[id].merge(thepo)
621 else:
622 addcomment(thepo)
623 uniqueunits.append(thepo)
624 elif duplicatestyle == "msgctxt":
625 origpo = id_dict[id]
626 if origpo not in markedpos:
627 gpo.po_message_set_msgctxt(origpo._gpo_message, " ".join(origpo.getlocations()))
628 markedpos.append(thepo)
629 gpo.po_message_set_msgctxt(thepo._gpo_message, " ".join(thepo.getlocations()))
630 uniqueunits.append(thepo)
631 else:
632 if not id:
633 if duplicatestyle == "merge":
634 addcomment(thepo)
635 else:
636 gpo.po_message_set_msgctxt(thepo._gpo_message, " ".join(thepo.getlocations()))
637 id_dict[id] = thepo
638 uniqueunits.append(thepo)
639 new_gpo_memory_file = gpo.po_file_create()
640 new_gpo_message_iterator = gpo.po_message_iterator(new_gpo_memory_file, None)
641 for unit in uniqueunits:
642 gpo.po_message_insert(new_gpo_message_iterator, unit._gpo_message)
643 gpo.po_message_iterator_free(self._gpo_message_iterator)
644 self._gpo_message_iterator = new_gpo_message_iterator
645 self._gpo_memory_file = new_gpo_memory_file
646 self.units = uniqueunits
647
649 def obsolete_workaround():
650
651
652
653 for unit in self.units:
654 if unit.isobsolete():
655 gpo.po_message_set_extracted_comments(unit._gpo_message, "")
656 location = gpo.po_message_filepos(unit._gpo_message, 0)
657 while location:
658 gpo.po_message_remove_filepos(unit._gpo_message, 0)
659 location = gpo.po_message_filepos(unit._gpo_message, 0)
660 outputstring = ""
661 if self._gpo_memory_file:
662 obsolete_workaround()
663 f, fname = tempfile.mkstemp(prefix='translate', suffix='.po')
664 os.close(f)
665 self._gpo_memory_file = gpo.po_file_write_v2(self._gpo_memory_file, fname, xerror_handler)
666 f = open(fname)
667 outputstring = f.read()
668 f.close()
669 os.remove(fname)
670 return outputstring
671
673 """Returns True if the object doesn't contain any translation units."""
674 if len(self.units) == 0:
675 return True
676
677 if self.units[0].isheader():
678 units = self.units[1:]
679 else:
680 units = self.units
681
682 for unit in units:
683 if not unit.isblank() and not unit.isobsolete():
684 return False
685 return True
686
688 if hasattr(input, 'name'):
689 self.filename = input.name
690 elif not getattr(self, 'filename', ''):
691 self.filename = ''
692
693 if hasattr(input, "read"):
694 posrc = input.read()
695 input.close()
696 input = posrc
697
698 needtmpfile = not os.path.isfile(input)
699 if needtmpfile:
700
701 fd, fname = tempfile.mkstemp(prefix='translate', suffix='.po')
702 os.write(fd, input)
703 input = fname
704 os.close(fd)
705
706 self._gpo_memory_file = gpo.po_file_read_v3(input, xerror_handler)
707 if self._gpo_memory_file is None:
708 print >> sys.stderr, "Error:"
709
710 if needtmpfile:
711 os.remove(input)
712
713 self.units = []
714
715 self._header = gpo.po_file_domain_header(self._gpo_memory_file, None)
716 if self._header:
717 charset = gpo.po_header_field(self._header, "Content-Type")
718 if charset:
719 charset = re.search("charset=([^\\s]+)", charset).group(1)
720 self._encoding = encodingToUse(charset)
721 self._gpo_message_iterator = gpo.po_message_iterator(self._gpo_memory_file, None)
722 newmessage = gpo.po_next_message(self._gpo_message_iterator)
723 while newmessage:
724 newunit = pounit(gpo_message=newmessage, encoding=self._encoding)
725 self.addunit(newunit, new=False)
726 newmessage = gpo.po_next_message(self._gpo_message_iterator)
727 self._free_iterator()
728
730
731
732 return
733 self._free_iterator()
734 if self._gpo_memory_file is not None:
735 gpo.po_file_free(self._gpo_memory_file)
736 self._gpo_memory_file = None
737
739
740
741 return
742 if self._gpo_message_iterator is not None:
743 gpo.po_message_iterator_free(self._gpo_message_iterator)
744 self._gpo_message_iterator = None
745