1
2
3
4
5
6
7 """
8 **polib** allows you to manipulate, create, modify gettext files (pot, po
9 and mo files). You can load existing files, iterate through their entries,
10 add, modify entries, comments or metadata, etc... or create new po files
11 from scratch.
12
13 **polib** provides a simple and pythonic API, exporting only three
14 convenience functions (*pofile*, *mofile* and *detect_encoding*), and the
15 four core classes, *POFile*, *MOFile*, *POEntry* and *MOEntry* for creating
16 new files/entries.
17
18 **Basic example**:
19
20 >>> import polib
21 >>> # load an existing po file
22 >>> po = polib.pofile('tests/test_utf8.po')
23 >>> for entry in po:
24 ... # do something with entry...
25 ... pass
26 >>> # add an entry
27 >>> entry = polib.POEntry(msgid='Welcome', msgstr='Bienvenue')
28 >>> entry.occurrences = [('welcome.py', '12'), ('anotherfile.py', '34')]
29 >>> po.append(entry)
30 >>> # to save our modified po file:
31 >>> # po.save()
32 >>> # or you may want to compile the po file
33 >>> # po.save_as_mofile('tests/test_utf8.mo')
34 """
35
36 __author__ = 'David JEAN LOUIS <izimobil@gmail.com>'
37 __version__ = '0.4.1'
38 __all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
39 'detect_encoding', 'escape', 'unescape']
40
41 import struct
42 import textwrap
43 import warnings
44
45 default_encoding = 'utf-8'
46
47
48
50 """
51 Convenience function that parse the po/pot file *fpath* and return
52 a POFile instance.
53
54 **Keyword arguments**:
55 - *fpath*: string, full or relative path to the po/pot file to parse
56 - *wrapwidth*: integer, the wrap width, only useful when -w option was
57 passed to xgettext (optional, default to 78)
58 - *autodetect_encoding*: boolean, if set to False the function will
59 not try to detect the po file encoding (optional, default to True)
60 - *encoding*: string, an encoding, only relevant if autodetect_encoding
61 is set to False
62
63 **Example**:
64
65 >>> import polib
66 >>> po = polib.pofile('tests/test_weird_occurrences.po')
67 >>> po #doctest: +ELLIPSIS
68 <POFile instance at ...>
69 >>> import os, tempfile
70 >>> for fname in ['test_iso-8859-15.po', 'test_utf8.po']:
71 ... orig_po = polib.pofile('tests/'+fname)
72 ... tmpf = tempfile.NamedTemporaryFile().name
73 ... orig_po.save(tmpf)
74 ... try:
75 ... new_po = polib.pofile(tmpf)
76 ... for old, new in zip(orig_po, new_po):
77 ... if old.msgid != new.msgid:
78 ... old.msgid
79 ... new.msgid
80 ... if old.msgstr != new.msgstr:
81 ... old.msgid
82 ... new.msgid
83 ... finally:
84 ... os.unlink(tmpf)
85 """
86 if kwargs.get('autodetect_encoding', True) == True:
87 enc = detect_encoding(fpath)
88 else:
89 enc = kwargs.get('encoding', default_encoding)
90 parser = _POFileParser(fpath)
91 instance = parser.parse()
92 instance.wrapwidth = kwargs.get('wrapwidth', 78)
93 instance.encoding = enc
94 return instance
95
96
97
98
100 """
101 Convenience function that parse the mo file *fpath* and return
102 a MOFile instance.
103
104 **Keyword arguments**:
105 - *fpath*: string, full or relative path to the mo file to parse
106 - *wrapwidth*: integer, the wrap width, only useful when -w option was
107 passed to xgettext to generate the po file that was used to format
108 the mo file (optional, default to 78)
109 - *autodetect_encoding*: boolean, if set to False the function will
110 not try to detect the po file encoding (optional, default to True)
111 - *encoding*: string, an encoding, only relevant if autodetect_encoding
112 is set to False
113
114 **Example**:
115
116 >>> import polib
117 >>> mo = polib.mofile('tests/test_utf8.mo')
118 >>> mo #doctest: +ELLIPSIS
119 <MOFile instance at ...>
120 >>> import os, tempfile
121 >>> for fname in ['test_iso-8859-15.mo', 'test_utf8.mo']:
122 ... orig_mo = polib.mofile('tests/'+fname)
123 ... tmpf = tempfile.NamedTemporaryFile().name
124 ... orig_mo.save(tmpf)
125 ... try:
126 ... new_mo = polib.mofile(tmpf)
127 ... for old, new in zip(orig_mo, new_mo):
128 ... if old.msgid != new.msgid:
129 ... old.msgstr
130 ... new.msgstr
131 ... finally:
132 ... os.unlink(tmpf)
133 """
134 if kwargs.get('autodetect_encoding', True) == True:
135 enc = detect_encoding(fpath)
136 else:
137 enc = kwargs.get('encoding', default_encoding)
138 parser = _MOFileParser(fpath)
139 instance = parser.parse()
140 instance.wrapwidth = kwargs.get('wrapwidth', 78)
141 instance.encoding = enc
142 return instance
143
144
145
146
148 """
149 Try to detect the encoding used by the file *fpath*. The function will
150 return polib default *encoding* if it's unable to detect it.
151
152 **Keyword argument**:
153 - *fpath*: string, full or relative path to the mo file to parse.
154
155 **Examples**:
156
157 >>> print(detect_encoding('tests/test_noencoding.po'))
158 utf-8
159 >>> print(detect_encoding('tests/test_utf8.po'))
160 UTF-8
161 >>> print(detect_encoding('tests/test_utf8.mo'))
162 UTF-8
163 >>> print(detect_encoding('tests/test_iso-8859-15.po'))
164 ISO_8859-15
165 >>> print(detect_encoding('tests/test_iso-8859-15.mo'))
166 ISO_8859-15
167 """
168 import re
169 rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
170 f = open(fpath)
171 for l in f:
172 match = rx.search(l)
173 if match:
174 f.close()
175 return match.group(1).strip()
176 f.close()
177 return default_encoding
178
179
180
181
183 """
184 Escape special chars and return the given string *st*.
185
186 **Examples**:
187
188 >>> escape('\\t and \\n and \\r and " and \\\\')
189 '\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\'
190 """
191 st = st.replace('\\', r'\\')
192 st = st.replace('\t', r'\t')
193 st = st.replace('\r', r'\r')
194 st = st.replace('\n', r'\n')
195 st = st.replace('\"', r'\"')
196 return st
197
198
199
200
202 """
203 Unescape special chars and return the given string *st*.
204
205 **Examples**:
206
207 >>> unescape('\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\')
208 '\\t and \\n and \\r and " and \\\\'
209 """
210 st = st.replace(r'\"', '"')
211 st = st.replace(r'\n', '\n')
212 st = st.replace(r'\r', '\r')
213 st = st.replace(r'\t', '\t')
214 st = st.replace(r'\\', '\\')
215 return st
216
217
218
219
221 """
222 Common parent class for POFile and MOFile classes.
223 This class must **not** be instantiated directly.
224 """
225
227 """
228 Constructor.
229
230 **Keyword arguments**:
231 - *fpath*: string, path to po or mo file
232 - *wrapwidth*: integer, the wrap width, only useful when -w option
233 was passed to xgettext to generate the po file that was used to
234 format the mo file, default to 78 (optional).
235 """
236 list.__init__(self)
237
238 self.fpath = fpath
239
240 self.wrapwidth = wrapwidth
241
242 self.encoding = encoding
243
244 self.header = ''
245
246 self.metadata = {}
247 self.metadata_is_fuzzy = 0
248
250 """String representation of the file."""
251 ret = []
252 entries = [self.metadata_as_entry()] + \
253 [e for e in self if not e.obsolete]
254 for entry in entries:
255 ret.append(entry.__str__(self.wrapwidth))
256 for entry in self.obsolete_entries():
257 ret.append(entry.__str__(self.wrapwidth))
258 return '\n'.join(ret)
259
261 """Return the official string representation of the object."""
262 return '<%s instance at %x>' % (self.__class__.__name__, id(self))
263
265 """Return the metadata as an entry"""
266 e = POEntry(msgid='')
267 mdata = self.ordered_metadata()
268 if mdata:
269 strs = []
270 for name, value in mdata:
271
272 value = '\n'.join([v.strip() for v in value.split('\n')])
273 strs.append('%s: %s' % (name, value))
274 e.msgstr = '\n'.join(strs) + '\n'
275 return e
276
def save(self, fpath=None, repr_method='__str__'):
    """
    Save the file contents to *fpath*, or to the path stored on the
    instance (``self.fpath``) when *fpath* is not provided.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the file (optional).
      - *repr_method*: string, name of the method of this object whose
        return value is written to the file (default ``'__str__'``).
        When it is ``'to_binary'`` the file is opened in binary mode.

    **Raises**: IOError if neither *fpath* nor ``self.fpath`` is set.
    """
    if self.fpath is None and fpath is None:
        raise IOError('You must provide a file path to save() method')
    # Render the contents before opening the target, so that a failure
    # while rendering does not truncate an existing file.
    contents = getattr(self, repr_method)()
    if fpath is None:
        fpath = self.fpath
    mode = 'w'
    if repr_method == 'to_binary':
        mode += 'b'
    fhandle = open(fpath, mode)
    try:
        fhandle.write(contents)
    finally:
        # Always release the handle, even if write() raises.
        fhandle.close()
299
def find(self, st, by='msgid'):
    """
    Find the first entry whose msgid (or the attribute named by *by*)
    equals the string *st*.

    **Keyword arguments**:
      - *st*: string, the string to search for
      - *by*: string, the name of the entry attribute to compare

    **Returns**: the first matching entry, or None if no entry matches.

    **Examples**:

    >>> po = pofile('tests/test_utf8.po')
    >>> entry = po.find('Thursday')
    >>> entry.msgstr
    'Jueves'
    >>> entry = po.find('Some unexistant msgid')
    >>> entry is None
    True
    >>> entry = po.find('Jueves', 'msgstr')
    >>> entry.msgid
    'Thursday'
    """
    # Stop at the first hit instead of collecting every match.
    for entry in self:
        if getattr(entry, by) == st:
            return entry
    return None
326
359
361 """Return the mofile binary representation."""
362 import struct
363 import array
364 output = ''
365 offsets = []
366 ids = strs = ''
367 entries = self.translated_entries()
368
369 def cmp(_self, other):
370 if _self.msgid > other.msgid:
371 return 1
372 elif _self.msgid < other.msgid:
373 return -1
374 else:
375 return 0
376 entries.sort(cmp)
377
378 mentry = self.metadata_as_entry()
379 mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip() + '\n'
380 entries = [mentry] + entries
381 entries_len = len(entries)
382 for e in entries:
383
384
385 msgid = e._decode(e.msgid)
386 msgstr = e._decode(e.msgstr)
387 offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
388 ids += msgid + '\0'
389 strs += msgstr + '\0'
390
391 keystart = 7*4+16*entries_len
392
393 valuestart = keystart + len(ids)
394 koffsets = []
395 voffsets = []
396
397
398 for o1, l1, o2, l2 in offsets:
399 koffsets += [l1, o1+keystart]
400 voffsets += [l2, o2+valuestart]
401 offsets = koffsets + voffsets
402 output = struct.pack("IIIIIII",
403 0x950412de,
404 0,
405 entries_len,
406 7*4,
407 7*4+entries_len*8,
408 0, 0)
409 output += array.array("I", offsets).tostring()
410 output += ids
411 output += strs
412 return output
413
414
415
416
418 '''
419 Po (or Pot) file reader/writer.
420 POFile objects inherit the list objects methods.
421
422 **Example**:
423
424 >>> po = POFile()
425 >>> entry1 = POEntry(
426 ... msgid="Some english text",
427 ... msgstr="Un texte en anglais"
428 ... )
429 >>> entry1.occurrences = [('testfile', 12),('another_file', 1)]
430 >>> entry1.comment = "Some useful comment"
431 >>> entry2 = POEntry(
432 ... msgid="Peace in some languages",
433 ... msgstr="Pace سلام שלום Hasîtî 和平"
434 ... )
435 >>> entry2.occurrences = [('testfile', 15),('another_file', 5)]
436 >>> entry2.comment = "Another useful comment"
437 >>> entry3 = POEntry(
438 ... msgid='Some entry with quotes " \\"',
439 ... msgstr='Un message unicode avec des quotes " \\"'
440 ... )
441 >>> entry3.comment = "Test string quoting"
442 >>> po.append(entry1)
443 >>> po.append(entry2)
444 >>> po.append(entry3)
445 >>> po.header = "Some Header"
446 >>> print(po)
447 # Some Header
448 msgid ""
449 msgstr ""
450 <BLANKLINE>
451 #. Some useful comment
452 #: testfile:12 another_file:1
453 msgid "Some english text"
454 msgstr "Un texte en anglais"
455 <BLANKLINE>
456 #. Another useful comment
457 #: testfile:15 another_file:5
458 msgid "Peace in some languages"
459 msgstr "Pace سلام שלום Hasîtî 和平"
460 <BLANKLINE>
461 #. Test string quoting
462 msgid "Some entry with quotes \\" \\""
463 msgstr "Un message unicode avec des quotes \\" \\""
464 <BLANKLINE>
465 '''
466
468 """Return the string representation of the po file"""
469 ret, headers = '', self.header.split('\n')
470 for header in headers:
471 if header[:1] in [',', ':']:
472 ret += '#%s\n' % header
473 else:
474 ret += '# %s\n' % header
475 return ret + _BaseFile.__str__(self)
476
478 """
479 Save the binary representation of the file to *fpath*.
480
481 **Keyword arguments**:
482 - *fpath*: string, full or relative path to the file.
483 """
484 _BaseFile.save(self, fpath, 'to_binary')
485
487 """
488 Convenience method that return the percentage of translated
489 messages.
490
491 **Example**:
492
493 >>> import polib
494 >>> po = polib.pofile('tests/test_pofile_helpers.po')
495 >>> po.percent_translated()
496 50
497 >>> po = POFile()
498 >>> po.percent_translated()
499 100
500 """
501 total = len([e for e in self if not e.obsolete])
502 if total == 0:
503 return 100
504 translated = len(self.translated_entries())
505 return int((100.00 / float(total)) * translated)
506
508 """
509 Convenience method that return a list of translated entries.
510
511 **Example**:
512
513 >>> import polib
514 >>> po = polib.pofile('tests/test_pofile_helpers.po')
515 >>> len(po.translated_entries())
516 6
517 """
518 return [e for e in self if e.translated() and not e.obsolete]
519
521 """
522 Convenience method that return a list of untranslated entries.
523
524 **Example**:
525
526 >>> import polib
527 >>> po = polib.pofile('tests/test_pofile_helpers.po')
528 >>> len(po.untranslated_entries())
529 6
530 """
531 return [e for e in self if not e.translated() and not e.obsolete]
532
534 """
535 Convenience method that return the list of 'fuzzy' entries.
536
537 **Example**:
538
539 >>> import polib
540 >>> po = polib.pofile('tests/test_pofile_helpers.po')
541 >>> len(po.fuzzy_entries())
542 2
543 """
544 return [e for e in self if 'fuzzy' in e.flags]
545
547 """
548 Convenience method that return the list of obsolete entries.
549
550 **Example**:
551
552 >>> import polib
553 >>> po = polib.pofile('tests/test_pofile_helpers.po')
554 >>> len(po.obsolete_entries())
555 4
556 """
557 return [e for e in self if e.obsolete]
558
def merge(self, refpot):
    """
    XXX this could not work if encodings are different, needs thinking
    and general refactoring of how polib handles encoding...

    Convenience method that merges the current pofile with the pot file
    provided, in the spirit of the gettext msgmerge utility:

      - for this file: each entry is updated through ``POEntry.merge``,
        which keeps its translations and translator comments but
        replaces its extracted comment, occurrences and flags with
        those of the reference entry
      - for *refpot*: entries missing from this file are added (as new
        entries merged from the reference entry)
      - entries of this file whose msgid is not in *refpot* are marked
        obsolete

    **Keyword argument**:
      - *refpot*: object POFile, the reference catalog.

    **Example**:

    >>> import polib
    >>> refpot = polib.pofile('tests/test_merge.pot')
    >>> po = polib.pofile('tests/test_merge_before.po')
    >>> po.merge(refpot)
    >>> expected_po = polib.pofile('tests/test_merge_after.po')
    >>> str(po) == str(expected_po)
    True
    """
    # Index the current entries by msgid once instead of rescanning the
    # whole list for every reference entry. setdefault keeps the first
    # entry for a given msgid, which is the one a linear search returns.
    by_msgid = {}
    for entry in self:
        by_msgid.setdefault(entry.msgid, entry)

    ref_msgids = set()
    for entry in refpot:
        ref_msgids.add(entry.msgid)
        e = by_msgid.get(entry.msgid)
        if e is None:
            e = POEntry()
            self.append(e)
            # merge() below gives the new entry this msgid.
            by_msgid[entry.msgid] = e
        e.merge(entry)

    # Everything that is no longer in the reference catalog is obsolete.
    for entry in self:
        if entry.msgid not in ref_msgids:
            entry.obsolete = True
596
597
598
599
601 '''
602 Mo file reader/writer.
603 MOFile objects inherit the list objects methods.
604
605 **Example**:
606
607 >>> mo = MOFile()
608 >>> entry1 = POEntry(
609 ... msgid="Some english text",
610 ... msgstr="Un texte en anglais"
611 ... )
612 >>> entry2 = POEntry(
613 ... msgid="I need my dirty cheese",
614 ... msgstr="Je veux mon sale fromage"
615 ... )
616 >>> entry3 = MOEntry(
617 ... msgid='Some entry with quotes " \\"',
618 ... msgstr='Un message unicode avec des quotes " \\"'
619 ... )
620 >>> mo.append(entry1)
621 >>> mo.append(entry2)
622 >>> mo.append(entry3)
623 >>> print(mo)
624 msgid ""
625 msgstr ""
626 <BLANKLINE>
627 msgid "Some english text"
628 msgstr "Un texte en anglais"
629 <BLANKLINE>
630 msgid "I need my dirty cheese"
631 msgstr "Je veux mon sale fromage"
632 <BLANKLINE>
633 msgid "Some entry with quotes \\" \\""
634 msgstr "Un message unicode avec des quotes \\" \\""
635 <BLANKLINE>
636 '''
637
def __init__(self, fpath=None, wrapwidth=78):
    """
    MOFile constructor.

    *fpath* and *wrapwidth* are forwarded unchanged to
    ``_BaseFile.__init__``; see that method for their meaning.
    """
    _BaseFile.__init__(self, fpath, wrapwidth)
    # Filled in with the value read from the file when a mo file is parsed.
    self.magic_number = None
    self.version = 0
646
648 """
649 Save the string representation of the file to *fpath*.
650
651 **Keyword argument**:
652 - *fpath*: string, full or relative path to the file.
653 """
654 _BaseFile.save(self, fpath)
655
def save(self, fpath=None):
    """
    Save the binary representation of the file to *fpath*.

    **Keyword argument**:
      - *fpath*: string, full or relative path to the file (optional).
        When omitted, ``_BaseFile.save`` writes to the path stored on
        the instance and raises IOError if there is none.
    """
    _BaseFile.save(self, fpath, 'to_binary')
664
666 """
667 Convenience method to keep the same interface with POFile instances.
668 """
669 return 100
670
672 """
673 Convenience method to keep the same interface with POFile instances.
674 """
675 return self
676
678 """
679 Convenience method to keep the same interface with POFile instances.
680 """
681 return []
682
684 """
685 Convenience method to keep the same interface with POFile instances.
686 """
687 return []
688
690 """
691 Convenience method to keep the same interface with POFile instances.
692 """
693 return []
694
695
696
697
698 -class _BaseEntry(object):
699 """
700 Base class for POEntry or MOEntry objects.
701 This class must *not* be instanciated directly.
702 """
703
704 - def __init__(self, *args, **kwargs):
705 """Base Entry constructor."""
706 self.msgid = kwargs.get('msgid', '')
707 self.msgstr = kwargs.get('msgstr', '')
708 self.msgid_plural = kwargs.get('msgid_plural', '')
709 self.msgstr_plural = kwargs.get('msgstr_plural', {})
710 self.obsolete = kwargs.get('obsolete', False)
711 self.encoding = kwargs.get('encoding', default_encoding)
712
713 - def __repr__(self):
714 """Return the official string representation of the object."""
715 return '<%s instance at %x>' % (self.__class__.__name__, id(self))
716
717 - def __str__(self, wrapwidth=78):
718 """
719 Common string representation of the POEntry and MOEntry
720 objects.
721 """
722 if self.obsolete:
723 delflag = '#~ '
724 else:
725 delflag = ''
726
727 ret = []
728 ret += self._str_field("msgid", delflag, "", self.msgid)
729
730 if self.msgid_plural:
731 ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural)
732 if self.msgstr_plural:
733
734 msgstrs = self.msgstr_plural
735 keys = list(msgstrs)
736 keys.sort()
737 for index in keys:
738 msgstr = msgstrs[index]
739 plural_index = '[%s]' % index
740 ret += self._str_field("msgstr", delflag, plural_index, msgstr)
741 else:
742
743 ret += self._str_field("msgstr", delflag, "", self.msgstr)
744 ret.append('')
745 return '\n'.join(ret)
746
747 - def _str_field(self, fieldname, delflag, plural_index, field):
748 field = self._decode(field)
749 lines = field.splitlines(True)
750
751
752 if len(lines) > 1:
753 lines = ['']+lines
754 else:
755 lines = [field]
756 ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
757 escape(lines.pop(0)))]
758 for mstr in lines:
759 ret.append('%s"%s"' % (delflag, escape(mstr)))
760 return ret
761
762 - def _decode(self, st):
763 try:
764 if isinstance(st, unicode):
765 st = st.encode(self.encoding)
766 return st
767 except:
768 return st
769
770
771
772
class POEntry(_BaseEntry):
    """
    Represents a po file entry.

    **Examples**:

    >>> entry = POEntry(msgid='Welcome', msgstr='Bienvenue')
    >>> entry.occurrences = [('welcome.py', 12), ('anotherfile.py', 34)]
    >>> print(entry)
    #: welcome.py:12 anotherfile.py:34
    msgid "Welcome"
    msgstr "Bienvenue"
    <BLANKLINE>
    >>> entry = POEntry()
    >>> entry.occurrences = [('src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c', 32), ('src/eggs.c', 45)]
    >>> entry.comment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
    >>> entry.tcomment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
    >>> entry.flags.append('c-format')
    >>> entry.msgid = 'I have spam but no egg !'
    >>> entry.msgid_plural = 'I have spam and %d eggs !'
    >>> entry.msgstr_plural[0] = "J'ai du jambon mais aucun oeuf !"
    >>> entry.msgstr_plural[1] = "J'ai du jambon et %d oeufs !"
    >>> print(entry)
    #. A plural translation. This is a very very very long line please do not
    #. wrap, this is just for testing comment wrapping...
    # A plural translation. This is a very very very long line please do not wrap,
    # this is just for testing comment wrapping...
    #: src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c:32
    #: src/eggs.c:45
    #, c-format
    msgid "I have spam but no egg !"
    msgid_plural "I have spam and %d eggs !"
    msgstr[0] "J'ai du jambon mais aucun oeuf !"
    msgstr[1] "J'ai du jambon et %d oeufs !"
    <BLANKLINE>
    """

    def __init__(self, *args, **kwargs):
        """
        POEntry constructor.

        Accepts the keyword arguments of ``_BaseEntry.__init__`` plus
        (all optional):
          - *comment*: string, rendered as ``#.`` lines (default ``''``)
          - *tcomment*: string, rendered as ``# `` lines (default ``''``)
          - *occurrences*: list of (file path, line number) pairs,
            rendered as ``#:`` lines (default ``[]``)
          - *flags*: list of strings, rendered as a ``#,`` line
            (default ``[]``)
        """
        _BaseEntry.__init__(self, *args, **kwargs)
        self.comment = kwargs.get('comment', '')
        self.tcomment = kwargs.get('tcomment', '')
        self.occurrences = kwargs.get('occurrences', [])
        self.flags = kwargs.get('flags', [])

    def _comment_lines(self, text, prefix, wrapwidth):
        """
        Return the output lines for the comment *text*, each one starting
        with *prefix*; lines that do not fit in *wrapwidth* columns are
        wrapped (a *wrapwidth* of 0 or less disables wrapping).
        """
        ret = []
        for line in self._decode(text).split('\n'):
            if wrapwidth > 0 and len(line) > wrapwidth - len(prefix):
                ret += textwrap.wrap(line, wrapwidth,
                                     initial_indent=prefix,
                                     subsequent_indent=prefix,
                                     break_long_words=False)
            else:
                ret.append('%s%s' % (prefix, line))
        return ret

    def __str__(self, wrapwidth=78):
        """
        Return the string representation of the entry.

        Obsolete entries are rendered by ``_BaseEntry.__str__`` only
        (no comments, occurrences or flags). *wrapwidth* is the maximum
        width used to wrap comments and occurrences; 0 or less disables
        wrapping.
        """
        if self.obsolete:
            return _BaseEntry.__str__(self)
        ret = []
        if self.comment != '':
            ret += self._comment_lines(self.comment, '#. ', wrapwidth)
        if self.tcomment != '':
            ret += self._comment_lines(self.tcomment, '# ', wrapwidth)
        if self.occurrences:
            filelist = []
            for fpath, lineno in self.occurrences:
                if lineno:
                    filelist.append('%s:%s' % (self._decode(fpath), lineno))
                else:
                    filelist.append('%s' % (self._decode(fpath)))
            filestr = ' '.join(filelist)
            if wrapwidth > 0 and len(filestr)+3 > wrapwidth:
                # textwrap may break lines on hyphens, which would split
                # file names: hide the hyphens behind '*' while wrapping
                # and restore them afterwards.
                # NOTE(review): a literal '*' in a file name is turned
                # into '-' by this round trip.
                lines = textwrap.wrap(filestr.replace('-', '*'),
                                      wrapwidth,
                                      initial_indent='#: ',
                                      subsequent_indent='#: ',
                                      break_long_words=False)
                for line in lines:
                    ret.append(line.replace('*', '-'))
            else:
                ret.append('#: '+filestr)
        if self.flags:
            ret.append('#, %s' % ', '.join(self.flags))
        ret.append(_BaseEntry.__str__(self))
        return '\n'.join(ret)

    def __cmp__(self, other):
        '''
        Called by comparison operations if rich comparison is not defined.

        Entries are ordered by: obsolete entries first, then by their
        sorted occurrences (file, then line; when one list is a prefix
        of the other the shorter one comes first), then by msgid.
        Returns -1, 0 or 1.

        **Tests**:
        >>> a = POEntry(msgid='a', occurrences=[('b.py', 1), ('b.py', 3)])
        >>> b = POEntry(msgid='b', occurrences=[('b.py', 1), ('b.py', 3)])
        >>> c1 = POEntry(msgid='c1', occurrences=[('a.py', 1), ('b.py', 1)])
        >>> c2 = POEntry(msgid='c2', occurrences=[('a.py', 1), ('a.py', 3)])
        >>> po = POFile()
        >>> po.append(a)
        >>> po.append(b)
        >>> po.append(c1)
        >>> po.append(c2)
        >>> po.sort()
        >>> print(po)
        # 
        msgid ""
        msgstr ""
        <BLANKLINE>
        #: a.py:1 a.py:3
        msgid "c2"
        msgstr ""
        <BLANKLINE>
        #: a.py:1 b.py:1
        msgid "c1"
        msgstr ""
        <BLANKLINE>
        #: b.py:1 b.py:3
        msgid "a"
        msgstr ""
        <BLANKLINE>
        #: b.py:1 b.py:3
        msgid "b"
        msgstr ""
        <BLANKLINE>
        '''
        # First: obsolete entries sort before the others.
        if self.obsolete != other.obsolete:
            if self.obsolete:
                return -1
            else:
                return 1

        # Second: compare the occurrences, on sorted copies so that the
        # result does not depend on the order they were recorded in.
        # (The previous hand-written comparator returned booleans, never
        # a negative value, so it did not actually sort anything.)
        occ1 = sorted(self.occurrences)
        occ2 = sorted(other.occurrences)
        for entry1, entry2 in zip(occ1, occ2):
            if entry1[0] != entry2[0]:
                if entry1[0] > entry2[0]:
                    return 1
                else:
                    return -1
            if entry1[1] != entry2[1]:
                if entry1[1] > entry2[1]:
                    return 1
                else:
                    return -1
        # Common prefix is identical: the entry with more occurrences is
        # the greater one, in both directions.
        if len(occ1) > len(occ2):
            return 1
        if len(occ1) < len(occ2):
            return -1

        # Finally: compare the message ids.
        if self.msgid > other.msgid:
            return 1
        if self.msgid < other.msgid:
            return -1
        return 0

    def translated(self):
        """
        Return True if the entry has been translated or False.

        Obsolete and fuzzy entries are never considered translated. An
        entry is translated when its msgstr is not empty or, for plural
        entries, when none of its plural forms is empty.
        """
        if self.obsolete or 'fuzzy' in self.flags:
            return False
        if self.msgstr != '':
            return True
        if self.msgstr_plural:
            for pos in self.msgstr_plural:
                if self.msgstr_plural[pos] == '':
                    return False
            return True
        return False

    def merge(self, other):
        """
        Merge the current entry with the given pot entry.

        The msgid, occurrences, comment, flags and msgid_plural of
        *other* replace those of this entry; msgstr and tcomment are
        left untouched. Plural indexes present in *other* but missing
        here are added with an empty translation.
        """
        self.msgid = other.msgid
        # Copy the lists: sharing them would let later changes to this
        # entry silently modify the reference entry (and vice versa).
        self.occurrences = list(other.occurrences)
        self.comment = other.comment
        self.flags = list(other.flags)
        self.msgid_plural = other.msgid_plural
        if other.msgstr_plural:
            for pos in other.msgstr_plural:
                # Keep existing translations, create the missing slots.
                self.msgstr_plural.setdefault(pos, '')
992
993
994
995
class MOEntry(_BaseEntry):
    """
    Represents a mo file entry.

    **Examples**:

    >>> entry = MOEntry()
    >>> entry.msgid = 'translate me !'
    >>> entry.msgstr = 'traduisez moi !'
    >>> print(entry)
    msgid "translate me !"
    msgstr "traduisez moi !"
    <BLANKLINE>
    """

    def __str__(self, wrapwidth=78):
        """
        Return the string representation of the entry, exactly as
        produced by ``_BaseEntry.__str__`` for the same *wrapwidth*.
        """
        rendered = _BaseEntry.__str__(self, wrapwidth)
        return rendered
1016
1017
1018
1019
1021 """
1022 A finite state machine to parse efficiently and correctly po
1023 file format.
1024 """
1025
1027 """
1028 Constructor.
1029
1030 **Keyword argument**:
1031 - *fpath*: string, path to the po file
1032 """
1033 self.fhandle = open(fpath, 'r')
1034 self.instance = POFile(fpath=fpath)
1035 self.transitions = {}
1036 self.current_entry = POEntry()
1037 self.current_state = 'ST'
1038 self.current_token = None
1039
1040 self.msgstr_index = 0
1041 self.entry_obsolete = 0
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055 all_ = ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'MS', 'MP', 'MX', 'MI']
1056
1057 self.add('TC', ['ST', 'HE'], 'HE')
1058 self.add('TC', ['GC', 'OC', 'FL', 'TC', 'MS', 'MP', 'MX', 'MI'], 'TC')
1059 self.add('GC', all_, 'GC')
1060 self.add('OC', all_, 'OC')
1061 self.add('FL', all_, 'FL')
1062 self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'MS', 'MX'], 'MI')
1063 self.add('MP', ['TC', 'GC', 'MI'], 'MP')
1064 self.add('MS', ['MI', 'MP', 'TC'], 'MS')
1065 self.add('MX', ['MI', 'MX', 'MP', 'TC'], 'MX')
1066 self.add('MC', ['MI', 'MP', 'MS', 'MX'], 'MC')
1067
1069 """
1070 Run the state machine, parse the file line by line and call process()
1071 with the current matched symbol.
1072 """
1073 i, lastlen = 1, 0
1074 for line in self.fhandle:
1075 line = line.strip()
1076 if line == '':
1077 i = i+1
1078 continue
1079 if line[:3] == '#~ ':
1080 line = line[3:]
1081 self.entry_obsolete = 1
1082 else:
1083 self.entry_obsolete = 0
1084 self.current_token = line
1085 if line[:2] == '#:':
1086
1087 self.process('OC', i)
1088 elif line[:7] == 'msgid "':
1089
1090 self.process('MI', i)
1091 elif line[:8] == 'msgstr "':
1092
1093 self.process('MS', i)
1094 elif line[:1] == '"':
1095
1096 self.process('MC', i)
1097 elif line[:14] == 'msgid_plural "':
1098
1099 self.process('MP', i)
1100 elif line[:7] == 'msgstr[':
1101
1102 self.process('MX', i)
1103 elif line[:3] == '#, ':
1104
1105 self.process('FL', i)
1106 elif line[:2] == '# ' or line == '#':
1107 if line == '#': line = line + ' '
1108
1109 self.process('TC', i)
1110 elif line[:2] == '#.':
1111
1112 self.process('GC', i)
1113 i = i+1
1114
1115 if self.current_entry:
1116
1117
1118 self.instance.append(self.current_entry)
1119
1120
1121 firstentry = self.instance[0]
1122 if firstentry.msgid == '':
1123
1124 firstentry = self.instance.pop(0)
1125 self.instance.metadata_is_fuzzy = firstentry.flags
1126 key = None
1127 for msg in firstentry.msgstr.splitlines():
1128 try:
1129 key, val = msg.split(':', 1)
1130 self.instance.metadata[key] = val.strip()
1131 except:
1132 if key is not None:
1133 self.instance.metadata[key] += '\n'+ msg.strip()
1134
1135 self.fhandle.close()
1136 return self.instance
1137
def add(self, symbol, states, next_state):
    """
    Register transitions in the state machine: for each state of
    *states*, reading *symbol* triggers the ``handle_<next_state>``
    method (lower-cased state name) of this object and leads to
    *next_state*.

    Keywords arguments:

    symbol     -- string, the matched token (two chars symbol)
    states     -- list, a list of states (two chars symbols)
    next_state -- the next state the fsm will have after the action
    """
    for source_state in states:
        handler_name = 'handle_%s' % next_state.lower()
        handler = getattr(self, handler_name)
        key = (symbol, source_state)
        self.transitions[key] = (handler, next_state)
1150
def process(self, symbol, linenum):
    """
    Process the transition corresponding to the current state and the
    symbol provided: the registered action is called and, if it returns
    a true value, the machine moves to the associated next state
    (otherwise the current state is kept).

    Keywords arguments:
    symbol  -- string, the matched token (two chars symbol)
    linenum -- integer, the current line number of the parsed file

    Raises IOError if no transition is registered for this symbol in
    the current state, or if the action itself fails.
    """
    try:
        (action, state) = self.transitions[(symbol, self.current_state)]
        if action():
            self.current_state = state
    except Exception:
        # Any failure here means the line is not valid at this point of
        # the file; report it with the line number.
        raise IOError('Syntax error in po file (line %s)' % linenum)
1166
1167
1168
1170 """Handle a header comment."""
1171 if self.instance.header != '':
1172 self.instance.header += '\n'
1173 self.instance.header += self.current_token[2:]
1174 return 1
1175
1177 """Handle a translator comment."""
1178 if self.current_state in ['MC', 'MS', 'MX']:
1179 self.instance.append(self.current_entry)
1180 self.current_entry = POEntry()
1181 if self.current_entry.tcomment != '':
1182 self.current_entry.tcomment += '\n'
1183 self.current_entry.tcomment += self.current_token[2:]
1184 return True
1185
1187 """Handle a generated comment."""
1188 if self.current_state in ['MC', 'MS', 'MX']:
1189 self.instance.append(self.current_entry)
1190 self.current_entry = POEntry()
1191 if self.current_entry.comment != '':
1192 self.current_entry.comment += '\n'
1193 self.current_entry.comment += self.current_token[3:]
1194 return True
1195
1197 """Handle a file:num occurence."""
1198 if self.current_state in ['MC', 'MS', 'MX']:
1199 self.instance.append(self.current_entry)
1200 self.current_entry = POEntry()
1201 occurrences = self.current_token[3:].split()
1202 for occurrence in occurrences:
1203 if occurrence != '':
1204 try:
1205 fil, line = occurrence.split(':')
1206 if not line.isdigit():
1207 fil = fil + line
1208 line = ''
1209 self.current_entry.occurrences.append((fil, line))
1210 except:
1211 self.current_entry.occurrences.append((occurrence, ''))
1212 return True
1213
1215 """Handle a flags line."""
1216 if self.current_state in ['MC', 'MS', 'MX']:
1217 self.instance.append(self.current_entry)
1218 self.current_entry = POEntry()
1219 self.current_entry.flags += self.current_token[3:].split(', ')
1220 return True
1221
1223 """Handle a msgid."""
1224 if self.current_state in ['MC', 'MS', 'MX']:
1225 self.instance.append(self.current_entry)
1226 self.current_entry = POEntry()
1227 self.current_entry.obsolete = self.entry_obsolete
1228 self.current_entry.msgid = unescape(self.current_token[7:-1])
1229 return True
1230
1232 """Handle a msgid plural."""
1233 self.current_entry.msgid_plural = unescape(self.current_token[14:-1])
1234 return True
1235
1237 """Handle a msgstr."""
1238 self.current_entry.msgstr = unescape(self.current_token[8:-1])
1239 return True
1240
1242 """Handle a msgstr plural."""
1243 index, value = self.current_token[7], self.current_token[11:-1]
1244 self.current_entry.msgstr_plural[index] = unescape(value)
1245 self.msgstr_index = index
1246 return True
1247
1249 """Handle a msgid or msgstr continuation line."""
1250 if self.current_state == 'MI':
1251 self.current_entry.msgid += unescape(self.current_token[1:-1])
1252 elif self.current_state == 'MP':
1253 self.current_entry.msgid_plural += \
1254 unescape(self.current_token[1:-1])
1255 elif self.current_state == 'MS':
1256 self.current_entry.msgstr += unescape(self.current_token[1:-1])
1257 elif self.current_state == 'MX':
1258 msgstr = self.current_entry.msgstr_plural[self.msgstr_index] +\
1259 unescape(self.current_token[1:-1])
1260 self.current_entry.msgstr_plural[self.msgstr_index] = msgstr
1261
1262 return False
1263
1264
1265
1266
1268 """
1269 A class to parse binary mo files.
1270 """
1271 BIG_ENDIAN = 0xde120495
1272 LITTLE_ENDIAN = 0x950412de
1273
1275 """_MOFileParser constructor."""
1276 self.fhandle = open(fpath, 'rb')
1277 self.instance = MOFile(fpath)
1278
1280 """
1281 Parse the magic number and raise an exception if not valid.
1282 """
1283
1285 """
1286 Build the instance with the file handle provided in the
1287 constructor.
1288 """
1289 magic_number = self._readbinary('<I', 4)
1290 if magic_number == self.LITTLE_ENDIAN:
1291 ii = '<II'
1292 elif magic_number == self.BIG_ENDIAN:
1293 ii = '>II'
1294 else:
1295 raise IOError('Invalid mo file, magic number is incorrect !')
1296 self.instance.magic_number = magic_number
1297
1298 self.instance.version, numofstrings = self._readbinary(ii, 8)
1299
1300 msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
1301
1302 self.fhandle.seek(msgids_hash_offset)
1303 msgids_index = []
1304 for i in range(numofstrings):
1305 msgids_index.append(self._readbinary(ii, 8))
1306
1307 self.fhandle.seek(msgstrs_hash_offset)
1308 msgstrs_index = []
1309 for i in range(numofstrings):
1310 msgstrs_index.append(self._readbinary(ii, 8))
1311
1312 for i in range(numofstrings):
1313 self.fhandle.seek(msgids_index[i][1])
1314 msgid = self.fhandle.read(msgids_index[i][0])
1315 self.fhandle.seek(msgstrs_index[i][1])
1316 msgstr = self.fhandle.read(msgstrs_index[i][0])
1317 if i == 0:
1318 raw_metadata, metadata = msgstr.split('\n'), {}
1319 for line in raw_metadata:
1320 tokens = line.split(':', 1)
1321 if tokens[0] != '':
1322 try:
1323 metadata[tokens[0]] = tokens[1].strip()
1324 except IndexError:
1325 metadata[tokens[0]] = ''
1326 self.instance.metadata = metadata
1327 continue
1328 entry = MOEntry(msgid=msgid, msgstr=msgstr)
1329 self.instance.append(entry)
1330
1331 self.fhandle.close()
1332 return self.instance
1333
1335 """
1336 Private method that unpack n bytes of data using format <fmt>.
1337 It returns a tuple or a mixed value if the tuple length is 1.
1338 """
1339 bytes = self.fhandle.read(numbytes)
1340 tup = struct.unpack(fmt, bytes)
1341 if len(tup) == 1:
1342 return tup[0]
1343 return tup
1344
1345
1346
1347
1348 if __name__ == '__main__':
1349 """
1350 **Main function**::
1351 - to **test** the module just run: *python polib.py [-v]*
1352 - to **profile** the module: *python polib.py -p <some_pofile.po>*
1353 """
1354 import sys
1355 if len(sys.argv) > 2 and sys.argv[1] == '-p':
1357 if f.endswith('po'):
1358 p = pofile(f)
1359 else:
1360 p = mofile(f)
1361 s = str(p)
1362 import profile
1363 profile.run('test("'+sys.argv[2]+'")')
1364 else:
1365 import doctest
1366 doctest.testmod()
1367
1368
1369