1
2
3
4
5
6
"""
**polib** allows you to manipulate, create and modify gettext files (pot, po
and mo files). You can load existing files, iterate through their entries,
add or modify entries, comments or metadata, etc., or create new po files
from scratch.

**polib** provides a simple and pythonic API, exporting only three
convenience functions (*pofile*, *mofile* and *detect_encoding*), and the
four core classes, *POFile*, *MOFile*, *POEntry* and *MOEntry* for creating
new files/entries.

**Basic example**:

>>> import polib
>>> # load an existing po file
>>> po = polib.pofile('tests/test_utf8.po')
>>> for entry in po:
...     # do something with entry...
...     pass
>>> # add an entry
>>> entry = polib.POEntry(msgid='Welcome', msgstr='Bienvenue')
>>> entry.occurrences = [('welcome.py', '12'), ('anotherfile.py', '34')]
>>> po.append(entry)
>>> # to save our modified po file:
>>> # po.save()
>>> # or you may want to compile the po file
>>> # po.save_as_mofile('tests/test_utf8.mo')
"""
35
36 __author__ = 'David JEAN LOUIS <izimobil@gmail.com>'
37 __version__ = '0.3.1'
38 __all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
39 'detect_encoding', 'escape', 'unescape']
40
41 import struct
42 import textwrap
43 import warnings
44
45 default_encoding = 'utf-8'
46
47
48
49
50 _dictget = dict.get
51 _listappend = list.append
52 _listpop = list.pop
53 _strjoin = str.join
54 _strsplit = str.split
55 _strstrip = str.strip
56 _strreplace = str.replace
57 _textwrap = textwrap.wrap
58
59
60
61
def pofile(fpath, **kwargs):
    """
    Parse the po/pot file *fpath* and return the resulting POFile instance.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the po/pot file to parse
      - *wrapwidth*: integer, the wrap width stored on the returned
        instance (optional, default to 78)
      - *autodetect_encoding*: boolean, if set to False the function will
        not try to detect the po file encoding (optional, default to True)
      - *encoding*: string, an encoding, only relevant if
        autodetect_encoding is set to False

    **Example**:

    >>> import polib
    >>> po = polib.pofile('tests/test_utf8.po')
    >>> po #doctest: +ELLIPSIS
    <POFile instance at ...>
    >>> import os, tempfile
    >>> for fname in ['test_iso-8859-15.po', 'test_utf8.po']:
    ...     orig_po = polib.pofile('tests/'+fname)
    ...     tmpf = tempfile.NamedTemporaryFile().name
    ...     orig_po.save(tmpf)
    ...     try:
    ...         new_po = polib.pofile(tmpf)
    ...         for old, new in zip(orig_po, new_po):
    ...             if old.msgid != new.msgid:
    ...                 old.msgid
    ...                 new.msgid
    ...             if old.msgstr != new.msgstr:
    ...                 old.msgid
    ...                 new.msgid
    ...     finally:
    ...         os.unlink(tmpf)
    """
    # The flag is compared with ``== True`` on purpose: only True (or a
    # value equal to it, such as 1) switches autodetection on.
    autodetect = kwargs.get('autodetect_encoding', True)
    if autodetect == True:
        enc = detect_encoding(fpath)
    else:
        enc = kwargs.get('encoding', default_encoding)

    # The encoding is only recorded on the instance; the parser itself is
    # created from the path alone.
    instance = _POFileParser(fpath).parse()
    instance.wrapwidth = kwargs.get('wrapwidth', 78)
    instance.encoding = enc
    return instance
108
109
110
111
def mofile(fpath, **kwargs):
    """
    Parse the mo file *fpath* and return the resulting MOFile instance.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the mo file to parse
      - *wrapwidth*: integer, the wrap width stored on the returned
        instance (optional, default to 78)
      - *autodetect_encoding*: boolean, if set to False the function will
        not try to detect the file encoding (optional, default to True)
      - *encoding*: string, an encoding, only relevant if
        autodetect_encoding is set to False

    **Example**:

    >>> import polib
    >>> mo = polib.mofile('tests/test_utf8.mo')
    >>> mo #doctest: +ELLIPSIS
    <MOFile instance at ...>
    >>> import os, tempfile
    >>> for fname in ['test_iso-8859-15.mo', 'test_utf8.mo']:
    ...     orig_mo = polib.mofile('tests/'+fname)
    ...     tmpf = tempfile.NamedTemporaryFile().name
    ...     orig_mo.save(tmpf)
    ...     try:
    ...         new_mo = polib.mofile(tmpf)
    ...         for old, new in zip(orig_mo, new_mo):
    ...             if old.msgid != new.msgid:
    ...                 old.msgstr
    ...                 new.msgstr
    ...     finally:
    ...         os.unlink(tmpf)
    """
    # The flag is compared with ``== True`` on purpose: only True (or a
    # value equal to it, such as 1) switches autodetection on.
    autodetect = kwargs.get('autodetect_encoding', True)
    if autodetect == True:
        enc = detect_encoding(fpath)
    else:
        enc = kwargs.get('encoding', default_encoding)

    # The encoding is only recorded on the instance; the parser itself is
    # created from the path alone.
    instance = _MOFileParser(fpath).parse()
    instance.wrapwidth = kwargs.get('wrapwidth', 78)
    instance.encoding = enc
    return instance
156
157
158
159
def detect_encoding(fpath):
    """
    Try to detect the encoding used by the file *fpath*. The function
    returns the module-level *default_encoding* if no charset declaration
    is found.

    **Keyword argument**:
      - *fpath*: string, full or relative path to the po or mo file.

    The file is scanned line by line for a ``Content-Type: ... charset=X``
    declaration and the first charset found is returned as written in the
    file (no case normalisation).

    **Examples**:

    >>> print(detect_encoding('tests/test_noencoding.po'))
    utf-8
    >>> print(detect_encoding('tests/test_utf8.po'))
    UTF-8
    >>> print(detect_encoding('tests/test_utf8.mo'))
    UTF-8
    >>> print(detect_encoding('tests/test_iso-8859-15.po'))
    ISO_8859-15
    >>> print(detect_encoding('tests/test_iso-8859-15.mo'))
    ISO_8859-15
    """
    import re
    rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
    f = open(fpath)
    # try/finally guarantees the handle is released on every exit path,
    # including an exception raised while reading or matching.
    try:
        for line in f:
            match = rx.search(line)
            if match:
                return match.group(1).strip()
    finally:
        f.close()
    return default_encoding
191
192
193
194
def escape(st):
    """
    Return *st* with backslash, tab, carriage return, newline and double
    quote characters replaced by their backslash escape sequences.

    **Examples**:

    >>> escape('\\t and \\n and \\r and " and \\\\')
    '\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\'
    """
    # Order matters: backslashes are doubled first so that the backslashes
    # introduced by the later replacements are not doubled again.
    substitutions = (
        ('\\', r'\\'),
        ('\t', r'\t'),
        ('\r', r'\r'),
        ('\n', r'\n'),
        ('\"', r'\"'),
    )
    for raw, quoted in substitutions:
        st = _strreplace(st, raw, quoted)
    return st
210
211
212
213
def unescape(st):
    """
    Return *st* with the escape sequences ``\\\\t``, ``\\\\n``, ``\\\\r``,
    ``\\\\"`` and ``\\\\\\\\`` replaced by the characters they stand for.

    The string is decoded in a single left-to-right pass, so an escaped
    backslash followed by ``n``, ``t``, ``r`` or ``"`` is decoded as a
    backslash followed by that letter, not as a control character.
    Unknown sequences and a lone trailing backslash are left untouched.

    **Examples**:

    >>> unescape('\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\')
    '\\t and \\n and \\r and " and \\\\'
    """
    import re

    decoded = {'n': '\n', 't': '\t', 'r': '\r', '"': '"', '\\': '\\'}

    def _decode_sequence(match):
        # group(1) is the character following the backslash.
        return decoded[match.group(1)]

    # Chained str.replace calls cannot tell the "\n" inside "\\n" apart
    # from a real newline escape; a single regex pass consumes each
    # backslash exactly once.
    return re.sub(r'\\([\\ntr"])', _decode_sequence, st)
229
230
231
232
234 """
235 Common parent class for POFile and MOFile classes.
236 This class must **not** be instanciated directly.
237 """
238
240 """
241 Constructor.
242
243 **Keyword arguments**:
244 - *fpath*: string, path to po or mo file
245 - *wrapwidth*: integer, the wrap width, only useful when -w option
246 was passed to xgettext to generate the po file that was used to
247 format the mo file, default to 78 (optional).
248 """
249 list.__init__(self)
250
251 self.fpath = fpath
252
253 self.wrapwidth = wrapwidth
254
255 self.encoding = encoding
256
257 self.header = ''
258
259 self.metadata = {}
260 self.metadata_is_fuzzy = 0
261
263 """String representation of the file."""
264 ret = []
265 entries = [self.metadata_as_entry()] + \
266 [e for e in self if not e.obsolete]
267 for entry in entries:
268 _listappend(ret, entry.__str__(self.wrapwidth))
269 for entry in self.obsolete_entries():
270 _listappend(ret, entry.__str__(self.wrapwidth))
271 return _strjoin('\n', ret)
272
274 """Return the official string representation of the object."""
275 return '<%s instance at %x>' % (self.__class__.__name__, id(self))
276
278 """Return the metadata as an entry"""
279 e = POEntry(msgid='')
280 mdata = self.ordered_metadata()
281 if mdata:
282 strs = []
283 for name, value in mdata:
284
285 value = _strjoin('\n', [_strstrip(v)
286 for v in _strsplit(value, '\n')])
287 _listappend(strs, '%s: %s' % (name, value))
288 e.msgstr = _strjoin('\n', strs) + '\n'
289 return e
290
def save(self, fpath=None, repr_method='__str__'):
    """
    Write the output of the method named *repr_method* to a file.

    The target is *fpath* when given, otherwise the instance's own
    ``fpath`` attribute; an existing file is overwritten.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the file.
      - *repr_method*: string, the name of the method of this object
        whose return value is written. When it is ``'to_binary'`` the
        file is opened in binary mode.

    Raises IOError if neither *fpath* nor ``self.fpath`` is set.
    """
    if self.fpath is None and fpath is None:
        raise IOError('You must provide a file path to save() method')
    # Render before opening the file so a failure while rendering does not
    # truncate an existing file.
    contents = getattr(self, repr_method)()
    if fpath is None:
        fpath = self.fpath
    if repr_method == 'to_binary':
        mode = 'wb'
    else:
        mode = 'w'
    fhandle = open(fpath, mode)
    # Close the handle even when write() fails (disk full, wrong type...).
    try:
        fhandle.write(contents)
    finally:
        fhandle.close()
313
def find(self, st, by='msgid'):
    """
    Return the first entry whose attribute *by* equals *st*, or None if
    no entry matches.

    **Keyword arguments**:
      - *st*: string, the string to search for
      - *by*: string, the name of the entry attribute to compare
        (default ``'msgid'``)

    **Examples**:

    >>> po = pofile('tests/test_utf8.po')
    >>> entry = po.find('Thursday')
    >>> entry.msgstr
    'Jueves'
    >>> entry = po.find('Some unexistant msgid')
    >>> entry is None
    True
    >>> entry = po.find('Jueves', 'msgstr')
    >>> entry.msgid
    'Thursday'
    """
    # Stop at the first match instead of materialising every match in a
    # list just to read its first element.
    for entry in self:
        if getattr(entry, by) == st:
            return entry
    return None
340
373
375 """Return the mofile binary representation."""
376 import struct
377 import array
378 output = ''
379 offsets = []
380 ids = strs = ''
381 entries = self.translated_entries()
382
383 def cmp(_self, other):
384 if _self.msgid > other.msgid:
385 return 1
386 elif _self.msgid < other.msgid:
387 return -1
388 else:
389 return 0
390 entries.sort(cmp)
391
392 mentry = self.metadata_as_entry()
393 mentry.msgstr = _strreplace(mentry.msgstr, '\\n', '').lstrip() + '\n'
394 entries = [mentry] + entries
395 entries_len = len(entries)
396 for e in entries:
397
398
399 msgid = e._decode(e.msgid)
400 msgstr = e._decode(e.msgstr)
401 offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
402 ids += msgid + '\0'
403 strs += msgstr + '\0'
404
405 keystart = 7*4+16*entries_len
406
407 valuestart = keystart + len(ids)
408 koffsets = []
409 voffsets = []
410
411
412 for o1, l1, o2, l2 in offsets:
413 koffsets += [l1, o1+keystart]
414 voffsets += [l2, o2+valuestart]
415 offsets = koffsets + voffsets
416 output = struct.pack("IIIIIII",
417 0x950412de,
418 0,
419 entries_len,
420 7*4,
421 7*4+entries_len*8,
422 0, 0)
423 output += array.array("I", offsets).tostring()
424 output += ids
425 output += strs
426 return output
427
428
429
430
432 '''
433 Po (or Pot) file reader/writer.
434 POFile objects inherit the list objects methods.
435
436 **Example**:
437
438 >>> po = POFile()
439 >>> entry1 = POEntry(
440 ... msgid="Some english text",
441 ... msgstr="Un texte en anglais"
442 ... )
443 >>> entry1.occurrences = [('testfile', 12),('another_file', 1)]
444 >>> entry1.comment = "Some useful comment"
445 >>> entry2 = POEntry(
446 ... msgid="Peace in some languages",
447 ... msgstr="Pace سلام שלום Hasîtî 和平"
448 ... )
449 >>> entry2.occurrences = [('testfile', 15),('another_file', 5)]
450 >>> entry2.comment = "Another useful comment"
451 >>> entry3 = POEntry(
452 ... msgid='Some entry with quotes " \\"',
453 ... msgstr='Un message unicode avec des quotes " \\"'
454 ... )
455 >>> entry3.comment = "Test string quoting"
456 >>> po.append(entry1)
457 >>> po.append(entry2)
458 >>> po.append(entry3)
459 >>> po.header = "Some Header"
460 >>> print(po)
461 # Some Header
462 msgid ""
463 msgstr ""
464 <BLANKLINE>
465 #. Some useful comment
466 #: testfile:12 another_file:1
467 msgid "Some english text"
468 msgstr "Un texte en anglais"
469 <BLANKLINE>
470 #. Another useful comment
471 #: testfile:15 another_file:5
472 msgid "Peace in some languages"
473 msgstr "Pace سلام שלום Hasîtî 和平"
474 <BLANKLINE>
475 #. Test string quoting
476 msgid "Some entry with quotes \\" \\""
477 msgstr "Un message unicode avec des quotes \\" \\""
478 <BLANKLINE>
479 '''
480
482 """Return the string representation of the po file"""
483 ret, headers = '', _strsplit(self.header, '\n')
484 for header in headers:
485 if header[:1] in [',', ':']:
486 ret += '#%s\n' % header
487 else:
488 ret += '# %s\n' % header
489 return ret + _BaseFile.__str__(self)
490
492 """
493 Save the binary representation of the file to *fpath*.
494
495 **Keyword arguments**:
496 - *fpath*: string, full or relative path to the file.
497 """
498 _BaseFile.save(self, fpath, 'to_binary')
499
501 """
502 Convenience method that return the percentage of translated
503 messages.
504
505 **Example**:
506
507 >>> import polib
508 >>> po = polib.pofile('tests/test_pofile_helpers.po')
509 >>> po.percent_translated()
510 50
511 >>> po = POFile()
512 >>> po.percent_translated()
513 100
514 """
515 total = len([e for e in self if not e.obsolete])
516 if total == 0:
517 return 100
518 translated = len(self.translated_entries())
519 return int((100.00 / float(total)) * translated)
520
522 """
523 Convenience method that return a list of translated entries.
524
525 **Example**:
526
527 >>> import polib
528 >>> po = polib.pofile('tests/test_pofile_helpers.po')
529 >>> len(po.translated_entries())
530 6
531 """
532 return [e for e in self if e.translated() and not e.obsolete]
533
535 """
536 Convenience method that return a list of untranslated entries.
537
538 **Example**:
539
540 >>> import polib
541 >>> po = polib.pofile('tests/test_pofile_helpers.po')
542 >>> len(po.untranslated_entries())
543 6
544 """
545 return [e for e in self if not e.translated() and not e.obsolete]
546
548 """
549 Convenience method that return the list of 'fuzzy' entries.
550
551 **Example**:
552
553 >>> import polib
554 >>> po = polib.pofile('tests/test_pofile_helpers.po')
555 >>> len(po.fuzzy_entries())
556 2
557 """
558 return [e for e in self if 'fuzzy' in e.flags]
559
561 """
562 Convenience method that return the list of obsolete entries.
563
564 **Example**:
565
566 >>> import polib
567 >>> po = polib.pofile('tests/test_pofile_helpers.po')
568 >>> len(po.obsolete_entries())
569 4
570 """
571 return [e for e in self if e.obsolete]
572
def merge(self, refpot):
    """
    Update this catalog in place from the reference catalog *refpot*.

    XXX this could not work if encodings are different, needs thinking
    and general refactoring of how polib handles encoding...

    For every entry of *refpot*:

      - if this file has no entry with the same msgid, a new POEntry
        carrying the reference msgid, occurrences and comment is
        appended (with no translation);
      - otherwise the occurrences and comment of the existing entry are
        replaced by the reference ones; its other attributes are kept.

    Finally every entry of this file whose msgid is absent from
    *refpot* is flagged obsolete.

    **Keyword argument**:
      - *refpot*: object POFile, the reference catalog.

    **Example**:

    >>> import polib
    >>> refpot = polib.pofile('tests/test_merge.pot')
    >>> po = polib.pofile('tests/test_merge_before.po')
    >>> po.merge(refpot)
    >>> expected_po = polib.pofile('tests/test_merge_after.po')
    >>> str(po) == str(expected_po)
    True
    """
    # Index our entries once by msgid (first occurrence wins, as a linear
    # search would) rather than rescanning the list per reference entry.
    known = {}
    for entry in self:
        known.setdefault(entry.msgid, entry)

    ref_msgids = set()
    for ref in refpot:
        ref_msgids.add(ref.msgid)
        mine = known.get(ref.msgid)
        if mine is None:
            # Copy the occurrences so later edits to one catalog do not
            # leak into the other through a shared list object.
            mine = POEntry(
                msgid=ref.msgid,
                occurrences=list(ref.occurrences),
                comment=ref.comment
            )
            self.append(mine)
            known[ref.msgid] = mine
        else:
            mine.occurrences = list(ref.occurrences)
            mine.comment = ref.comment

    # Whatever the reference catalog no longer mentions becomes obsolete.
    for entry in self:
        if entry.msgid not in ref_msgids:
            entry.obsolete = True
618
619
620
621
623 '''
624 Mo file reader/writer.
625 MOFile objects inherit the list objects methods.
626
627 **Example**:
628
629 >>> mo = MOFile()
630 >>> entry1 = POEntry(
631 ... msgid="Some english text",
632 ... msgstr="Un texte en anglais"
633 ... )
634 >>> entry2 = POEntry(
635 ... msgid="I need my dirty cheese",
636 ... msgstr="Je veux mon sale fromage"
637 ... )
638 >>> entry3 = MOEntry(
639 ... msgid='Some entry with quotes " \\"',
640 ... msgstr='Un message unicode avec des quotes " \\"'
641 ... )
642 >>> mo.append(entry1)
643 >>> mo.append(entry2)
644 >>> mo.append(entry3)
645 >>> print(mo)
646 msgid ""
647 msgstr ""
648 <BLANKLINE>
649 msgid "Some english text"
650 msgstr "Un texte en anglais"
651 <BLANKLINE>
652 msgid "I need my dirty cheese"
653 msgstr "Je veux mon sale fromage"
654 <BLANKLINE>
655 msgid "Some entry with quotes \\" \\""
656 msgstr "Un message unicode avec des quotes \\" \\""
657 <BLANKLINE>
658 '''
659
def __init__(self, fpath=None, wrapwidth=78):
    """
    MOFile constructor.

    *fpath* and *wrapwidth* are forwarded unchanged to
    ``_BaseFile.__init__``; the mo-specific attributes ``magic_number``
    and ``version`` start out as None and 0.
    """
    _BaseFile.__init__(self, fpath, wrapwidth)
    # No file has been parsed yet: magic number unknown, format version 0.
    self.magic_number, self.version = None, 0
668
670 """
671 Save the string representation of the file to *fpath*.
672
673 **Keyword argument**:
674 - *fpath*: string, full or relative path to the file.
675 """
676 _BaseFile.save(self, fpath)
677
def save(self, fpath=None):
    """
    Save the binary representation of the file to *fpath*.

    **Keyword argument**:
      - *fpath*: string, full or relative path to the file. Optional, to
        stay compatible with ``_BaseFile.save``: the value is passed
        through as is, and ``_BaseFile.save`` is the one that decides
        what happens when it is None.
    """
    _BaseFile.save(self, fpath, 'to_binary')
686
688 """
689 Convenience method to keep the same interface with POFile instances.
690 """
691 return 100
692
694 """
695 Convenience method to keep the same interface with POFile instances.
696 """
697 return self
698
700 """
701 Convenience method to keep the same interface with POFile instances.
702 """
703 return []
704
706 """
707 Convenience method to keep the same interface with POFile instances.
708 """
709 return []
710
712 """
713 Convenience method to keep the same interface with POFile instances.
714 """
715 return []
716
717
718
719
720 -class _BaseEntry(object):
721 """
722 Base class for POEntry or MOEntry objects.
723 This class must *not* be instanciated directly.
724 """
725
726 - def __init__(self, *args, **kwargs):
727 """Base Entry constructor."""
728 self.msgid = _dictget(kwargs, 'msgid', '')
729 self.msgstr = _dictget(kwargs, 'msgstr', '')
730 self.msgid_plural = _dictget(kwargs, 'msgid_plural', '')
731 self.msgstr_plural = _dictget(kwargs, 'msgstr_plural', {})
732 self.obsolete = _dictget(kwargs, 'obsolete', False)
733 self.encoding = _dictget(kwargs, 'encoding', default_encoding)
734
735 - def __repr__(self):
736 """Return the official string representation of the object."""
737 return '<%s instance at %x>' % (self.__class__.__name__, id(self))
738
739 - def __str__(self, wrapwidth=78):
740 """
741 Common string representation of the POEntry and MOEntry
742 objects.
743 """
744 if self.obsolete:
745 delflag = '#~ '
746 else:
747 delflag = ''
748
749 ret = []
750 ret += self._str_field("msgid", delflag, "", self.msgid)
751
752 if self.msgid_plural:
753 ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural)
754 if self.msgstr_plural:
755
756 msgstrs = self.msgstr_plural
757 keys = msgstrs.keys()
758 list(keys).sort()
759 for index in keys:
760 msgstr = msgstrs[index]
761 plural_index = '[%s]' % index
762 ret += self._str_field("msgstr", delflag, plural_index, msgstr)
763 else:
764
765 ret += self._str_field("msgstr", delflag, "", self.msgstr)
766 _listappend(ret, '')
767 return _strjoin('\n', ret)
768
769 - def _str_field(self, fieldname, delflag, plural_index, field):
770 field = self._decode(field)
771 lines = field.splitlines(True)
772
773
774 if len(lines) > 1:
775 lines = ['']+lines
776 else:
777 lines = [field]
778 ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
779 escape(_listpop(lines, 0)))]
780 for mstr in lines:
781 _listappend(ret, '%s"%s"' % (delflag, escape(mstr)))
782 return ret
783
784 - def _decode(self, st):
785 try:
786 if isinstance(st, unicode):
787 st = st.encode(self.encoding)
788 return st
789 except:
790 return st
791
792
793
794
class POEntry(_BaseEntry):
    """
    Represents a po file entry.

    **Examples**:

    >>> entry = POEntry(msgid='Welcome', msgstr='Bienvenue')
    >>> entry.occurrences = [('welcome.py', 12), ('anotherfile.py', 34)]
    >>> print(entry)
    #: welcome.py:12 anotherfile.py:34
    msgid "Welcome"
    msgstr "Bienvenue"
    <BLANKLINE>
    >>> entry = POEntry()
    >>> entry.occurrences = [('src/spam.c', 32), ('src/eggs.c', 45)]
    >>> entry.tcomment = 'A plural translation'
    >>> entry.flags.append('c-format')
    >>> entry.msgid = 'I have spam but no egg !'
    >>> entry.msgid_plural = 'I have spam and %d eggs !'
    >>> entry.msgstr_plural[0] = "J'ai du jambon mais aucun oeuf !"
    >>> entry.msgstr_plural[1] = "J'ai du jambon et %d oeufs !"
    >>> print(entry)
    # A plural translation
    #: src/spam.c:32 src/eggs.c:45
    #, c-format
    msgid "I have spam but no egg !"
    msgid_plural "I have spam and %d eggs !"
    msgstr[0] "J'ai du jambon mais aucun oeuf !"
    msgstr[1] "J'ai du jambon et %d oeufs !"
    <BLANKLINE>
    """

    def __init__(self, *args, **kwargs):
        """
        POEntry constructor.

        On top of the _BaseEntry keyword arguments it accepts *comment*
        (written with the '#. ' prefix), *tcomment* (written with the
        '# ' prefix), *occurrences* (list of (file, line) pairs) and
        *flags* (list of strings); all are optional.
        """
        _BaseEntry.__init__(self, *args, **kwargs)
        self.comment = kwargs.get('comment', '')
        self.tcomment = kwargs.get('tcomment', '')
        self.occurrences = kwargs.get('occurrences', [])
        self.flags = kwargs.get('flags', [])

    def __str__(self, wrapwidth=78):
        """
        Return the string representation of the entry.

        Comments, occurrences and flags are written before the message
        fields; comment and occurrence lines longer than *wrapwidth* are
        wrapped (a *wrapwidth* of 0 or less disables wrapping). Obsolete
        entries are rendered by _BaseEntry alone, without those lines.
        """
        if self.obsolete:
            return _BaseEntry.__str__(self)
        ret = []
        if self.comment != '':
            ret += self._comment_lines(self.comment, '#. ', wrapwidth)
        if self.tcomment != '':
            ret += self._comment_lines(self.tcomment, '# ', wrapwidth)
        if self.occurrences:
            filestr = ' '.join(['%s:%s' % (self._decode(fpath), lineno)
                                for fpath, lineno in self.occurrences])
            if wrapwidth > 0 and len(filestr) + 3 > wrapwidth:
                # textwrap would break file names at hyphens, so hyphens
                # are masked with '*' while wrapping and restored after.
                # NOTE: a file name that really contains '*' comes out
                # with '-' instead.
                lines = textwrap.wrap(filestr.replace('-', '*'),
                                      wrapwidth,
                                      initial_indent='#: ',
                                      subsequent_indent='#: ',
                                      break_long_words=False)
                for line in lines:
                    ret.append(line.replace('*', '-'))
            else:
                ret.append('#: ' + filestr)
        if self.flags:
            ret.append('#, %s' % ', '.join(self.flags))
        ret.append(_BaseEntry.__str__(self))
        return '\n'.join(ret)

    def _comment_lines(self, text, prefix, wrapwidth):
        """
        Return the po lines for the (possibly multi-line) comment *text*,
        each starting with *prefix* and wrapped to *wrapwidth* columns
        when *wrapwidth* is positive.
        """
        lines = []
        for line in self._decode(text).split('\n'):
            if wrapwidth > 0 and len(line) > wrapwidth - len(prefix):
                # wrap() returns a list: extend, never append, otherwise
                # the final join receives a nested list and fails.
                lines += textwrap.wrap(line, wrapwidth,
                                       initial_indent=prefix,
                                       subsequent_indent=prefix,
                                       break_long_words=False)
            else:
                lines.append(prefix + line)
        return lines

    def __cmp__(self, other):
        '''
        Called by comparison operations if rich comparison is not defined.

        Obsolete entries sort before the others; entries are then ordered
        by their sorted occurrences (file first, line second, a shorter
        list first when it is a prefix of the other) and finally by
        msgid. Returns -1, 0 or 1.

        **Tests**:
        >>> a  = POEntry(msgid='a', occurrences=[('b.py', 1), ('b.py', 3)])
        >>> b  = POEntry(msgid='b', occurrences=[('b.py', 1), ('b.py', 3)])
        >>> c1 = POEntry(msgid='c1', occurrences=[('a.py', 1), ('b.py', 1)])
        >>> c2 = POEntry(msgid='c2', occurrences=[('a.py', 1), ('a.py', 3)])
        >>> po = POFile()
        >>> po.append(a)
        >>> po.append(b)
        >>> po.append(c1)
        >>> po.append(c2)
        >>> po.sort()
        >>> print(po)
        #
        msgid ""
        msgstr ""
        <BLANKLINE>
        #: a.py:1 a.py:3
        msgid "c2"
        msgstr ""
        <BLANKLINE>
        #: a.py:1 b.py:1
        msgid "c1"
        msgstr ""
        <BLANKLINE>
        #: b.py:1 b.py:3
        msgid "a"
        msgstr ""
        <BLANKLINE>
        #: b.py:1 b.py:3
        msgid "b"
        msgstr ""
        <BLANKLINE>
        '''
        if self.obsolete != other.obsolete:
            if self.obsolete:
                return -1
            else:
                return 1

        # (file, line) pairs order naturally as tuples; no custom
        # comparator is needed to sort them.
        occ1 = sorted(self.occurrences)
        occ2 = sorted(other.occurrences)

        for pos, entry1 in enumerate(occ1):
            try:
                entry2 = occ2[pos]
            except IndexError:
                # other's occurrences are a strict prefix of ours
                return 1
            if entry1[0] != entry2[0]:
                if entry1[0] > entry2[0]:
                    return 1
                else:
                    return -1
            if entry1[1] != entry2[1]:
                if entry1[1] > entry2[1]:
                    return 1
                else:
                    return -1
        if len(occ1) < len(occ2):
            # mirror of the IndexError case, keeps the ordering symmetric
            return -1

        if self.msgid > other.msgid:
            return 1
        if self.msgid < other.msgid:
            return -1
        return 0

    def translated(self):
        """
        Return True if the entry has been translated or False.

        Obsolete and fuzzy entries are never considered translated. A
        plural entry counts as translated only when every plural form
        is non-empty.
        """
        if self.obsolete or 'fuzzy' in self.flags:
            return False
        if self.msgstr != '':
            return True
        if self.msgstr_plural:
            for pos in self.msgstr_plural:
                if self.msgstr_plural[pos] == '':
                    return False
            return True
        return False
989
990
991
992
class MOEntry(_BaseEntry):
    """
    Represents a mo file entry.

    A mo entry carries only the fields handled by _BaseEntry and is
    rendered exactly as _BaseEntry renders them.

    **Examples**:

    >>> entry = MOEntry()
    >>> entry.msgid = 'translate me !'
    >>> entry.msgstr = 'traduisez moi !'
    >>> print(entry)
    msgid "translate me !"
    msgstr "traduisez moi !"
    <BLANKLINE>
    """

    def __str__(self, wrapwidth=78):
        """
        Return the string representation of the entry, as produced by
        _BaseEntry.__str__ for the given *wrapwidth*.
        """
        rendered = _BaseEntry.__str__(self, wrapwidth)
        return rendered
1013
1014
1015
1016
1018 """
1019 A finite state machine to parse efficiently and correctly po
1020 file format.
1021 """
1022
1024 """
1025 Constructor.
1026
1027 **Keyword argument**:
1028 - *fpath*: string, path to the po file
1029 """
1030 self.fhandle = open(fpath, 'r')
1031 self.instance = POFile(fpath=fpath)
1032 self.transitions = {}
1033 self.current_entry = POEntry()
1034 self.current_state = 'ST'
1035 self.current_token = None
1036
1037 self.msgstr_index = 0
1038 self.entry_obsolete = 0
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052 all_ = ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'MS', 'MP', 'MX', 'MI']
1053
1054 self.add('TC', ['ST', 'HE'], 'HE')
1055 self.add('TC', ['GC', 'OC', 'FL', 'TC', 'MS', 'MP', 'MX', 'MI'], 'TC')
1056 self.add('GC', all_, 'GC')
1057 self.add('OC', all_, 'OC')
1058 self.add('FL', all_, 'FL')
1059 self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'MS', 'MX'], 'MI')
1060 self.add('MP', ['TC', 'GC', 'MI'], 'MP')
1061 self.add('MS', ['MI', 'MP', 'TC'], 'MS')
1062 self.add('MX', ['MI', 'MX', 'MP', 'TC'], 'MX')
1063 self.add('MC', ['MI', 'MP', 'MS', 'MX'], 'MC')
1064
1066 """
1067 Run the state machine, parse the file line by line and call process()
1068 with the current matched symbol.
1069 """
1070 i, lastlen = 1, 0
1071 for line in self.fhandle:
1072 line = _strstrip(line)
1073 if line == '':
1074 i = i+1
1075 continue
1076 if line[:3] == '#~ ':
1077 line = line[3:]
1078 self.entry_obsolete = 1
1079 else:
1080 self.entry_obsolete = 0
1081 self.current_token = line
1082 if line[:2] == '#:':
1083
1084 self.process('OC', i)
1085 elif line[:7] == 'msgid "':
1086
1087 self.process('MI', i)
1088 elif line[:8] == 'msgstr "':
1089
1090 self.process('MS', i)
1091 elif line[:1] == '"':
1092
1093 self.process('MC', i)
1094 elif line[:14] == 'msgid_plural "':
1095
1096 self.process('MP', i)
1097 elif line[:7] == 'msgstr[':
1098
1099 self.process('MX', i)
1100 elif line[:3] == '#, ':
1101
1102 self.process('FL', i)
1103 elif line[:2] == '# ' or line == '#':
1104 if line == '#': line = line + ' '
1105
1106 self.process('TC', i)
1107 elif line[:2] == '#.':
1108
1109 self.process('GC', i)
1110 i = i+1
1111
1112 if self.current_entry:
1113
1114
1115 _listappend(self.instance, self.current_entry)
1116
1117
1118 firstentry = self.instance[0]
1119 if firstentry.msgid == '':
1120
1121 firstentry = _listpop(self.instance, 0)
1122 self.instance.metadata_is_fuzzy = firstentry.flags
1123 key = None
1124 for msg in firstentry.msgstr.splitlines():
1125 try:
1126 key, val = _strsplit(msg, ':', 1)
1127 self.instance.metadata[key] = _strstrip(val)
1128 except:
1129 if key is not None:
1130 self.instance.metadata[key] += '\n'+_strstrip(msg)
1131
1132 self.fhandle.close()
1133 return self.instance
1134
def add(self, symbol, states, next_state):
    """
    Register transitions in the state machine.

    For every state in *states*, reading *symbol* will run the handler
    method ``handle_<next_state in lower case>`` of this object; the
    handler and *next_state* are stored together in ``self.transitions``
    under the key ``(symbol, state)``.

    Keywords arguments:

    symbol     -- string, the matched token (two chars symbol)
    states     -- list, a list of states (two chars symbols)
    next_state -- the next state the fsm will have after the action
    """
    handler_name = 'handle_%s' % next_state.lower()
    for origin in states:
        # The handler is looked up per state, so nothing is resolved when
        # *states* is empty.
        self.transitions[(symbol, origin)] = (getattr(self, handler_name),
                                              next_state)
1147
def process(self, symbol, linenum):
    """
    Run the transition registered for *symbol* in the current state.

    The handler stored for ``(symbol, current state)`` is called; the
    machine moves to the associated next state only when the handler
    returns a true value. A missing transition, or any exception raised
    by the handler, is reported as a syntax error.

    Keywords arguments:
    symbol  -- string, the matched token (two chars symbol)
    linenum -- integer, the current line number of the parsed file

    Raises IOError('Syntax error in po file (line N)').
    """
    try:
        key = (symbol, self.current_state)
        handler, target = self.transitions[key]
        accepted = handler()
        if accepted:
            self.current_state = target
    except Exception:
        raise IOError('Syntax error in po file (line %s)' % linenum)
1163
1164
1165
1167 """Handle a header comment."""
1168 if self.instance.header != '':
1169 self.instance.header += '\n'
1170 self.instance.header += self.current_token[2:]
1171 return 1
1172
1174 """Handle a translator comment."""
1175 if self.current_state in ['MC', 'MS', 'MX']:
1176 _listappend(self.instance, self.current_entry)
1177 self.current_entry = POEntry()
1178 if self.current_entry.tcomment != '':
1179 self.current_entry.tcomment += '\n'
1180 self.current_entry.tcomment += self.current_token[2:]
1181 return True
1182
1184 """Handle a generated comment."""
1185 if self.current_state in ['MC', 'MS', 'MX']:
1186 _listappend(self.instance, self.current_entry)
1187 self.current_entry = POEntry()
1188 if self.current_entry.comment != '':
1189 self.current_entry.comment += '\n'
1190 self.current_entry.comment += self.current_token[3:]
1191 return True
1192
1194 """Handle a file:num occurence."""
1195 if self.current_state in ['MC', 'MS', 'MX']:
1196 _listappend(self.instance, self.current_entry)
1197 self.current_entry = POEntry()
1198 occurrences = _strsplit(self.current_token[3:])
1199 for occurrence in occurrences:
1200 if occurrence != '':
1201 fil, line = _strsplit(occurrence, ':')
1202 _listappend(self.current_entry.occurrences, (fil, line))
1203 return True
1204
1206 """Handle a flags line."""
1207 if self.current_state in ['MC', 'MS', 'MX']:
1208 _listappend(self.instance, self.current_entry)
1209 self.current_entry = POEntry()
1210 self.current_entry.flags += _strsplit(self.current_token[3:], ', ')
1211 return True
1212
1214 """Handle a msgid."""
1215 if self.current_state in ['MC', 'MS', 'MX']:
1216 _listappend(self.instance, self.current_entry)
1217 self.current_entry = POEntry()
1218 self.current_entry.obsolete = self.entry_obsolete
1219 self.current_entry.msgid = unescape(self.current_token[7:-1])
1220 return True
1221
1223 """Handle a msgid plural."""
1224 self.current_entry.msgid_plural = unescape(self.current_token[14:-1])
1225 return True
1226
1228 """Handle a msgstr."""
1229 self.current_entry.msgstr = unescape(self.current_token[8:-1])
1230 return True
1231
1233 """Handle a msgstr plural."""
1234 index, value = self.current_token[7], self.current_token[11:-1]
1235 self.current_entry.msgstr_plural[index] = unescape(value)
1236 self.msgstr_index = index
1237 return True
1238
1240 """Handle a msgid or msgstr continuation line."""
1241 if self.current_state == 'MI':
1242 self.current_entry.msgid += unescape(self.current_token[1:-1])
1243 elif self.current_state == 'MP':
1244 self.current_entry.msgid_plural += \
1245 unescape(self.current_token[1:-1])
1246 elif self.current_state == 'MS':
1247 self.current_entry.msgstr += unescape(self.current_token[1:-1])
1248 elif self.current_state == 'MX':
1249 msgstr = self.current_entry.msgstr_plural[self.msgstr_index] +\
1250 unescape(self.current_token[1:-1])
1251 self.current_entry.msgstr_plural[self.msgstr_index] = msgstr
1252
1253 return False
1254
1255
1256
1257
1259 """
1260 A class to parse binary mo files.
1261 """
1262 BIG_ENDIAN = 0xde120495
1263 LITTLE_ENDIAN = 0x950412de
1264
1266 """_MOFileParser constructor."""
1267 self.fhandle = open(fpath, 'rb')
1268 self.instance = MOFile(fpath)
1269
1271 """
1272 Parse the magic number and raise an exception if not valid.
1273 """
1274
1276 """
1277 Build the instance with the file handle provided in the
1278 constructor.
1279 """
1280 magic_number = self._readbinary('<I', 4)
1281 if magic_number == self.LITTLE_ENDIAN:
1282 ii = '<II'
1283 elif magic_number == self.BIG_ENDIAN:
1284 ii = '>II'
1285 else:
1286 raise IOError('Invalid mo file, magic number is incorrect !')
1287 self.instance.magic_number = magic_number
1288
1289 self.instance.version, numofstrings = self._readbinary(ii, 8)
1290
1291 msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
1292
1293 self.fhandle.seek(msgids_hash_offset)
1294 msgids_index = []
1295 for i in range(numofstrings):
1296 _listappend(msgids_index, self._readbinary(ii, 8))
1297
1298 self.fhandle.seek(msgstrs_hash_offset)
1299 msgstrs_index = []
1300 for i in range(numofstrings):
1301 _listappend(msgstrs_index, self._readbinary(ii, 8))
1302
1303 for i in range(numofstrings):
1304 self.fhandle.seek(msgids_index[i][1])
1305 msgid = self.fhandle.read(msgids_index[i][0])
1306 self.fhandle.seek(msgstrs_index[i][1])
1307 msgstr = self.fhandle.read(msgstrs_index[i][0])
1308 if i == 0:
1309 raw_metadata, metadata = _strsplit(msgstr, '\n'), {}
1310 for line in raw_metadata:
1311 tokens = _strsplit(line, ':', 1)
1312 if tokens[0] != '':
1313 try:
1314 metadata[tokens[0]] = _strstrip(tokens[1])
1315 except IndexError:
1316 metadata[tokens[0]] = ''
1317 self.instance.metadata = metadata
1318 continue
1319 entry = MOEntry(msgid=msgid, msgstr=msgstr)
1320 _listappend(self.instance, entry)
1321
1322 self.fhandle.close()
1323 return self.instance
1324
1326 """
1327 Private method that unpack n bytes of data using format <fmt>.
1328 It returns a tuple or a mixed value if the tuple length is 1.
1329 """
1330 bytes = self.fhandle.read(numbytes)
1331 tup = struct.unpack(fmt, bytes)
1332 if len(tup) == 1:
1333 return tup[0]
1334 return tup
1335
1336
1337
1338
if __name__ == '__main__':
    """
    **Main function**::
      - to **test** the module just run: *python polib.py [-v]*
      - to **profile** the module: *python polib.py -p <some_pofile.po>*
    """
    import sys
    if len(sys.argv) > 2 and sys.argv[1] == '-p':
        def test(f):
            # Parse the file (po or mo, chosen by its suffix) and render
            # it, so both parsing and serialisation get profiled.
            if f.endswith('po'):
                p = pofile(f)
            else:
                p = mofile(f)
            s = str(p)
        import profile
        # Hand the path over as a variable rather than pasting it into the
        # profiled statement: a path holding a quote or a backslash
        # (e.g. C:\tmp\x.po) would otherwise be mangled or break the
        # statement.
        profile.runctx('test(path)', globals(),
                       {'test': test, 'path': sys.argv[2]})
    else:
        import doctest
        doctest.testmod()
1358
1359
1360