Module polib
[hide private]
[frames] | no frames]

Source Code for Module polib

   1  #!/usr/bin/env python 
   2  # -*- coding: utf-8 -*- 
   3  # 
   4  # License: MIT (see LICENSE file provided) 
   5  # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: 
   6   
   7  """ 
   8  **polib** allows you to manipulate, create, modify gettext files (pot, po 
   9  and mo files).  You can load existing files, iterate through its entries, 
  10  add, modify entries, comments or metadata, etc... or create new po files 
  11  from scratch. 
  12   
  13  **polib** provides a simple and pythonic API, exporting only three 
  14  convenience functions (*pofile*, *mofile* and *detect_encoding*), and the 
  15  four core classes, *POFile*, *MOFile*, *POEntry* and *MOEntry* for creating 
  16  new files/entries. 
  17   
  18  **Basic example**: 
  19   
  20  >>> import polib 
  21  >>> # load an existing po file 
  22  >>> po = polib.pofile('tests/test_utf8.po') 
  23  >>> for entry in po: 
  24  ...     # do something with entry... 
  25  ...     pass 
  26  >>> # add an entry 
  27  >>> entry = polib.POEntry(msgid='Welcome', msgstr='Bienvenue') 
  28  >>> entry.occurrences = [('welcome.py', '12'), ('anotherfile.py', '34')] 
  29  >>> po.append(entry) 
  30  >>> # to save our modified po file: 
  31  >>> # po.save() 
  32  >>> # or you may want to compile the po file 
  33  >>> # po.save_as_mofile('tests/test_utf8.mo') 
  34  """ 
  35   
  36  __author__    = 'David JEAN LOUIS <izimobil@gmail.com>' 
  37  __version__   = '0.4.1' 
  38  __all__       = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry', 
  39                   'detect_encoding', 'escape', 'unescape'] 
  40   
  41  import struct 
  42  import textwrap 
  43  import warnings 
  44   
  45  default_encoding = 'utf-8' 
  46   
  47  # function pofile() {{{ 
  48   
def pofile(fpath, **kwargs):
    """
    Parse the po/pot file *fpath* and return the resulting POFile instance.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the po/pot file to parse
      - *wrapwidth*: integer, recorded on the returned instance as its wrap
        width (optional, default to 78)
      - *autodetect_encoding*: boolean, when True (the default) the encoding
        is obtained by calling detect_encoding() on *fpath*
      - *encoding*: string, the encoding recorded on the instance; only
        looked at when *autodetect_encoding* is not True (default to the
        module level *default_encoding*)

    **Example**:

    >>> import polib
    >>> po = polib.pofile('tests/test_weird_occurrences.po')
    >>> po #doctest: +ELLIPSIS
    <POFile instance at ...>
    >>> import os, tempfile
    >>> for fname in ['test_iso-8859-15.po', 'test_utf8.po']:
    ...     orig_po = polib.pofile('tests/'+fname)
    ...     tmpf = tempfile.NamedTemporaryFile().name
    ...     orig_po.save(tmpf)
    ...     try:
    ...         new_po = polib.pofile(tmpf)
    ...         for old, new in zip(orig_po, new_po):
    ...             if old.msgid != new.msgid:
    ...                 old.msgid
    ...                 new.msgid
    ...             if old.msgstr != new.msgstr:
    ...                 old.msgid
    ...                 new.msgid
    ...     finally:
    ...         os.unlink(tmpf)
    """
    autodetect = kwargs.get('autodetect_encoding', True)
    # the flag is compared with == (not tested for truthiness) so that only
    # True / 1 switch the detection on, exactly as callers have always seen
    if autodetect == True:
        encoding = detect_encoding(fpath)
    else:
        encoding = kwargs.get('encoding', default_encoding)
    po = _POFileParser(fpath).parse()
    po.encoding = encoding
    po.wrapwidth = kwargs.get('wrapwidth', 78)
    return po
95 96 # }}} 97 # function mofile() {{{ 98
def mofile(fpath, **kwargs):
    """
    Parse the mo file *fpath* and return the resulting MOFile instance.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the mo file to parse
      - *wrapwidth*: integer, recorded on the returned instance as its wrap
        width (optional, default to 78)
      - *autodetect_encoding*: boolean, when True (the default) the encoding
        is obtained by calling detect_encoding() on *fpath*
      - *encoding*: string, the encoding recorded on the instance; only
        looked at when *autodetect_encoding* is not True (default to the
        module level *default_encoding*)

    **Example**:

    >>> import polib
    >>> mo = polib.mofile('tests/test_utf8.mo')
    >>> mo #doctest: +ELLIPSIS
    <MOFile instance at ...>
    >>> import os, tempfile
    >>> for fname in ['test_iso-8859-15.mo', 'test_utf8.mo']:
    ...     orig_mo = polib.mofile('tests/'+fname)
    ...     tmpf = tempfile.NamedTemporaryFile().name
    ...     orig_mo.save(tmpf)
    ...     try:
    ...         new_mo = polib.mofile(tmpf)
    ...         for old, new in zip(orig_mo, new_mo):
    ...             if old.msgid != new.msgid:
    ...                 old.msgstr
    ...                 new.msgstr
    ...     finally:
    ...         os.unlink(tmpf)
    """
    autodetect = kwargs.get('autodetect_encoding', True)
    # the flag is compared with == (not tested for truthiness) so that only
    # True / 1 switch the detection on, exactly as callers have always seen
    if autodetect == True:
        encoding = detect_encoding(fpath)
    else:
        encoding = kwargs.get('encoding', default_encoding)
    mo = _MOFileParser(fpath).parse()
    mo.encoding = encoding
    mo.wrapwidth = kwargs.get('wrapwidth', 78)
    return mo
143 144 # }}} 145 # function detect_encoding() {{{ 146
def detect_encoding(fpath):
    """
    Try to detect the encoding used by the file *fpath*.

    The file is scanned line by line for a ``Content-Type: ... charset=XXX``
    declaration; the first charset found is returned (stripped of
    surrounding whitespace).  When no such declaration exists the module
    level *default_encoding* is returned.

    **Keyword argument**:
      - *fpath*: string, full or relative path to the po or mo file to
        inspect.

    **Examples**:

    >>> print(detect_encoding('tests/test_noencoding.po'))
    utf-8
    >>> print(detect_encoding('tests/test_utf8.po'))
    UTF-8
    >>> print(detect_encoding('tests/test_utf8.mo'))
    UTF-8
    >>> print(detect_encoding('tests/test_iso-8859-15.po'))
    ISO_8859-15
    >>> print(detect_encoding('tests/test_iso-8859-15.mo'))
    ISO_8859-15
    """
    import re
    rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
    f = open(fpath)
    # try/finally guarantees the handle is released on every exit path:
    # early return on a match, normal end of file, or an error while reading
    try:
        for line in f:
            match = rx.search(line)
            if match:
                return match.group(1).strip()
    finally:
        f.close()
    return default_encoding
178 179 # }}} 180 # function escape() {{{ 181
def escape(st):
    """
    Return the string *st* with backslash, tab, carriage return, newline
    and double quote characters replaced by their backslash escapes.

    **Examples**:

    >>> escape('\\t and \\n and \\r and " and \\\\')
    '\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\'
    """
    # The backslash has to come first: the other substitutions introduce
    # backslashes of their own, which must not be doubled afterwards.
    substitutions = (
        ('\\', r'\\'),
        ('\t', r'\t'),
        ('\r', r'\r'),
        ('\n', r'\n'),
        ('\"', r'\"'),
    )
    for raw, escaped in substitutions:
        st = st.replace(raw, escaped)
    return st
197 198 # }}} 199 # function unescape() {{{ 200
def unescape(st):
    """
    Unescape special chars and return the given string *st*.

    The recognised escapes are ``\\\\n``, ``\\\\r``, ``\\\\t``, ``\\\\"`` and
    the escaped backslash.  The string is decoded in a single left-to-right
    pass, so an escaped backslash followed by ``n``, ``r``, ``t`` or ``"``
    is never mistaken for another escape sequence.  Backslashes that do not
    start a recognised escape are left untouched.

    **Examples**:

    >>> unescape('\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\')
    '\\t and \\n and \\r and " and \\\\'
    >>> unescape('\\\\\\\\n')
    '\\\\n'
    """
    import re

    def _unescape_char(match):
        """Return the character denoted by one matched escape sequence."""
        char = match.group(1)
        if char == 'n':
            return '\n'
        if char == 't':
            return '\t'
        if char == 'r':
            return '\r'
        # an escaped backslash or double quote stands for itself
        return char

    # One pass over the string: chained str.replace() calls would re-read
    # the output of an earlier replacement and corrupt sequences such as an
    # escaped backslash immediately followed by the letter "n".
    return re.sub(r'\\([\\trn"])', _unescape_char, st)
216 217 # }}} 218 # class _BaseFile {{{ 219
class _BaseFile(list):
    """
    Common parent class for POFile and MOFile classes.
    This class must **not** be instantiated directly.

    It is a list of entries; the methods below also rely on the
    translated_entries() and obsolete_entries() methods that the concrete
    subclasses provide.
    """

    def __init__(self, fpath=None, wrapwidth=78, encoding=default_encoding):
        """
        Constructor.

        **Keyword arguments**:
          - *fpath*: string, path to po or mo file
          - *wrapwidth*: integer, the wrap width, only useful when -w option
            was passed to xgettext to generate the po file that was used to
            format the mo file, default to 78 (optional).
          - *encoding*: string, the file encoding, default to the module
            level *default_encoding* (optional).
        """
        list.__init__(self)
        # the path of the file this object was loaded from / is saved to
        self.fpath = fpath
        # the width at which lines should be wrapped
        self.wrapwidth = wrapwidth
        # the file encoding
        self.encoding = encoding
        # header
        self.header = ''
        # both po and mo files have metadata
        self.metadata = {}
        self.metadata_is_fuzzy = 0

    def __str__(self):
        """
        String representation of the file: the metadata entry, then the
        non obsolete entries, then the obsolete ones.
        """
        ret = []
        entries = [self.metadata_as_entry()] + \
                  [e for e in self if not e.obsolete]
        for entry in entries:
            ret.append(entry.__str__(self.wrapwidth))
        for entry in self.obsolete_entries():
            ret.append(entry.__str__(self.wrapwidth))
        return '\n'.join(ret)

    def __repr__(self):
        """Return the official string representation of the object."""
        return '<%s instance at %x>' % (self.__class__.__name__, id(self))

    def metadata_as_entry(self):
        """
        Return the metadata as a POEntry whose msgid is the empty string
        and whose msgstr holds one "name: value" line per metadata item.
        """
        e = POEntry(msgid='')
        mdata = self.ordered_metadata()
        if mdata:
            strs = []
            for name, value in mdata:
                # Strip whitespace off each line in a multi-line entry
                value = '\n'.join([v.strip() for v in value.split('\n')])
                strs.append('%s: %s' % (name, value))
            e.msgstr = '\n'.join(strs) + '\n'
        return e

    def save(self, fpath=None, repr_method='__str__'):
        """
        Write the file to *fpath*, or to the path the object already knows
        (self.fpath) when *fpath* is not given.

        **Keyword arguments**:
          - *fpath*: string, full or relative path to the file.
          - *repr_method*: string, the name of the method called to produce
            the output; the file is opened in binary mode when it is
            'to_binary'.

        Raises IOError when neither *fpath* nor self.fpath is set.
        """
        if self.fpath is None and fpath is None:
            raise IOError('You must provide a file path to save() method')
        contents = getattr(self, repr_method)()
        if fpath is None:
            fpath = self.fpath
        mode = 'w'
        if repr_method == 'to_binary':
            mode += 'b'
        fhandle = open(fpath, mode)
        # always release the handle, even when the write itself fails
        try:
            fhandle.write(contents)
        finally:
            fhandle.close()

    def find(self, st, by='msgid'):
        """
        Find entry which msgid (or property identified by the *by*
        attribute) matches the string *st*.  The first matching entry is
        returned, or None when there is no match.

        **Keyword arguments**:
          - *st*: string, the string to search for
          - *by*: string, the comparison attribute

        **Examples**:

        >>> po = pofile('tests/test_utf8.po')
        >>> entry = po.find('Thursday')
        >>> entry.msgstr
        'Jueves'
        >>> entry = po.find('Some unexistant msgid')
        >>> entry is None
        True
        >>> entry = po.find('Jueves', 'msgstr')
        >>> entry.msgid
        'Thursday'
        """
        # stop at the first hit instead of collecting every match
        for entry in self:
            if getattr(entry, by) == st:
                return entry
        return None

    def ordered_metadata(self):
        """
        Convenience method that return the metadata ordered. The return
        value is list of tuples (metadata name, metadata_value).

        The well known headers come first, in their conventional order;
        every other key follows in sorted order.
        """
        # copy the dict first
        metadata = self.metadata.copy()
        data_order = [
            'Project-Id-Version',
            'Report-Msgid-Bugs-To',
            'POT-Creation-Date',
            'PO-Revision-Date',
            'Last-Translator',
            'Language-Team',
            'MIME-Version',
            'Content-Type',
            'Content-Transfer-Encoding'
        ]
        ordered_data = []
        for data in data_order:
            try:
                value = metadata.pop(data)
                ordered_data.append((data, value))
            except KeyError:
                pass
        # there are no specs for the order of the rest of the metadata, so
        # sort it by name to get a deterministic output
        for data in sorted(metadata):
            ordered_data.append((data, metadata[data]))
        return ordered_data

    def to_binary(self):
        """Return the mofile binary representation."""
        import array
        offsets = []
        ids = strs = ''
        # the keys are sorted in the .mo file; sorted() builds a new list,
        # so whatever translated_entries() returned is left in its own order
        entries = sorted(self.translated_entries(), key=lambda e: e.msgid)
        # add metadata entry
        mentry = self.metadata_as_entry()
        mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip() + '\n'
        entries = [mentry] + entries
        entries_len = len(entries)
        for e in entries:
            # For each string, we need size and file offset.  Each string is
            # NUL terminated; the NUL does not count into the size.
            msgid = e._decode(e.msgid)
            msgstr = e._decode(e.msgstr)
            offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
            ids += msgid + '\0'
            strs += msgstr + '\0'
        # The header is 7 32-bit unsigned integers.
        keystart = 7*4+16*entries_len
        # and the values start after the keys
        valuestart = keystart + len(ids)
        koffsets = []
        voffsets = []
        # The string table first has the list of keys, then the list of values.
        # Each entry has first the size of the string, then the file offset.
        for o1, l1, o2, l2 in offsets:
            koffsets += [l1, o1+keystart]
            voffsets += [l2, o2+valuestart]
        offsets = koffsets + voffsets
        output = struct.pack("IIIIIII",
                             0x950412de,         # Magic number
                             0,                  # Version
                             entries_len,        # # of entries
                             7*4,                # start of key index
                             7*4+entries_len*8,  # start of value index
                             0, 0)               # size and offset of hash table
        output += array.array("I", offsets).tostring()
        output += ids
        output += strs
        return output
413 414 # }}} 415 # class POFile {{{ 416
class POFile(_BaseFile):
    '''
    Reader/writer for po (or pot) files.
    A POFile is a list of entries and offers every list method.

    **Example**:

    >>> po = POFile()
    >>> entry1 = POEntry(
    ...     msgid="Some english text",
    ...     msgstr="Un texte en anglais"
    ... )
    >>> entry1.occurrences = [('testfile', 12),('another_file', 1)]
    >>> entry1.comment = "Some useful comment"
    >>> entry2 = POEntry(
    ...     msgid="Peace in some languages",
    ...     msgstr="Pace سلام שלום Hasîtî 和平"
    ... )
    >>> entry2.occurrences = [('testfile', 15),('another_file', 5)]
    >>> entry2.comment = "Another useful comment"
    >>> entry3 = POEntry(
    ...     msgid='Some entry with quotes " \\"',
    ...     msgstr='Un message unicode avec des quotes " \\"'
    ... )
    >>> entry3.comment = "Test string quoting"
    >>> po.append(entry1)
    >>> po.append(entry2)
    >>> po.append(entry3)
    >>> po.header = "Some Header"
    >>> print(po)
    # Some Header
    msgid ""
    msgstr ""
    <BLANKLINE>
    #. Some useful comment
    #: testfile:12 another_file:1
    msgid "Some english text"
    msgstr "Un texte en anglais"
    <BLANKLINE>
    #. Another useful comment
    #: testfile:15 another_file:5
    msgid "Peace in some languages"
    msgstr "Pace سلام שלום Hasîtî 和平"
    <BLANKLINE>
    #. Test string quoting
    msgid "Some entry with quotes \\" \\""
    msgstr "Un message unicode avec des quotes \\" \\""
    <BLANKLINE>
    '''

    def __str__(self):
        """
        Return the po file as a string: the header turned into comment
        lines, followed by the entries as rendered by _BaseFile.
        """
        comment_lines = []
        for line in self.header.split('\n'):
            # header lines that already start with "," or ":" are glued to
            # the "#"; every other line gets a separating space
            if line[:1] in [',', ':']:
                comment_lines.append('#%s\n' % line)
            else:
                comment_lines.append('# %s\n' % line)
        return ''.join(comment_lines) + _BaseFile.__str__(self)

    def save_as_mofile(self, fpath):
        """
        Write the binary (mo) representation of the file to *fpath*.

        **Keyword arguments**:
          - *fpath*: string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath, repr_method='to_binary')

    def percent_translated(self):
        """
        Return the percentage of translated messages as an integer; a file
        without any non obsolete entry counts as 100.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> po.percent_translated()
        50
        >>> po = POFile()
        >>> po.percent_translated()
        100
        """
        active = 0
        for entry in self:
            if not entry.obsolete:
                active += 1
        if active == 0:
            return 100
        done = len(self.translated_entries())
        return int((100.00 / float(active)) * done)

    def translated_entries(self):
        """
        Return the list of entries that are translated and not obsolete.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.translated_entries())
        6
        """
        selected = []
        for entry in self:
            if entry.translated() and not entry.obsolete:
                selected.append(entry)
        return selected

    def untranslated_entries(self):
        """
        Return the list of entries that are neither translated nor
        obsolete.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.untranslated_entries())
        6
        """
        selected = []
        for entry in self:
            if not (entry.translated() or entry.obsolete):
                selected.append(entry)
        return selected

    def fuzzy_entries(self):
        """
        Return the list of entries carrying the 'fuzzy' flag.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.fuzzy_entries())
        2
        """
        selected = []
        for entry in self:
            if 'fuzzy' in entry.flags:
                selected.append(entry)
        return selected

    def obsolete_entries(self):
        """
        Return the list of entries marked as obsolete.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.obsolete_entries())
        4
        """
        selected = []
        for entry in self:
            if entry.obsolete:
                selected.append(entry)
        return selected

    def merge(self, refpot):
        """
        XXX this may not work if encodings are different, needs thinking
        and general refactoring of how polib handles encoding...

        Merge the current pofile with the pot file provided, in the manner
        of the gettext msgmerge utility:

          - every entry of *refpot* is merged into the entry of this file
            that has the same msgid; a new entry is appended when this file
            has none
          - entries of this file whose msgid is not in *refpot* are marked
            as obsolete

        **Keyword argument**:
          - *refpot*: object POFile, the reference catalog.

        **Example**:

        >>> import polib
        >>> refpot = polib.pofile('tests/test_merge.pot')
        >>> po = polib.pofile('tests/test_merge_before.po')
        >>> po.merge(refpot)
        >>> expected_po = polib.pofile('tests/test_merge_after.po')
        >>> str(po) == str(expected_po)
        True
        """
        for pot_entry in refpot:
            target = self.find(pot_entry.msgid)
            if target is None:
                target = POEntry()
                self.append(target)
            target.merge(pot_entry)
        # whatever is not part of the reference catalog anymore becomes
        # obsolete
        for own_entry in self:
            if refpot.find(own_entry.msgid) is None:
                own_entry.obsolete = True
596 597 # }}} 598 # class MOFile {{{ 599
class MOFile(_BaseFile):
    '''
    Mo file reader/writer.
    MOFile objects inherit the list objects methods.

    **Example**:

    >>> mo = MOFile()
    >>> entry1 = POEntry(
    ...     msgid="Some english text",
    ...     msgstr="Un texte en anglais"
    ... )
    >>> entry2 = POEntry(
    ...     msgid="I need my dirty cheese",
    ...     msgstr="Je veux mon sale fromage"
    ... )
    >>> entry3 = MOEntry(
    ...     msgid='Some entry with quotes " \\"',
    ...     msgstr='Un message unicode avec des quotes " \\"'
    ... )
    >>> mo.append(entry1)
    >>> mo.append(entry2)
    >>> mo.append(entry3)
    >>> print(mo)
    msgid ""
    msgstr ""
    <BLANKLINE>
    msgid "Some english text"
    msgstr "Un texte en anglais"
    <BLANKLINE>
    msgid "I need my dirty cheese"
    msgstr "Je veux mon sale fromage"
    <BLANKLINE>
    msgid "Some entry with quotes \\" \\""
    msgstr "Un message unicode avec des quotes \\" \\""
    <BLANKLINE>
    '''

    def __init__(self, fpath=None, wrapwidth=78, encoding=default_encoding):
        """
        MOFile constructor.

        **Keyword arguments**:
          - *fpath*: string, path to the mo file
          - *wrapwidth*: integer, the wrap width (optional, default to 78)
          - *encoding*: string, the file encoding, forwarded to the
            _BaseFile constructor (optional, default to the module level
            *default_encoding*)
        """
        _BaseFile.__init__(self, fpath, wrapwidth, encoding)
        self.magic_number = None
        self.version = 0

    def save_as_pofile(self, fpath):
        """
        Save the string representation of the file to *fpath*.

        **Keyword argument**:
          - *fpath*: string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath)

    def save(self, fpath=None):
        """
        Save the binary representation of the file to *fpath*.

        **Keyword argument**:
          - *fpath*: string, full or relative path to the file; when
            omitted, _BaseFile.save() falls back on the path stored on the
            object and raises IOError if there is none.
        """
        _BaseFile.save(self, fpath, 'to_binary')

    def percent_translated(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always return 100.
        """
        return 100

    def translated_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Return a new list holding every entry of the file, so that callers
        may sort or alter the result without touching the file itself.
        """
        return list(self)

    def untranslated_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always return an empty list.
        """
        return []

    def fuzzy_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always return an empty list.
        """
        return []

    def obsolete_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always return an empty list.
        """
        return []
694 695 # }}} 696 # class _BaseEntry {{{ 697
698 -class _BaseEntry(object):
699 """ 700 Base class for POEntry or MOEntry objects. 701 This class must *not* be instanciated directly. 702 """ 703
704 - def __init__(self, *args, **kwargs):
705 """Base Entry constructor.""" 706 self.msgid = kwargs.get('msgid', '') 707 self.msgstr = kwargs.get('msgstr', '') 708 self.msgid_plural = kwargs.get('msgid_plural', '') 709 self.msgstr_plural = kwargs.get('msgstr_plural', {}) 710 self.obsolete = kwargs.get('obsolete', False) 711 self.encoding = kwargs.get('encoding', default_encoding)
712
713 - def __repr__(self):
714 """Return the official string representation of the object.""" 715 return '<%s instance at %x>' % (self.__class__.__name__, id(self))
716
717 - def __str__(self, wrapwidth=78):
718 """ 719 Common string representation of the POEntry and MOEntry 720 objects. 721 """ 722 if self.obsolete: 723 delflag = '#~ ' 724 else: 725 delflag = '' 726 # write the msgid 727 ret = [] 728 ret += self._str_field("msgid", delflag, "", self.msgid) 729 # write the msgid_plural if any 730 if self.msgid_plural: 731 ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural) 732 if self.msgstr_plural: 733 # write the msgstr_plural if any 734 msgstrs = self.msgstr_plural 735 keys = list(msgstrs) 736 keys.sort() 737 for index in keys: 738 msgstr = msgstrs[index] 739 plural_index = '[%s]' % index 740 ret += self._str_field("msgstr", delflag, plural_index, msgstr) 741 else: 742 # otherwise write the msgstr 743 ret += self._str_field("msgstr", delflag, "", self.msgstr) 744 ret.append('') 745 return '\n'.join(ret)
746
747 - def _str_field(self, fieldname, delflag, plural_index, field):
748 field = self._decode(field) 749 lines = field.splitlines(True) # keep line breaks in strings 750 # potentially, we could do line-wrapping here, but textwrap.wrap 751 # treats whitespace too carelessly for us to use it. 752 if len(lines) > 1: 753 lines = ['']+lines # start with initial empty line 754 else: 755 lines = [field] # needed for the empty string case 756 ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index, 757 escape(lines.pop(0)))] 758 for mstr in lines: 759 ret.append('%s"%s"' % (delflag, escape(mstr))) 760 return ret
761
762 - def _decode(self, st):
763 try: 764 if isinstance(st, unicode): 765 st = st.encode(self.encoding) 766 return st 767 except: 768 return st
769 770 # }}} 771 # class POEntry {{{ 772
class POEntry(_BaseEntry):
    """
    Represents a po file entry.

    **Examples**:

    >>> entry = POEntry(msgid='Welcome', msgstr='Bienvenue')
    >>> entry.occurrences = [('welcome.py', 12), ('anotherfile.py', 34)]
    >>> print(entry)
    #: welcome.py:12 anotherfile.py:34
    msgid "Welcome"
    msgstr "Bienvenue"
    <BLANKLINE>
    >>> entry = POEntry()
    >>> entry.occurrences = [('src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c', 32), ('src/eggs.c', 45)]
    >>> entry.comment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
    >>> entry.tcomment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
    >>> entry.flags.append('c-format')
    >>> entry.msgid = 'I have spam but no egg !'
    >>> entry.msgid_plural = 'I have spam and %d eggs !'
    >>> entry.msgstr_plural[0] = "J'ai du jambon mais aucun oeuf !"
    >>> entry.msgstr_plural[1] = "J'ai du jambon et %d oeufs !"
    >>> print(entry)
    #. A plural translation. This is a very very very long line please do not
    #. wrap, this is just for testing comment wrapping...
    # A plural translation. This is a very very very long line please do not wrap,
    # this is just for testing comment wrapping...
    #: src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c:32
    #: src/eggs.c:45
    #, c-format
    msgid "I have spam but no egg !"
    msgid_plural "I have spam and %d eggs !"
    msgstr[0] "J'ai du jambon mais aucun oeuf !"
    msgstr[1] "J'ai du jambon et %d oeufs !"
    <BLANKLINE>
    """

    def __init__(self, *args, **kwargs):
        """
        POEntry constructor.

        Accepts the keyword arguments of _BaseEntry plus (all optional):
          - *comment*: string, the generated ("#.") comment (default to '')
          - *tcomment*: string, the translator ("# ") comment (default
            to '')
          - *occurrences*: list of (file path, line number) tuples (default
            to a new empty list)
          - *flags*: list of strings, the entry flags (default to a new
            empty list)
        """
        _BaseEntry.__init__(self, *args, **kwargs)
        self.comment = kwargs.get('comment', '')
        self.tcomment = kwargs.get('tcomment', '')
        self.occurrences = kwargs.get('occurrences', [])
        self.flags = kwargs.get('flags', [])

    def __str__(self, wrapwidth=78):
        """
        Return the string representation of the entry.

        Comments, occurrences and flags are written first (comments and
        occurrences are wrapped at *wrapwidth* columns when it is greater
        than zero), then the message itself.  An obsolete entry is written
        without its comments, occurrences and flags.
        """
        if self.obsolete:
            return _BaseEntry.__str__(self)
        ret = []
        # comment first, if any (with text wrapping as xgettext does)
        if self.comment != '':
            for comment in self._decode(self.comment).split('\n'):
                if wrapwidth > 0 and len(comment) > wrapwidth-3:
                    ret += textwrap.wrap(comment, wrapwidth,
                                         initial_indent='#. ',
                                         subsequent_indent='#. ',
                                         break_long_words=False)
                else:
                    ret.append('#. %s' % comment)
        # translator comment, if any (with text wrapping as xgettext does)
        if self.tcomment != '':
            for tcomment in self._decode(self.tcomment).split('\n'):
                if wrapwidth > 0 and len(tcomment) > wrapwidth-2:
                    ret += textwrap.wrap(tcomment, wrapwidth,
                                         initial_indent='# ',
                                         subsequent_indent='# ',
                                         break_long_words=False)
                else:
                    ret.append('# %s' % tcomment)
        # occurrences (with text wrapping as xgettext does)
        if self.occurrences:
            filelist = []
            for fpath, lineno in self.occurrences:
                if lineno:
                    filelist.append('%s:%s' % (self._decode(fpath), lineno))
                else:
                    filelist.append('%s' % (self._decode(fpath)))
            filestr = ' '.join(filelist)
            if wrapwidth > 0 and len(filestr)+3 > wrapwidth:
                # XXX textwrap split words that contain hyphen, this is not
                # what we want for filenames, so the dirty hack is to
                # temporally replace hyphens with a char that a file cannot
                # contain, like "*"
                lines = textwrap.wrap(filestr.replace('-', '*'),
                                      wrapwidth,
                                      initial_indent='#: ',
                                      subsequent_indent='#: ',
                                      break_long_words=False)
                # end of the replace hack
                for line in lines:
                    ret.append(line.replace('*', '-'))
            else:
                ret.append('#: '+filestr)
        # flags
        if self.flags:
            ret.append('#, %s' % ', '.join(self.flags))
        ret.append(_BaseEntry.__str__(self))
        return '\n'.join(ret)

    def __cmp__(self, other):
        '''
        Called by comparison operations if rich comparison is not defined.

        Entries are ordered by: obsolete state (obsolete entries first),
        then their sorted occurrences compared pairwise (when one list is
        a prefix of the other, the shorter one comes first), then msgid.
        Return -1, 0 or 1; 0 means that all three criteria are equal.

        **Tests**:
        >>> a  = POEntry(msgid='a', occurrences=[('b.py', 1), ('b.py', 3)])
        >>> b  = POEntry(msgid='b', occurrences=[('b.py', 1), ('b.py', 3)])
        >>> c1 = POEntry(msgid='c1', occurrences=[('a.py', 1), ('b.py', 1)])
        >>> c2 = POEntry(msgid='c2', occurrences=[('a.py', 1), ('a.py', 3)])
        >>> po = POFile()
        >>> po.append(a)
        >>> po.append(b)
        >>> po.append(c1)
        >>> po.append(c2)
        >>> po.sort()
        >>> print(po)
        # 
        msgid ""
        msgstr ""
        <BLANKLINE>
        #: a.py:1 a.py:3
        msgid "c2"
        msgstr ""
        <BLANKLINE>
        #: a.py:1 b.py:1
        msgid "c1"
        msgstr ""
        <BLANKLINE>
        #: b.py:1 b.py:3
        msgid "a"
        msgstr ""
        <BLANKLINE>
        #: b.py:1 b.py:3
        msgid "b"
        msgstr ""
        <BLANKLINE>
        '''
        # First: Obsolete test
        if self.obsolete != other.obsolete:
            if self.obsolete:
                return -1
            else:
                return 1
        # Work on sorted copies to protect the originals; occurrences are
        # ordered by file path, then line number
        occ1 = sorted(self.occurrences)
        occ2 = sorted(other.occurrences)
        # Comparing sorted occurrences
        pos = 0
        for entry1 in occ1:
            try:
                entry2 = occ2[pos]
            except IndexError:
                # other ran out of occurrences first
                return 1
            pos = pos + 1
            if entry1[0] != entry2[0]:
                if entry1[0] > entry2[0]:
                    return 1
                else:
                    return -1
            if entry1[1] != entry2[1]:
                if entry1[1] > entry2[1]:
                    return 1
                else:
                    return -1
        if len(occ1) < len(occ2):
            # mirror of the IndexError case above, so that swapping the
            # operands always flips the sign of the result
            return -1
        # Finally: Compare message ID
        if self.msgid > other.msgid:
            return 1
        if self.msgid < other.msgid:
            return -1
        return 0

    def translated(self):
        """
        Return True if the entry has been translated or False.

        An obsolete or fuzzy entry is never translated.  Otherwise the
        entry is translated when its msgstr is not empty, or when it has
        plural translations and none of them is empty.
        """
        if self.obsolete or 'fuzzy' in self.flags:
            return False
        if self.msgstr != '':
            return True
        if self.msgstr_plural:
            for pos in self.msgstr_plural:
                if self.msgstr_plural[pos] == '':
                    return False
            return True
        return False

    def merge(self, other):
        """
        Merge the current entry with the given pot entry.

        The msgid, occurrences, generated comment, flags and msgid_plural
        are taken from *other*; existing translations are kept and an empty
        translation is added for every plural index that only *other* has.
        """
        self.msgid = other.msgid
        # copy the lists: both entries must stay independently editable
        self.occurrences = list(other.occurrences)
        self.comment = other.comment
        self.flags = list(other.flags)
        self.msgid_plural = other.msgid_plural
        if other.msgstr_plural:
            for pos in other.msgstr_plural:
                # keep existing translation at pos if any
                self.msgstr_plural.setdefault(pos, '')
992 993 # }}} 994 # class MOEntry {{{ 995
class MOEntry(_BaseEntry):
    """
    A single entry of a mo file.

    **Examples**:

    >>> entry = MOEntry()
    >>> entry.msgid = 'translate me !'
    >>> entry.msgstr = 'traduisez moi !'
    >>> print(entry)
    msgid "translate me !"
    msgstr "traduisez moi !"
    <BLANKLINE>
    """

    def __str__(self, wrapwidth=78):
        """
        Return the string representation of the entry, as built by the
        base class.
        """
        return super(MOEntry, self).__str__(wrapwidth)
1016 1017 # }}} 1018 # class _POFileParser {{{ 1019
1020 -class _POFileParser(object):
1021 """ 1022 A finite state machine to parse efficiently and correctly po 1023 file format. 1024 """ 1025
1026 - def __init__(self, fpath):
1027 """ 1028 Constructor. 1029 1030 **Keyword argument**: 1031 - *fpath*: string, path to the po file 1032 """ 1033 self.fhandle = open(fpath, 'r') 1034 self.instance = POFile(fpath=fpath) 1035 self.transitions = {} 1036 self.current_entry = POEntry() 1037 self.current_state = 'ST' 1038 self.current_token = None 1039 # two memo flags used in handlers 1040 self.msgstr_index = 0 1041 self.entry_obsolete = 0 1042 # Configure the state machine, by adding transitions. 1043 # Signification of symbols: 1044 # * ST: Beginning of the file (start) 1045 # * HE: Header 1046 # * TC: a translation comment 1047 # * GC: a generated comment 1048 # * OC: a file/line occurence 1049 # * FL: a flags line 1050 # * MI: a msgid 1051 # * MP: a msgid plural 1052 # * MS: a msgstr 1053 # * MX: a msgstr plural 1054 # * MC: a msgid or msgstr continuation line 1055 all_ = ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'MS', 'MP', 'MX', 'MI'] 1056 1057 self.add('TC', ['ST', 'HE'], 'HE') 1058 self.add('TC', ['GC', 'OC', 'FL', 'TC', 'MS', 'MP', 'MX', 'MI'], 'TC') 1059 self.add('GC', all_, 'GC') 1060 self.add('OC', all_, 'OC') 1061 self.add('FL', all_, 'FL') 1062 self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'MS', 'MX'], 'MI') 1063 self.add('MP', ['TC', 'GC', 'MI'], 'MP') 1064 self.add('MS', ['MI', 'MP', 'TC'], 'MS') 1065 self.add('MX', ['MI', 'MX', 'MP', 'TC'], 'MX') 1066 self.add('MC', ['MI', 'MP', 'MS', 'MX'], 'MC')
1067
1068 - def parse(self):
1069 """ 1070 Run the state machine, parse the file line by line and call process() 1071 with the current matched symbol. 1072 """ 1073 i, lastlen = 1, 0 1074 for line in self.fhandle: 1075 line = line.strip() 1076 if line == '': 1077 i = i+1 1078 continue 1079 if line[:3] == '#~ ': 1080 line = line[3:] 1081 self.entry_obsolete = 1 1082 else: 1083 self.entry_obsolete = 0 1084 self.current_token = line 1085 if line[:2] == '#:': 1086 # we are on a occurrences line 1087 self.process('OC', i) 1088 elif line[:7] == 'msgid "': 1089 # we are on a msgid 1090 self.process('MI', i) 1091 elif line[:8] == 'msgstr "': 1092 # we are on a msgstr 1093 self.process('MS', i) 1094 elif line[:1] == '"': 1095 # we are on a continuation line or some metadata 1096 self.process('MC', i) 1097 elif line[:14] == 'msgid_plural "': 1098 # we are on a msgid plural 1099 self.process('MP', i) 1100 elif line[:7] == 'msgstr[': 1101 # we are on a msgstr plural 1102 self.process('MX', i) 1103 elif line[:3] == '#, ': 1104 # we are on a flags line 1105 self.process('FL', i) 1106 elif line[:2] == '# ' or line == '#': 1107 if line == '#': line = line + ' ' 1108 # we are on a translator comment line 1109 self.process('TC', i) 1110 elif line[:2] == '#.': 1111 # we are on a generated comment line 1112 self.process('GC', i) 1113 i = i+1 1114 1115 if self.current_entry: 1116 # since entries are added when another entry is found, we must add 1117 # the last entry here (only if there are lines) 1118 self.instance.append(self.current_entry) 1119 # before returning the instance, check if there's metadata and if 1120 # so extract it in a dict 1121 firstentry = self.instance[0] 1122 if firstentry.msgid == '': # metadata found 1123 # remove the entry 1124 firstentry = self.instance.pop(0) 1125 self.instance.metadata_is_fuzzy = firstentry.flags 1126 key = None 1127 for msg in firstentry.msgstr.splitlines(): 1128 try: 1129 key, val = msg.split(':', 1) 1130 self.instance.metadata[key] = val.strip() 1131 except: 1132 if 
key is not None: 1133 self.instance.metadata[key] += '\n'+ msg.strip() 1134 # close opened file 1135 self.fhandle.close() 1136 return self.instance
1137
def add(self, symbol, states, next_state):
    """
    Register a transition of the state machine for each given state.

    Keywords arguments:

    symbol     -- string, the matched token (two chars symbol)
    states     -- list, the states (two chars symbols) from which the
                  transition may be taken
    next_state -- string, the state the fsm will have after the action;
                  its lowercased form selects the ``handle_<state>``
                  method that is stored as the action
    """
    for source_state in states:
        # The handler is resolved per state (not once up front) so that an
        # empty ``states`` list never triggers an attribute lookup.
        handler = getattr(self, 'handle_%s' % next_state.lower())
        transition_key = (symbol, source_state)
        self.transitions[transition_key] = (handler, next_state)
1150
def process(self, symbol, linenum):
    """
    Process the transition corresponding to the current state and the
    symbol provided.

    The action registered for ``(symbol, current state)`` is called; the
    state machine only moves to the registered next state when that
    action returns a true value.

    Keywords arguments:
    symbol  -- string, the matched token (two chars symbol)
    linenum -- integer, the current line number of the parsed file

    Raises IOError when no transition is registered for the pair, or when
    the action itself raises an exception.
    """
    try:
        action, next_state = self.transitions[(symbol, self.current_state)]
        if action():
            self.current_state = next_state
    except Exception:
        # Any failure (unknown transition or a handler choking on the
        # token) is reported as a syntax error at the current line.
        raise IOError('Syntax error in po file (line %s)' % linenum)
1166 1167 # state handlers 1168
def handle_he(self):
    """
    Handle a header comment.

    Appends the current token, minus its first two characters, to the
    header of the instance being built; successive header lines are
    joined with a newline.

    Returns True (like the other state handlers) so that process()
    moves the state machine to the next state.
    """
    if self.instance.header != '':
        self.instance.header += '\n'
    self.instance.header += self.current_token[2:]
    return True
1175
def handle_tc(self):
    """
    Handle a translator comment.

    When the previous state was a msgstr, a msgstr plural or a
    continuation line, the entry being built is stored on the instance
    and a fresh entry is started first.  The current token, minus its
    first two characters, is then added to the entry's translator
    comment, one line per token.
    """
    if self.current_state in ('MC', 'MS', 'MX'):
        self.instance.append(self.current_entry)
        self.current_entry = POEntry()
    entry = self.current_entry
    text = self.current_token[2:]
    if entry.tcomment == '':
        entry.tcomment = text
    else:
        entry.tcomment = entry.tcomment + '\n' + text
    return True
1185
def handle_gc(self):
    """
    Handle a generated comment.

    When the previous state was a msgstr, a msgstr plural or a
    continuation line, the entry being built is stored on the instance
    and a fresh entry is started first.  The current token, minus its
    first three characters, is then added to the entry's comment, one
    line per token.
    """
    if self.current_state in ('MC', 'MS', 'MX'):
        self.instance.append(self.current_entry)
        self.current_entry = POEntry()
    entry = self.current_entry
    text = self.current_token[3:]
    if entry.comment == '':
        entry.comment = text
    else:
        entry.comment = entry.comment + '\n' + text
    return True
1195
def handle_oc(self):
    """
    Handle a file:num occurrence line.

    When the previous state was a msgstr, a msgstr plural or a
    continuation line, the entry being built is stored on the instance
    and a fresh entry is started first.

    The current token, minus its first three characters, is split on
    whitespace and every reference is appended to the entry's
    occurrences as a ``(file, line)`` tuple of strings:

    - the reference is split on its LAST colon, so file names that
      themselves contain colons (``C:/src/a.py:12``) keep their line
      number;
    - when there is no colon, or what follows the last colon is not
      made of digits only, the whole reference is kept untouched as the
      file name and the line is the empty string.
    """
    if self.current_state in ['MC', 'MS', 'MX']:
        self.instance.append(self.current_entry)
        self.current_entry = POEntry()
    # str.split() without argument never yields empty strings, so no
    # additional emptiness check is needed.
    for occurrence in self.current_token[3:].split():
        try:
            fil, line = occurrence.rsplit(':', 1)
        except ValueError:
            # no colon at all: a bare file name
            fil, line = occurrence, ''
        else:
            if not line.isdigit():
                # the colon belongs to the file name, keep it intact
                fil, line = occurrence, ''
        self.current_entry.occurrences.append((fil, line))
    return True
1213
def handle_fl(self):
    """
    Handle a flags line.

    When the previous state was a msgstr, a msgstr plural or a
    continuation line, the entry being built is stored on the instance
    and a fresh entry is started first.  The current token, minus its
    first three characters, is split on ``', '`` and the resulting
    flags are added to the entry's flags.
    """
    if self.current_state in ('MC', 'MS', 'MX'):
        self.instance.append(self.current_entry)
        self.current_entry = POEntry()
    parsed_flags = self.current_token[3:].split(', ')
    self.current_entry.flags.extend(parsed_flags)
    return True
1221
def handle_mi(self):
    """
    Handle a msgid.

    When the previous state was a msgstr, a msgstr plural or a
    continuation line, the entry being built is stored on the instance
    and a fresh entry is started first.  The entry then receives the
    obsolete marker recorded for the current line and, as msgid, the
    unescaped text found between the ``msgid "`` prefix and the last
    character of the token.
    """
    if self.current_state in ('MC', 'MS', 'MX'):
        self.instance.append(self.current_entry)
        self.current_entry = POEntry()
    entry = self.current_entry
    entry.obsolete = self.entry_obsolete
    quoted_text = self.current_token[7:-1]
    entry.msgid = unescape(quoted_text)
    return True
1230
def handle_mp(self):
    """
    Handle a msgid plural.

    Stores on the current entry the unescaped text found between the
    14-character ``msgid_plural "`` prefix and the last character of
    the token.
    """
    quoted_text = self.current_token[14:-1]
    self.current_entry.msgid_plural = unescape(quoted_text)
    return True
1235
def handle_ms(self):
    """
    Handle a msgstr.

    Stores on the current entry the unescaped text found between the
    8-character ``msgstr "`` prefix and the last character of the
    token.
    """
    quoted_text = self.current_token[8:-1]
    self.current_entry.msgstr = unescape(quoted_text)
    return True
1240
def handle_mx(self):
    """
    Handle a msgstr plural.

    The plural index is the single character at position 7 of the token
    (right after ``msgstr[``) and the translation is the unescaped text
    between position 11 and the last character of the token.  The index
    is remembered so that following continuation lines are appended to
    the same plural form.
    """
    token = self.current_token
    plural_index = token[7]
    quoted_text = token[11:-1]
    self.current_entry.msgstr_plural[plural_index] = unescape(quoted_text)
    self.msgstr_index = plural_index
    return True
1247
def handle_mc(self):
    """
    Handle a msgid or msgstr continuation line.

    The unescaped text between the first and the last character of the
    token is appended to whichever field the current state designates:
    msgid, msgid plural, msgstr, or the msgstr plural form whose index
    was recorded last.  In any other state the token is ignored.

    Always returns False so that process() leaves the current state
    unchanged.
    """
    state = self.current_state
    entry = self.current_entry
    if state == 'MI':
        entry.msgid = entry.msgid + unescape(self.current_token[1:-1])
    elif state == 'MP':
        entry.msgid_plural = entry.msgid_plural + \
            unescape(self.current_token[1:-1])
    elif state == 'MS':
        entry.msgstr = entry.msgstr + unescape(self.current_token[1:-1])
    elif state == 'MX':
        plural_index = self.msgstr_index
        entry.msgstr_plural[plural_index] = \
            entry.msgstr_plural[plural_index] + \
            unescape(self.current_token[1:-1])
    # don't change the current state
    return False
1263 1264 # }}} 1265 # class _MOFileParser {{{ 1266
class _MOFileParser(object):
    """
    A class to parse binary mo files.
    """
    # Magic number as read with a little-endian '<I' format: the first
    # value selects big-endian decoding for the rest of the file, the
    # second one little-endian decoding.
    BIG_ENDIAN    = 0xde120495
    LITTLE_ENDIAN = 0x950412de

    def __init__(self, fpath):
        """
        _MOFileParser constructor.

        Opens *fpath* in binary mode and creates the MOFile instance that
        parse() will fill in.
        """
        self.fhandle = open(fpath, 'rb')
        self.instance = MOFile(fpath)

    def parse_magicnumber(self):
        """
        Parse the magic number and raise an exception if not valid.

        NOTE: this method is currently an empty placeholder; the magic
        number is actually read and validated by parse().
        """

    def parse(self):
        """
        Build the instance with the file handle provided in the
        constructor.

        The first string of the file is not turned into an entry: its
        translation is split into lines and stored as the metadata
        dictionary of the instance.

        The file handle is always closed before this method returns,
        including when parsing fails.

        Raises IOError if the magic number is not one of the two known
        values.
        """
        try:
            magic_number = self._readbinary('<I', 4)
            if magic_number == self.LITTLE_ENDIAN:
                ii = '<II'
            elif magic_number == self.BIG_ENDIAN:
                ii = '>II'
            else:
                raise IOError('Invalid mo file, magic number is incorrect !')
            self.instance.magic_number = magic_number
            # parse the version number and the number of strings
            self.instance.version, numofstrings = self._readbinary(ii, 8)
            # original strings and translation strings hash table offset
            msgids_hash_offset, msgstrs_hash_offset = \
                self._readbinary(ii, 8)
            # move to msgid hash table and read length and offset of msgids
            self.fhandle.seek(msgids_hash_offset)
            msgids_index = [self._readbinary(ii, 8)
                            for _ in range(numofstrings)]
            # move to msgstr hash table and read length and offset of
            # msgstrs
            self.fhandle.seek(msgstrs_hash_offset)
            msgstrs_index = [self._readbinary(ii, 8)
                             for _ in range(numofstrings)]
            # build entries
            for i in range(numofstrings):
                msgid_length, msgid_offset = msgids_index[i]
                msgstr_length, msgstr_offset = msgstrs_index[i]
                self.fhandle.seek(msgid_offset)
                msgid = self.fhandle.read(msgid_length)
                self.fhandle.seek(msgstr_offset)
                msgstr = self.fhandle.read(msgstr_length)
                if i == 0: # metadata
                    metadata = {}
                    for line in msgstr.split('\n'):
                        tokens = line.split(':', 1)
                        key = tokens[0]
                        if key == '':
                            continue
                        if len(tokens) > 1:
                            metadata[key] = tokens[1].strip()
                        else:
                            # a line without any colon: key with no value
                            metadata[key] = ''
                    self.instance.metadata = metadata
                    continue
                entry = MOEntry(msgid=msgid, msgstr=msgstr)
                self.instance.append(entry)
        finally:
            # close opened file, whether parsing succeeded or not
            self.fhandle.close()
        return self.instance

    def _readbinary(self, fmt, numbytes):
        """
        Private method that unpack n bytes of data using format <fmt>.
        It returns a tuple or a mixed value if the tuple length is 1.

        Keywords arguments:
        fmt      -- string, a struct format string
        numbytes -- integer, the number of bytes to read from the file
        """
        data = self.fhandle.read(numbytes)
        tup = struct.unpack(fmt, data)
        if len(tup) == 1:
            return tup[0]
        return tup

# }}}
# __main__ {{{

if __name__ == '__main__':
    # **Main function**::
    #   - to **test** the module just run: *python polib.py [-v]*
    #   - to **profile** the module: *python polib.py -p <some_pofile.po>*
    import sys
    if len(sys.argv) > 2 and sys.argv[1] == '-p':
        def test(f):
            # Load the file with the parser matching its name, then
            # render it as a string so the output path is profiled too.
            if f.endswith('po'):
                p = pofile(f)
            else:
                p = mofile(f)
            s = str(p)
        import profile
        # %r yields a properly quoted and escaped string literal, so paths
        # containing quotes or backslashes cannot break (or inject code
        # into) the profiled statement.
        profile.run('test(%r)' % (sys.argv[2],))
    else:
        import doctest
        doctest.testmod()

# }}}