Package translate :: Package storage :: Module fpo
[hide private]
[frames | no frames]

Source Code for Module translate.storage.fpo

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2002-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Classes for the support of Gettext .po and .pot files. 
 22   
 23  This implementation assumes that cpo is working. This should not be used 
 24  directly, but can be used once cpo has been established to work.""" 
 25   
 26  #TODO: 
 27  # - handle headerless PO files better 
 28  # - previous msgid and msgctxt 
 29  # - accept only unicodes everywhere 
 30   
 31  from translate.misc.multistring import multistring 
 32  from translate.lang import data 
 33  from translate.storage import pocommon, base, cpo, poparser 
 34  from translate.storage.pocommon import encodingToUse 
 35  import re 
 36  import copy 
 37  import cStringIO 
 38  import urllib 
 39   
 40  lsep = " " 
 41  """Separator for #: entries"""
 42   
 43  basic_header = r'''msgid "" 
 44  msgstr "" 
 45  "Content-Type: text/plain; charset=UTF-8\n" 
 46  "Content-Transfer-Encoding: 8bit\n" 
 47  ''' 
 48   
class pounit(pocommon.pounit):
    """A PO unit that stores its comments, context, source and target
    directly on the instance."""
    # Attributes a unit may carry, next to the PO syntax each one stands for:
    # othercomments = []      #   # this is another comment
    # automaticcomments = []  #   #. comment extracted from the source code
    # sourcecomments = []     #   #: sourcefile.xxx:35
    # prev_msgctxt = []       #   #| The previous values that msgctxt and msgid held
    # prev_msgid = []         #
    # prev_msgid_plural = []  #
    # typecomments = []       #   #, fuzzy
    # msgidcomment = u""      #   _: within msgid
    # msgctxt
    # msgid = []
    # msgstr = []

    # Our homegrown way to indicate what must be copied in a shallow
    # fashion (read by __deepcopy__)
    __shallow__ = ['_store']

    def __init__(self, source=None, encoding="UTF-8"):
        """Create a unit for C{source} with blank comments, context and target.

        @param source: The source text, handed to pocommon.pounit.__init__
        @param encoding: Encoding name; passed through encodingToUse and
            stored as C{_encoding}
        """
        pocommon.pounit.__init__(self, source)
        self._encoding = encodingToUse(encoding)
        self._initallcomments(blankall=True)
        self._msgctxt = u""

        # Goes through the target property, i.e. settarget()
        self.target = u""
73
74 - def _initallcomments(self, blankall=False):
75 """Initialises allcomments""" 76 if blankall: 77 self.othercomments = [] 78 self.automaticcomments = [] 79 self.sourcecomments = [] 80 self.typecomments = [] 81 self.msgidcomment = u""
82
    def getsource(self):
        """Returns the stored source (msgid) value, as set by setsource()."""
        return self._source
85
86 - def setsource(self, source):
87 self._rich_source = None 88 # assert isinstance(source, unicode) 89 source = data.forceunicode(source or u"") 90 source = source or u"" 91 if isinstance(source, multistring): 92 self._source = source 93 elif isinstance(source, unicode): 94 self._source = source 95 else: 96 #unicode, list, dict 97 self._source = multistring(source)
    # Attribute access to .source is routed through getsource()/setsource().
    source = property(getsource, setsource)

    def gettarget(self):
        """Returns the unescaped msgstr

        @return: The value last stored by settarget()
        """
        return self._target
104
105 - def settarget(self, target):
106 """Sets the msgstr to the given (unescaped) value""" 107 self._rich_target = None 108 # assert isinstance(target, unicode) 109 # target = data.forceunicode(target) 110 if self.hasplural(): 111 if isinstance(target, multistring): 112 self._target = target 113 else: 114 #unicode, list, dict 115 self._target = multistring(target) 116 elif isinstance(target, (dict, list)): 117 if len(target) == 1: 118 self._target = target[0] 119 else: 120 raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target)) 121 else: 122 self._target = target
    # Attribute access to .target is routed through gettarget()/settarget().
    target = property(gettarget, settarget)

125 - def getnotes(self, origin=None):
126 """Return comments based on origin value (programmer, developer, source code and translator)""" 127 if origin == None: 128 comments = u"\n".join(self.othercomments) 129 comments += u"\n".join(self.automaticcomments) 130 elif origin == "translator": 131 comments = u"\n".join(self.othercomments) 132 elif origin in ["programmer", "developer", "source code"]: 133 comments = u"\n".join(self.automaticcomments) 134 else: 135 raise ValueError("Comment type not valid") 136 return comments
137
138 - def addnote(self, text, origin=None, position="append"):
139 """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote""" 140 # ignore empty strings and strings without non-space characters 141 if not (text and text.strip()): 142 return 143 text = data.forceunicode(text) 144 commentlist = self.othercomments 145 autocomments = False 146 if origin in ["programmer", "developer", "source code"]: 147 autocomments = True 148 commentlist = self.automaticcomments 149 if text.endswith(u'\n'): 150 text = text[:-1] 151 newcomments = text.split(u"\n") 152 if position == "append": 153 newcomments = commentlist + newcomments 154 elif position == "prepend": 155 newcomments = newcomments + commentlist 156 157 if autocomments: 158 self.automaticcomments = newcomments 159 else: 160 self.othercomments = newcomments
161
    def removenotes(self):
        """Remove all the translator's notes (other comments)

        The automatic comments are left untouched.
        """
        self.othercomments = []
165
166 - def __deepcopy__(self, memo={}):
167 # Make an instance to serve as the copy 168 new_unit = self.__class__() 169 # We'll be testing membership frequently, so make a set from 170 # self.__shallow__ 171 shallow = set(self.__shallow__) 172 # Make deep copies of all members which are not in shallow 173 for key, value in self.__dict__.iteritems(): 174 if key not in shallow: 175 setattr(new_unit, key, copy.deepcopy(value)) 176 # Make shallow copies of all members which are in shallow 177 for key in set(shallow): 178 setattr(new_unit, key, getattr(self, key)) 179 # Mark memo with ourself, so that we won't get deep copied 180 # again 181 memo[id(self)] = self 182 # Return our copied unit 183 return new_unit
184
    def copy(self):
        """Returns a deep copy of this unit, made with copy.deepcopy (and
        therefore by __deepcopy__)."""
        return copy.deepcopy(self)
187
188 - def _msgidlen(self):
189 if self.hasplural(): 190 len("".join([string for string in self.source.strings])) 191 else: 192 return len(self.source)
193
194 - def _msgstrlen(self):
195 if self.hasplural(): 196 len("".join([string for string in self.target.strings])) 197 else: 198 return len(self.target)
199
200 - def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
201 """Merges the otherpo (with the same msgid) into this one. 202 203 Overwrite non-blank self.msgstr only if overwrite is True 204 merge comments only if comments is True 205 """ 206 207 def mergelists(list1, list2, split=False): 208 #decode where necessary 209 if unicode in [type(item) for item in list2] + [type(item) for item in list1]: 210 for position, item in enumerate(list1): 211 if isinstance(item, str): 212 list1[position] = item.decode("utf-8") 213 for position, item in enumerate(list2): 214 if isinstance(item, str): 215 list2[position] = item.decode("utf-8") 216 217 #Determine the newline style of list2 218 lineend = "" 219 if list2 and list2[0]: 220 for candidate in ["\n", "\r", "\n\r"]: 221 if list2[0].endswith(candidate): 222 lineend = candidate 223 if not lineend: 224 lineend = "" 225 226 #Split if directed to do so: 227 if split: 228 splitlist1 = [] 229 splitlist2 = [] 230 for item in list1: 231 splitlist1.extend(item.split()) 232 for item in list2: 233 splitlist2.extend(item.split()) 234 list1.extend([item for item in splitlist2 if not item in splitlist1]) 235 else: 236 #Normal merge, but conform to list1 newline style 237 if list1 != list2: 238 for item in list2: 239 item = item.rstrip(lineend) 240 # avoid duplicate comment lines (this might cause some problems) 241 if item not in list1 or len(item) < 5: 242 list1.append(item)
243 244 if not isinstance(otherpo, pounit): 245 super(pounit, self).merge(otherpo, overwrite, comments) 246 return 247 if comments: 248 mergelists(self.othercomments, otherpo.othercomments) 249 mergelists(self.typecomments, otherpo.typecomments) 250 if not authoritative: 251 # We don't bring across otherpo.automaticcomments as we consider ourself 252 # to be the the authority. Same applies to otherpo.msgidcomments 253 mergelists(self.automaticcomments, otherpo.automaticcomments) 254 # mergelists(self.msgidcomments, otherpo.msgidcomments) #XXX? 255 mergelists(self.sourcecomments, otherpo.sourcecomments, split=True) 256 if not self.istranslated() or overwrite: 257 # Remove kde-style comments from the translation (if any). XXX - remove 258 if pocommon.extract_msgid_comment(otherpo.target): 259 otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments()+ '\n', '') 260 self.target = otherpo.target 261 if self.source != otherpo.source or self.getcontext() != otherpo.getcontext(): 262 self.markfuzzy() 263 else: 264 self.markfuzzy(otherpo.isfuzzy()) 265 elif not otherpo.istranslated(): 266 if self.source != otherpo.source: 267 self.markfuzzy() 268 else: 269 if self.target != otherpo.target: 270 self.markfuzzy()
271
272 - def isheader(self):
273 #TODO: fix up nicely 274 return not self.getid() and len(self.target) > 0
275
276 - def isblank(self):
277 if self.isheader() or self.msgidcomment: 278 return False 279 if (self._msgidlen() == 0) and (self._msgstrlen() == 0) and len(self._msgctxt) == 0: 280 return True 281 return False
282
283 - def hastypecomment(self, typecomment):
284 """Check whether the given type comment is present""" 285 # check for word boundaries properly by using a regular expression... 286 return sum(map(lambda tcline: len(re.findall("\\b%s\\b" % typecomment, tcline)), self.typecomments)) != 0
287
288 - def hasmarkedcomment(self, commentmarker):
289 """Check whether the given comment marker is present as # (commentmarker) ...""" 290 # raise DeprecationWarning 291 commentmarker = "(%s)" % commentmarker 292 for comment in self.othercomments: 293 if comment.startswith(commentmarker): 294 return True 295 return False
296
297 - def settypecomment(self, typecomment, present=True):
298 """Alters whether a given typecomment is present""" 299 if self.hastypecomment(typecomment) != present: 300 if present: 301 self.typecomments.append("#, %s\n" % typecomment) 302 else: 303 # this should handle word boundaries properly ... 304 typecomments = map(lambda tcline: re.sub("\\b%s\\b[ \t,]*" % typecomment, "", tcline), self.typecomments) 305 self.typecomments = filter(lambda tcline: tcline.strip() != "#,", typecomments)
306
    def istranslated(self):
        """Returns whether the parent class considers the unit translated
        and the unit is not obsolete."""
        return super(pounit, self).istranslated() and not self.isobsolete()
309
310 - def istranslatable(self):
311 return not (self.isheader() or self.isblank() or self.isobsolete())
312
    def _domarkfuzzy(self, present=True):
        """Does nothing in this class; C{present} is ignored."""
        pass
315
    def makeobsolete(self):
        """Makes this unit obsolete"""
        # Source locations and automatic comments are dropped before the
        # parent class does the rest
        self.sourcecomments = []
        self.automaticcomments = []
        super(pounit, self).makeobsolete()
321
    def hasplural(self):
        """returns whether this pounit contains plural strings...

        That is the case when the source is a multistring holding more than
        one string.
        """
        source = self.source
        return isinstance(source, multistring) and len(source.strings) > 1
326
327 - def parse(self, src):
328 raise DeprecationWarning("Should not be parsing with a unit") 329 return poparser.parse_unit(poparser.ParseState(cStringIO.StringIO(src), pounit), self)
330
    def __str__(self):
        """convert to a string. double check that unicode is handled somehow here

        The text is produced by building a cpo unit from this unit and
        returning that unit's string form.
        """
        _cpo_unit = cpo.pounit.buildfromunit(self)
        return str(_cpo_unit)
335
    def getlocations(self):
        """Get a list of locations from sourcecomments in the PO unit

        @rtype: List
        @return: The entries of sourcecomments, each passed through
            urllib.unquote_plus
        """
        #TODO: rename to .locations
        return [urllib.unquote_plus(loc) for loc in self.sourcecomments]
345
346 - def addlocation(self, location):
347 """Add a location to sourcecomments in the PO unit 348 349 @param location: Text location e.g. 'file.c:23' does not include #: 350 @type location: String 351 """ 352 if location.find(" ") != -1: 353 location = urllib.quote_plus(location) 354 self.sourcecomments.extend(location.split())
355
356 - def _extract_msgidcomments(self, text=None):
357 """Extract KDE style msgid comments from the unit. 358 359 @rtype: String 360 @return: Returns the extracted msgidcomments found in this unit's msgid. 361 """ 362 if text: 363 return pocommon.extract_msgid_comment(text) 364 else: 365 return self.msgidcomment
366
    def getcontext(self):
        """Get the message context.

        @return: The msgctxt immediately followed by the msgid comment
        """
        return self._msgctxt + self.msgidcomment
370
    def setcontext(self, context):
        """Set the message context (msgctxt).

        @param context: The new context; a false value is treated as an
            empty string, and the value is passed through data.forceunicode
        """
        context = data.forceunicode(context or u"")
        self._msgctxt = context
374
375 - def getid(self):
376 """Returns a unique identifier for this unit.""" 377 context = self.getcontext() 378 # Gettext does not consider the plural to determine duplicates, only 379 # the msgid. For generation of .mo files, we might want to use this 380 # code to generate the entry for the hash table, but for now, it is 381 # commented out for conformance to gettext. 382 # id = '\0'.join(self.source.strings) 383 id = self.source 384 if self.msgidcomment: 385 id = u"_: %s\n%s" % (context, id) 386 elif context: 387 id = u"%s\04%s" % (context, id) 388 return id
389
    def buildfromunit(cls, unit):
        """Build a native unit from a foreign unit, preserving as much
        information as possible.

        @param unit: The unit to convert
        @return: A copy of C{unit} if it is exactly of this class, a new unit
            of this class if it is some other pocommon.pounit, and otherwise
            whatever base.TranslationUnit.buildfromunit returns
        """
        if type(unit) == cls and hasattr(unit, "copy") and callable(unit.copy):
            return unit.copy()
        elif isinstance(unit, pocommon.pounit):
            newunit = cls(unit.source)
            newunit.target = unit.target
            #context
            newunit.msgidcomment = unit._extract_msgidcomments()
            # The context is only taken over when there is no msgid comment
            if not newunit.msgidcomment:
                newunit._msgctxt = unit.getcontext()

            locations = unit.getlocations()
            if locations:
                newunit.addlocations(locations)
            notes = unit.getnotes("developer")
            if notes:
                newunit.addnote(notes, "developer")
            notes = unit.getnotes("translator")
            if notes:
                newunit.addnote(notes, "translator")
            newunit.markfuzzy(unit.isfuzzy())
            if unit.isobsolete():
                newunit.makeobsolete()
            # Only the first of these format flags found is carried over
            for tc in ['python-format', 'c-format', 'php-format']:
                if unit.hastypecomment(tc):
                    newunit.settypecomment(tc)
                    break
            return newunit
        else:
            return base.TranslationUnit.buildfromunit(unit)
    # buildfromunit takes the class, not an instance, as its first argument.
    buildfromunit = classmethod(buildfromunit)

class pofile(pocommon.pofile):
    """A .po file containing various units"""
    # The unit class used when this store builds units (see
    # _build_self_from_cpo)
    UnitClass = pounit

428 - def changeencoding(self, newencoding):
429 """Deprecated: changes the encoding on the file.""" 430 # This should not be here but in poheader. It also shouldn't mangle the 431 # header itself, but use poheader methods. All users are removed, so 432 # we can deprecate after one release. 433 raise DeprecationWarning 434 435 self._encoding = encodingToUse(newencoding) 436 if not self.units: 437 return 438 header = self.header() 439 if not header or header.isblank(): 440 return 441 charsetline = None 442 headerstr = header.target 443 for line in headerstr.split("\n"): 444 if not ":" in line: 445 continue 446 key, value = line.strip().split(":", 1) 447 if key.strip() != "Content-Type": 448 continue 449 charsetline = line 450 if charsetline is None: 451 headerstr += "Content-Type: text/plain; charset=%s" % self._encoding 452 else: 453 charset = re.search("charset=([^ ]*)", charsetline) 454 if charset is None: 455 newcharsetline = charsetline 456 if not newcharsetline.strip().endswith(";"): 457 newcharsetline += ";" 458 newcharsetline += " charset=%s" % self._encoding 459 else: 460 charset = charset.group(1) 461 newcharsetline = charsetline.replace("charset=%s" % charset, "charset=%s" % self._encoding, 1) 462 headerstr = headerstr.replace(charsetline, newcharsetline, 1) 463 header.target = headerstr
464
    def _build_self_from_cpo(self):
        """Builds up this store from the internal cpo store.

        A user must ensure that self._cpo_store already exists, and that it is
        deleted afterwards."""
        # Every cpo unit is converted to this store's UnitClass and added
        for unit in self._cpo_store.units:
            self.addunit(self.UnitClass.buildfromunit(unit))
        # The encoding is taken over from the cpo store
        self._encoding = self._cpo_store._encoding
473
    def _build_cpo_from_self(self):
        """Builds the internal cpo store from the data in self.

        A user must ensure that self._cpo_store does not exist, and should
        delete it after using it."""
        # Any value already held in self._cpo_store is replaced here
        self._cpo_store = cpo.pofile(noheader=True)
        # Blank units are left out of the cpo store
        for unit in self.units:
            if not unit.isblank():
                self._cpo_store.addunit(cpo.pofile.UnitClass.buildfromunit(unit))
        if not self._cpo_store.header():
            #only add a temporary header
            self._cpo_store.makeheader(charset="utf-8", encoding="8bit")
486 487
488 - def parse(self, input):
489 """Parses the given file or file source string.""" 490 try: 491 if hasattr(input, 'name'): 492 self.filename = input.name 493 elif not getattr(self, 'filename', ''): 494 self.filename = '' 495 tmp_header_added = False 496 # if isinstance(input, str) and '"Content-Type: text/plain; charset=' not in input[:200]: 497 # input = basic_header + input 498 # tmp_header_added = True 499 self.units = [] 500 self._cpo_store = cpo.pofile(input, noheader=True) 501 self._build_self_from_cpo() 502 del self._cpo_store 503 if tmp_header_added: 504 self.units = self.units[1:] 505 except Exception, e: 506 raise base.ParseError(e)
507
508 - def removeduplicates(self, duplicatestyle="merge"):
509 """Make sure each msgid is unique ; merge comments etc from duplicates into original""" 510 # TODO: can we handle consecutive calls to removeduplicates()? What 511 # about files already containing msgctxt? - test 512 id_dict = {} 513 uniqueunits = [] 514 # TODO: this is using a list as the pos aren't hashable, but this is slow. 515 # probably not used frequently enough to worry about it, though. 516 markedpos = [] 517 def addcomment(thepo): 518 thepo.msgidcomment = " ".join(thepo.getlocations()) 519 markedpos.append(thepo)
520 for thepo in self.units: 521 id = thepo.getid() 522 if thepo.isheader() and not thepo.getlocations(): 523 # header msgids shouldn't be merged... 524 uniqueunits.append(thepo) 525 elif id in id_dict: 526 if duplicatestyle == "merge": 527 if id: 528 id_dict[id].merge(thepo) 529 else: 530 addcomment(thepo) 531 uniqueunits.append(thepo) 532 elif duplicatestyle == "msgctxt": 533 origpo = id_dict[id] 534 if origpo not in markedpos: 535 origpo._msgctxt += " ".join(origpo.getlocations()) 536 markedpos.append(thepo) 537 thepo._msgctxt += " ".join(thepo.getlocations()) 538 uniqueunits.append(thepo) 539 else: 540 if not id: 541 if duplicatestyle == "merge": 542 addcomment(thepo) 543 else: 544 thepo._msgctxt += u" ".join(thepo.getlocations()) 545 id_dict[id] = thepo 546 uniqueunits.append(thepo) 547 self.units = uniqueunits
548
    def __str__(self):
        """Convert to a string. double check that unicode is handled somehow here

        The text is produced by building a temporary cpo store from this
        store and returning its string form.
        """
        # NOTE(review): _build_cpo_from_self() assigns a new cpo.pofile to
        # self._cpo_store, so the store created on the next line (the only
        # one given encoding=self._encoding) appears to be discarded - confirm
        self._cpo_store = cpo.pofile(encoding=self._encoding, noheader=True)
        self._build_cpo_from_self()
        output = str(self._cpo_store)
        del self._cpo_store
        return output
556