Package translate :: Package storage :: Module pypo
[hide private]
[frames] | no frames]

Source Code for Module translate.storage.pypo

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2002-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """classes that hold units of .po files (pounit) or entire files (pofile) 
 22  gettext-style .po (or .pot) files are used in translations for KDE et al (see kbabel)""" 
 23   
 24  from __future__ import generators 
 25  from translate.misc.multistring import multistring 
 26  from translate.misc import quote 
 27  from translate.misc import textwrap 
 28  from translate.lang import data 
 29  from translate.storage import pocommon, base 
 30  import re 
 31  import copy 
 32  import cStringIO 
 33  import poparser 
 34   
 35  lsep = "\n#: " 
 36  """Separator for #: entries""" 
 37   
 38  # general functions for quoting / unquoting po strings 
 39   
 40  po_unescape_map = {"\\r": "\r", "\\t": "\t", '\\"': '"', '\\n': '\n', '\\\\': '\\'} 
 41  po_escape_map = dict([(value, key) for (key, value) in po_unescape_map.items()]) 
 42   
def escapeforpo(line):
    """Escape a single line of text for storage in a PO file.

    Every character that is a key of C{po_escape_map} is replaced by the
    escape sequence the map gives for it.

    @param line: unescaped text
    @return: the escaped text
    """
    # Gather the position of every character that has to be escaped.
    positions = set()
    for special in po_escape_map:
        positions.update(quote.find_all(line, special))

    pieces = []
    cursor = 0
    for position in sorted(positions):
        # Copy the untouched text, then the escape for the special character.
        pieces.append(line[cursor:position])
        pieces.append(po_escape_map[line[position:position+1]])
        cursor = position + 1
    pieces.append(line[cursor:])
    return "".join(pieces)
61
def unescapehandler(escape):
    """Return the character that a PO escape sequence stands for.

    Sequences that are not keys of C{po_unescape_map} are returned unchanged.
    """
    try:
        return po_unescape_map[escape]
    except KeyError:
        return escape
65
def wrapline(line):
    """Wrap text for po files.

    @param line: the text to wrap (at a width of 76 characters)
    @return: list of wrapped lines; whitespace is preserved
    """
    wrapped = textwrap.wrap(line, 76, replace_whitespace=False, expand_tabs=False, drop_whitespace=False)

    # Continuation lines should not start with a space: hand a leading space
    # back to the end of the line that precedes it.
    for previous in range(len(wrapped) - 1):
        following = wrapped[previous + 1]
        if following.startswith(' '):
            wrapped[previous + 1] = following[1:]
            wrapped[previous] += ' '
    return wrapped
80
def quoteforpo(text):
    """quotes the given text for a PO file, returning quoted and escaped lines

    @param text: the unescaped text, or None
    @return: a list of double-quoted, escaped PO lines (empty for None)
    """
    polines = []
    if text is None:
        return polines
    lines = text.split("\n")
    # Multi-line text, or a single line longer than 71 characters, is
    # written in the multi-line PO form.
    if len(lines) > 1 or (len(lines) == 1 and len(lines[0]) > 71):
        # The empty leader line is left out only for a single line that
        # merely ends in a newline (exactly two parts, the second empty).
        if len(lines) != 2 or lines[1]:
            polines.extend(['""'])
        # Every part except the last was followed by a newline in the text.
        for line in lines[:-1]:
            #TODO: We should only wrap after escaping
            lns = wrapline(line)
            if len(lns) > 0:
                for ln in lns[:-1]:
                    polines.extend(['"' + escapeforpo(ln) + '"'])
                # The final wrapped piece carries the escaped newline.
                if lns[-1]:
                    polines.extend(['"' + escapeforpo(lns[-1]) + '\\n"'])
            else:
                # An empty part is a bare newline.
                polines.extend(['"\\n"'])
    # The last part has no trailing newline.
    if lines[-1]:
        polines.extend(['"' + escapeforpo(line) + '"' for line in wrapline(lines[-1])])
    return polines
103
def extractpoline(line):
    """Strip the surrounding quotes from a PO line and unescape it.

    @param line: a quoted line from a po file (msgid or msgstr)
    @return: the first element of what quote.extractwithoutquotes returns
    """
    parts = quote.extractwithoutquotes(line, '"', '"', '\\', includeescapes=unescapehandler)
    return parts[0]
111
def unquotefrompo(postr):
    """Unquote each line of a PO string and join the results into one string.

    @param postr: a list of quoted PO lines
    """
    unquoted = []
    for poline in postr:
        unquoted.append(extractpoline(poline))
    return u"".join(unquoted)
114
def encodingToUse(encoding):
    """Return the encoding to use, falling back to UTF-8 for placeholders.

    No codec lookup is performed: the value is only checked against the
    two "not specified" markers, so that a usable encoding name is always
    returned.

    @param encoding: an encoding name, the PO template placeholder
        "CHARSET", or None
    @return: 'utf-8' if encoding is None or "CHARSET", otherwise the
        encoding unchanged
    """
    if encoding is None or encoding == "CHARSET":
        return 'utf-8'
    return encoding
def is_null(lst):
    """Tell whether a list of PO lines is empty.

    That is the case for an empty list and for a list holding only the
    empty quoted string.
    """
    if lst == []:
        return True
    return len(lst) == 1 and lst[0] == '""'
131
def extractstr(string):
    """Return the part of string from its first to its last double quote.

    If the string has no double quote at all, a closing quote is appended
    to its last character (find() gives -1, which slices from the end).
    """
    first = string.find('"')
    last = string.rfind('"')
    if last == -1:
        return string[first:] + '"'
    return string[first:last + 1]
139
class pounit(pocommon.pounit):
    """A single unit (entry) of a .po file, held as lists of raw PO lines."""
    # The parts of a unit and the kind of PO line each one holds:
    # othercomments = []      #   # this is another comment
    # automaticcomments = []  #   #. comment extracted from the source code
    # sourcecomments = []     #   #: sourcefile.xxx:35
    # prev_msgctxt = []       #   #| The previous values that msgctxt and msgid held
    # prev_msgid = []         #
    # prev_msgid_plural = []  #
    # typecomments = []       #   #, fuzzy
    # msgidcomments = []      #   _: within msgid
    # msgctxt
    # msgid = []
    # msgstr = []

    # Our homegrown way to indicate what must be copied in a shallow
    # fashion
    __shallow__ = ['_store']
157 - def __init__(self, source=None, encoding="UTF-8"):
158 self._encoding = encodingToUse(encoding) 159 self.obsolete = False 160 self._initallcomments(blankall=True) 161 self.prev_msgctxt = [] 162 self.prev_msgid = [] 163 self.prev_msgid_plural = [] 164 self.msgctxt = [] 165 self.msgid = [] 166 self.msgid_pluralcomments = [] 167 self.msgid_plural = [] 168 self.msgstr = [] 169 self.obsoletemsgctxt = [] 170 self.obsoletemsgid = [] 171 self.obsoletemsgid_pluralcomments = [] 172 self.obsoletemsgid_plural = [] 173 self.obsoletemsgstr = [] 174 pocommon.pounit.__init__(self, source)
175
176 - def _initallcomments(self, blankall=False):
177 """Initialises allcomments""" 178 if blankall: 179 self.othercomments = [] 180 self.automaticcomments = [] 181 self.sourcecomments = [] 182 self.typecomments = [] 183 self.msgidcomments = [] 184 self.obsoletemsgidcomments = []
185
186 - def _get_all_comments(self):
187 return [self.othercomments, 188 self.automaticcomments, 189 self.sourcecomments, 190 self.typecomments, 191 self.msgidcomments, 192 self.obsoletemsgidcomments]
193 194 allcomments = property(_get_all_comments) 195
196 - def _get_source_vars(self, msgid, msgid_plural):
197 multi = multistring(unquotefrompo(msgid), self._encoding) 198 if self.hasplural(): 199 pluralform = unquotefrompo(msgid_plural) 200 if isinstance(pluralform, str): 201 pluralform = pluralform.decode(self._encoding) 202 multi.strings.append(pluralform) 203 return multi
204
205 - def _set_source_vars(self, source):
206 msgid = None 207 msgid_plural = None 208 if isinstance(source, str): 209 source = source.decode(self._encoding) 210 if isinstance(source, multistring): 211 source = source.strings 212 if isinstance(source, list): 213 msgid = quoteforpo(source[0]) 214 if len(source) > 1: 215 msgid_plural = quoteforpo(source[1]) 216 else: 217 msgid_plural = [] 218 else: 219 msgid = quoteforpo(source) 220 msgid_plural = [] 221 return msgid, msgid_plural
222
223 - def getsource(self):
224 """Returns the unescaped msgid""" 225 return self._get_source_vars(self.msgid, self.msgid_plural)
226
227 - def setsource(self, source):
228 """Sets the msgid to the given (unescaped) value. 229 230 @param source: an unescaped source string. 231 """ 232 self.msgid, self.msgid_plural = self._set_source_vars(source)
233 source = property(getsource, setsource) 234
235 - def _get_prev_source(self):
236 """Returns the unescaped msgid""" 237 return self._get_source_vars(self.prev_msgid, self.prev_msgid_plural)
238
239 - def _set_prev_source(self, source):
240 """Sets the msgid to the given (unescaped) value. 241 242 @param source: an unescaped source string. 243 """ 244 self.prev_msgid, self.prev_msgid_plural = self._set_source_vars(source)
245 prev_source = property(_get_prev_source, _set_prev_source) 246
247 - def gettarget(self):
248 """Returns the unescaped msgstr""" 249 if isinstance(self.msgstr, dict): 250 multi = multistring(map(unquotefrompo, self.msgstr.values()), self._encoding) 251 else: 252 multi = multistring(unquotefrompo(self.msgstr), self._encoding) 253 return multi
254
255 - def settarget(self, target):
256 """Sets the msgstr to the given (unescaped) value""" 257 self._rich_target = None 258 if isinstance(target, str): 259 target = target.decode(self._encoding) 260 if self.hasplural(): 261 if isinstance(target, multistring): 262 target = target.strings 263 elif isinstance(target, basestring): 264 target = [target] 265 elif isinstance(target, (dict, list)): 266 if len(target) == 1: 267 target = target[0] 268 else: 269 raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target)) 270 templates = self.msgstr 271 if isinstance(templates, list): 272 templates = {0: templates} 273 if isinstance(target, list): 274 self.msgstr = dict([(i, quoteforpo(target[i])) for i in range(len(target))]) 275 elif isinstance(target, dict): 276 self.msgstr = dict([(i, quoteforpo(targetstring)) for i, targetstring in target.iteritems()]) 277 else: 278 self.msgstr = quoteforpo(target)
279 target = property(gettarget, settarget) 280
281 - def getnotes(self, origin=None):
282 """Return comments based on origin value (programmer, developer, source code and translator)""" 283 if origin == None: 284 comments = u"".join([comment[2:] for comment in self.othercomments]) 285 comments += u"".join([comment[3:] for comment in self.automaticcomments]) 286 elif origin == "translator": 287 comments = u"".join ([comment[2:] for comment in self.othercomments]) 288 elif origin in ["programmer", "developer", "source code"]: 289 comments = u"".join([comment[3:] for comment in self.automaticcomments]) 290 else: 291 raise ValueError("Comment type not valid") 292 # Let's drop the last newline 293 return comments[:-1]
294
295 - def addnote(self, text, origin=None, position="append"):
296 """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote""" 297 # ignore empty strings and strings without non-space characters 298 if not (text and text.strip()): 299 return 300 text = data.forceunicode(text) 301 commentlist = self.othercomments 302 linestart = "# " 303 autocomments = False 304 if origin in ["programmer", "developer", "source code"]: 305 autocomments = True 306 commentlist = self.automaticcomments 307 linestart = "#. " 308 text = text.split("\n") 309 if position == "append": 310 commentlist += [linestart + line + "\n" for line in text] 311 else: 312 newcomments = [linestart + line + "\n" for line in text] 313 newcomments += [line for line in commentlist] 314 if autocomments: 315 self.automaticcomments = newcomments 316 else: 317 self.othercomments = newcomments
318
319 - def removenotes(self):
320 """Remove all the translator's notes (other comments)""" 321 self.othercomments = []
322
323 - def __deepcopy__(self, memo={}):
324 # Make an instance to serve as the copy 325 new_unit = self.__class__() 326 # We'll be testing membership frequently, so make a set from 327 # self.__shallow__ 328 shallow = set(self.__shallow__) 329 # Make deep copies of all members which are not in shallow 330 for key, value in self.__dict__.iteritems(): 331 if key not in shallow: 332 setattr(new_unit, key, copy.deepcopy(value)) 333 # Make shallow copies of all members which are in shallow 334 for key in set(shallow): 335 setattr(new_unit, key, getattr(self, key)) 336 # Mark memo with ourself, so that we won't get deep copied 337 # again 338 memo[id(self)] = self 339 # Return our copied unit 340 return new_unit
341
342 - def copy(self):
343 return copy.deepcopy(self)
344
345 - def _msgidlen(self):
346 if self.hasplural(): 347 return len(unquotefrompo(self.msgid).strip()) + len(unquotefrompo(self.msgid_plural).strip()) 348 else: 349 return len(unquotefrompo(self.msgid).strip())
350
351 - def _msgstrlen(self):
352 if isinstance(self.msgstr, dict): 353 combinedstr = "\n".join([unquotefrompo(msgstr).strip() for msgstr in self.msgstr.itervalues()]) 354 return len(combinedstr.strip()) 355 else: 356 return len(unquotefrompo(self.msgstr).strip())
357
358 - def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
359 """Merges the otherpo (with the same msgid) into this one. 360 361 Overwrite non-blank self.msgstr only if overwrite is True 362 merge comments only if comments is True 363 """ 364 365 def mergelists(list1, list2, split=False): 366 #decode where necessary 367 if unicode in [type(item) for item in list2] + [type(item) for item in list1]: 368 for position, item in enumerate(list1): 369 if isinstance(item, str): 370 list1[position] = item.decode("utf-8") 371 for position, item in enumerate(list2): 372 if isinstance(item, str): 373 list2[position] = item.decode("utf-8") 374 375 #Determine the newline style of list1 376 lineend = "" 377 if list1 and list1[0]: 378 for candidate in ["\n", "\r", "\n\r"]: 379 if list1[0].endswith(candidate): 380 lineend = candidate 381 if not lineend: 382 lineend = "" 383 else: 384 lineend = "\n" 385 386 #Split if directed to do so: 387 if split: 388 splitlist1 = [] 389 splitlist2 = [] 390 prefix = "#" 391 for item in list1: 392 splitlist1.extend(item.split()[1:]) 393 prefix = item.split()[0] 394 for item in list2: 395 splitlist2.extend(item.split()[1:]) 396 prefix = item.split()[0] 397 list1.extend(["%s %s%s" % (prefix, item, lineend) for item in splitlist2 if not item in splitlist1]) 398 else: 399 #Normal merge, but conform to list1 newline style 400 if list1 != list2: 401 for item in list2: 402 if lineend: 403 item = item.rstrip() + lineend 404 # avoid duplicate comment lines (this might cause some problems) 405 if item not in list1 or len(item) < 5: 406 list1.append(item)
407 if not isinstance(otherpo, pounit): 408 super(pounit, self).merge(otherpo, overwrite, comments) 409 return 410 if comments: 411 mergelists(self.othercomments, otherpo.othercomments) 412 mergelists(self.typecomments, otherpo.typecomments) 413 if not authoritative: 414 # We don't bring across otherpo.automaticcomments as we consider ourself 415 # to be the the authority. Same applies to otherpo.msgidcomments 416 mergelists(self.automaticcomments, otherpo.automaticcomments) 417 mergelists(self.msgidcomments, otherpo.msgidcomments) 418 mergelists(self.sourcecomments, otherpo.sourcecomments, split=True) 419 if not self.istranslated() or overwrite: 420 # Remove kde-style comments from the translation (if any). 421 if self._extract_msgidcomments(otherpo.target): 422 otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments()+ '\n', '') 423 self.target = otherpo.target 424 if self.source != otherpo.source or self.getcontext() != otherpo.getcontext(): 425 self.markfuzzy() 426 else: 427 self.markfuzzy(otherpo.isfuzzy()) 428 elif not otherpo.istranslated(): 429 if self.source != otherpo.source: 430 self.markfuzzy() 431 else: 432 if self.target != otherpo.target: 433 self.markfuzzy()
434
435 - def isheader(self):
436 #return (self._msgidlen() == 0) and (self._msgstrlen() > 0) and (len(self.msgidcomments) == 0) 437 #rewritten here for performance: 438 return (is_null(self.msgid) 439 and not is_null(self.msgstr) 440 and self.msgidcomments == [] 441 and is_null(self.msgctxt) 442 )
443
444 - def isblank(self):
445 if self.isheader() or len(self.msgidcomments): 446 return False 447 if (self._msgidlen() == 0) and (self._msgstrlen() == 0) and (is_null(self.msgctxt)): 448 return True 449 return False
450 # TODO: remove: 451 # Before, the equivalent of the following was the final return statement: 452 # return len(self.source.strip()) == 0 453
454 - def hastypecomment(self, typecomment):
455 """Check whether the given type comment is present""" 456 # check for word boundaries properly by using a regular expression... 457 return sum(map(lambda tcline: len(re.findall("\\b%s\\b" % typecomment, tcline)), self.typecomments)) != 0
458
459 - def hasmarkedcomment(self, commentmarker):
460 """Check whether the given comment marker is present as # (commentmarker) ...""" 461 commentmarker = "(%s)" % commentmarker 462 for comment in self.othercomments: 463 if comment.replace("#", "", 1).strip().startswith(commentmarker): 464 return True 465 return False
466
467 - def settypecomment(self, typecomment, present=True):
468 """Alters whether a given typecomment is present""" 469 if self.hastypecomment(typecomment) != present: 470 if present: 471 self.typecomments.append("#, %s\n" % typecomment) 472 else: 473 # this should handle word boundaries properly ... 474 typecomments = map(lambda tcline: re.sub("\\b%s\\b[ \t,]*" % typecomment, "", tcline), self.typecomments) 475 self.typecomments = filter(lambda tcline: tcline.strip() != "#,", typecomments)
476
477 - def isfuzzy(self):
478 return self.hastypecomment("fuzzy")
479
480 - def markfuzzy(self, present=True):
481 self.settypecomment("fuzzy", present)
482
483 - def isobsolete(self):
484 return self.obsolete
485
486 - def makeobsolete(self):
487 """Makes this unit obsolete""" 488 self.obsolete = True 489 if self.msgctxt: 490 self.obsoletemsgctxt = self.msgctxt 491 if self.msgid: 492 self.obsoletemsgid = self.msgid 493 self.msgid = [] 494 if self.msgidcomments: 495 self.obsoletemsgidcomments = self.msgidcomments 496 self.msgidcomments = [] 497 if self.msgid_plural: 498 self.obsoletemsgid_plural = self.msgid_plural 499 self.msgid_plural = [] 500 if self.msgstr: 501 self.obsoletemsgstr = self.msgstr 502 self.msgstr = [] 503 self.sourcecomments = [] 504 self.automaticcomments = []
505
506 - def resurrect(self):
507 """Makes an obsolete unit normal""" 508 self.obsolete = False 509 if self.obsoletemsgctxt: 510 self.msgid = self.obsoletemsgctxt 511 self.obsoletemsgctxt = [] 512 if self.obsoletemsgid: 513 self.msgid = self.obsoletemsgid 514 self.obsoletemsgid = [] 515 if self.obsoletemsgidcomments: 516 self.msgidcomments = self.obsoletemsgidcomments 517 self.obsoletemsgidcomments = [] 518 if self.obsoletemsgid_plural: 519 self.msgid_plural = self.obsoletemsgid_plural 520 self.obsoletemsgid_plural = [] 521 if self.obsoletemsgstr: 522 self.msgstr = self.obsoletemsgstr 523 self.obsoletemgstr = []
524
525 - def hasplural(self):
526 """returns whether this pounit contains plural strings...""" 527 return len(self.msgid_plural) > 0
528
529 - def parse(self, src):
530 return poparser.parse_unit(poparser.ParseState(cStringIO.StringIO(src), pounit), self)
531
    def _getmsgpartstr(self, partname, partlines, partcomments=""):
        """Build the PO text of one part of the unit (e.g. msgid or msgstr).

        @param partname: the keyword that starts the part
        @param partlines: the quoted lines of the part, or a dict of such
            lists, which is written as partname[key] entries in key order
        @param partcomments: quoted comment lines to place inside the part
        @return: the text of the part, ending in a newline
        """
        if isinstance(partlines, dict):
            partkeys = partlines.keys()
            partkeys.sort()
            return "".join([self._getmsgpartstr("%s[%d]" % (partname, partkey), partlines[partkey], partcomments) for partkey in partkeys])
        partstr = partname + " "
        # index of the first line of partlines that still has to be written
        partstartline = 0
        if len(partlines) > 0 and len(partcomments) == 0:
            # no comments: the first line goes next to the keyword
            partstr += partlines[0]
            partstartline = 1
        elif len(partcomments) > 0:
            if len(partlines) > 0 and len(unquotefrompo(partlines[:1])) == 0:
                # if there is a blank leader line, it must come before the comment
                partstr += partlines[0] + '\n'
                # but if the whole string is blank, leave it in
                if len(partlines) > 1:
                    partstartline += 1
            else:
                # All partcomments should start on a newline
                partstr += '""\n'
            # combine comments into one if more than one
            if len(partcomments) > 1:
                combinedcomment = []
                for comment in partcomments:
                    comment = unquotefrompo([comment])
                    if comment.startswith("_:"):
                        comment = comment[len("_:"):]
                    if comment.endswith("\\n"):
                        comment = comment[:-len("\\n")]
                    #Before we used to strip. Necessary in some cases?
                    combinedcomment.append(comment)
                partcomments = quoteforpo("_:%s" % "".join(combinedcomment))
            # comments first, no blank leader line needed
            partstr += "\n".join(partcomments)
            partstr = quote.rstripeol(partstr)
        else:
            # neither lines nor comments: write an empty string
            partstr += '""'
        partstr += '\n'
        # add the rest
        for partline in partlines[partstartline:]:
            partstr += partline + '\n'
        return partstr
574
575 - def _encodeifneccessary(self, output):
576 """encodes unicode strings and returns other strings unchanged""" 577 if isinstance(output, unicode): 578 encoding = encodingToUse(getattr(self, "encoding", "UTF-8")) 579 return output.encode(encoding) 580 return output
581
582 - def __str__(self):
583 """convert to a string. double check that unicode is handled somehow here""" 584 output = self._getoutput() 585 return self._encodeifneccessary(output)
586
    def _getoutput(self):
        """return this po element as a string

        Obsolete units are written from their obsolete* parts with every
        line prefixed by "#~". A unit with a null msgid is reduced to its
        othercomments unless it is a header, has a context or has source
        comments.
        """
        def add_prev_msgid_lines(lines, header, var):
            # Write one previous-value part as "#|" lines, the keyword
            # going on the first line only.
            if len(var) > 0:
                lines.append("#| %s %s\n" % (header, var[0]))
                lines.extend("#| %s\n" % line for line in var[1:])

        def add_prev_msgid_info(lines):
            # Previous msgctxt, msgid and msgid_plural, in that order.
            add_prev_msgid_lines(lines, 'msgctxt', self.prev_msgctxt)
            add_prev_msgid_lines(lines, 'msgid', self.prev_msgid)
            add_prev_msgid_lines(lines, 'msgid_plural', self.prev_msgid_plural)

        lines = []
        lines.extend(self.othercomments)
        if self.isobsolete():
            lines.extend(self.typecomments)
            obsoletelines = []
            if self.obsoletemsgctxt:
                obsoletelines.append(self._getmsgpartstr("#~ msgctxt", self.obsoletemsgctxt))
            obsoletelines.append(self._getmsgpartstr("#~ msgid", self.obsoletemsgid, self.obsoletemsgidcomments))
            if self.obsoletemsgid_plural or self.obsoletemsgid_pluralcomments:
                obsoletelines.append(self._getmsgpartstr("#~ msgid_plural", self.obsoletemsgid_plural, self.obsoletemsgid_pluralcomments))
            obsoletelines.append(self._getmsgpartstr("#~ msgstr", self.obsoletemsgstr))
            for index, obsoleteline in enumerate(obsoletelines):
                # We need to account for a multiline msgid or msgstr here
                obsoletelines[index] = obsoleteline.replace('\n"', '\n#~ "')
            lines.extend(obsoletelines)
            lines = [self._encodeifneccessary(line) for line in lines]
            return "".join(lines)
        # if there's no msgid don't do msgid and string, unless we're the header
        # this will also discard any comments other than plain othercomments...
        if is_null(self.msgid):
            if not (self.isheader() or self.getcontext() or self.sourcecomments):
                return "".join(lines)
        lines.extend(self.automaticcomments)
        lines.extend(self.sourcecomments)
        lines.extend(self.typecomments)
        add_prev_msgid_info(lines)
        if self.msgctxt:
            lines.append(self._getmsgpartstr("msgctxt", self.msgctxt))
        lines.append(self._getmsgpartstr("msgid", self.msgid, self.msgidcomments))
        if self.msgid_plural or self.msgid_pluralcomments:
            lines.append(self._getmsgpartstr("msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
        lines.append(self._getmsgpartstr("msgstr", self.msgstr))
        # encode each piece before joining them
        lines = [self._encodeifneccessary(line) for line in lines]
        postr = "".join(lines)
        return postr
635 - def getlocations(self):
636 """Get a list of locations from sourcecomments in the PO unit 637 638 rtype: List 639 return: A list of the locations with '#: ' stripped 640 641 """ 642 locations = [] 643 for sourcecomment in self.sourcecomments: 644 locations += quote.rstripeol(sourcecomment)[3:].split() 645 return locations
646
647 - def addlocation(self, location):
648 """Add a location to sourcecomments in the PO unit 649 650 @param location: Text location e.g. 'file.c:23' does not include #: 651 @type location: String 652 653 """ 654 self.sourcecomments.append("#: %s\n" % location)
655
656 - def _extract_msgidcomments(self, text=None):
657 """Extract KDE style msgid comments from the unit. 658 659 @rtype: String 660 @return: Returns the extracted msgidcomments found in this unit's msgid. 661 """ 662 663 if not text: 664 text = unquotefrompo(self.msgidcomments) 665 return text.split('\n')[0].replace('_: ', '', 1)
666
667 - def setmsgidcomment(self, msgidcomment):
668 if msgidcomment: 669 self.msgidcomments = ['"_: %s\\n"' % msgidcomment] 670 else: 671 self.msgidcomments = []
672 673 msgidcomment = property(_extract_msgidcomments, setmsgidcomment) 674
675 - def getcontext(self):
676 """Get the message context.""" 677 return unquotefrompo(self.msgctxt) + self._extract_msgidcomments()
678
679 - def getid(self):
680 """Returns a unique identifier for this unit.""" 681 context = self.getcontext() 682 # Gettext does not consider the plural to determine duplicates, only 683 # the msgid. For generation of .mo files, we might want to use this 684 # code to generate the entry for the hash table, but for now, it is 685 # commented out for conformance to gettext. 686 # id = '\0'.join(self.source.strings) 687 id = self.source 688 if self.msgidcomments: 689 id = u"_: %s\n%s" % (context, id) 690 elif context: 691 id = u"%s\04%s" % (context, id) 692 return id
693
class pofile(pocommon.pofile):
    """A .po file containing various units"""
    # The unit class of this store; __init__ replaces it on the instance
    # with its unitclass argument.
    UnitClass = pounit
698 - def __init__(self, inputfile=None, encoding=None, unitclass=pounit):
699 """Construct a pofile, optionally reading in from inputfile. 700 encoding can be specified but otherwise will be read from the PO header""" 701 self.UnitClass = unitclass 702 pocommon.pofile.__init__(self, unitclass=unitclass) 703 self.units = [] 704 self.filename = '' 705 self._encoding = encodingToUse(encoding) 706 if inputfile is not None: 707 self.parse(inputfile)
708
709 - def changeencoding(self, newencoding):
710 """Deprecated: changes the encoding on the file.""" 711 # This should not be here but in poheader. It also shouldn't mangle the 712 # header itself, but use poheader methods. All users are removed, so 713 # we can deprecate after one release. 714 raise DeprecationWarning 715 716 self._encoding = encodingToUse(newencoding) 717 if not self.units: 718 return 719 header = self.header() 720 if not header or header.isblank(): 721 return 722 charsetline = None 723 headerstr = unquotefrompo(header.msgstr) 724 for line in headerstr.split("\n"): 725 if not ":" in line: 726 continue 727 key, value = line.strip().split(":", 1) 728 if key.strip() != "Content-Type": 729 continue 730 charsetline = line 731 if charsetline is None: 732 headerstr += "Content-Type: text/plain; charset=%s" % self._encoding 733 else: 734 charset = re.search("charset=([^ ]*)", charsetline) 735 if charset is None: 736 newcharsetline = charsetline 737 if not newcharsetline.strip().endswith(";"): 738 newcharsetline += ";" 739 newcharsetline += " charset=%s" % self._encoding 740 else: 741 charset = charset.group(1) 742 newcharsetline = charsetline.replace("charset=%s" % charset, "charset=%s" % self._encoding, 1) 743 headerstr = headerstr.replace(charsetline, newcharsetline, 1) 744 header.msgstr = quoteforpo(headerstr)
745
746 - def parse(self, input):
747 """Parses the given file or file source string.""" 748 try: 749 if hasattr(input, 'name'): 750 self.filename = input.name 751 elif not getattr(self, 'filename', ''): 752 self.filename = '' 753 if isinstance(input, str): 754 input = cStringIO.StringIO(input) 755 poparser.parse_units(poparser.ParseState(input, pounit), self) 756 except Exception, e: 757 raise base.ParseError(e)
758
759 - def removeduplicates(self, duplicatestyle="merge"):
760 """Make sure each msgid is unique ; merge comments etc from duplicates into original""" 761 # TODO: can we handle consecutive calls to removeduplicates()? What 762 # about files already containing msgctxt? - test 763 id_dict = {} 764 uniqueunits = [] 765 # TODO: this is using a list as the pos aren't hashable, but this is slow. 766 # probably not used frequently enough to worry about it, though. 767 markedpos = [] 768 def addcomment(thepo): 769 thepo.msgidcomments.append('"_: %s\\n"' % " ".join(thepo.getlocations())) 770 markedpos.append(thepo)
771 for thepo in self.units: 772 id = thepo.getid() 773 if thepo.isheader() and not thepo.getlocations(): 774 # header msgids shouldn't be merged... 775 uniqueunits.append(thepo) 776 elif id in id_dict: 777 if duplicatestyle == "merge": 778 if id: 779 id_dict[id].merge(thepo) 780 else: 781 addcomment(thepo) 782 uniqueunits.append(thepo) 783 elif duplicatestyle == "msgctxt": 784 origpo = id_dict[id] 785 if origpo not in markedpos: 786 origpo.msgctxt.append('"%s"' % escapeforpo(" ".join(origpo.getlocations()))) 787 markedpos.append(thepo) 788 thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations()))) 789 uniqueunits.append(thepo) 790 else: 791 if not id: 792 if duplicatestyle == "merge": 793 addcomment(thepo) 794 else: 795 thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations()))) 796 id_dict[id] = thepo 797 uniqueunits.append(thepo) 798 self.units = uniqueunits
799
800 - def __str__(self):
801 """Convert to a string. double check that unicode is handled somehow here""" 802 output = self._getoutput() 803 if isinstance(output, unicode): 804 return output.encode(getattr(self, "encoding", "UTF-8")) 805 return output
806
807 - def _getoutput(self):
808 """convert the units back to lines""" 809 lines = [] 810 for unit in self.units: 811 unitsrc = str(unit) + "\n" 812 lines.append(unitsrc) 813 lines = "".join(self.encode(lines)).rstrip() 814 #After the last pounit we will have \n\n and we only want to end in \n: 815 if lines: 816 lines += "\n" 817 return lines
818
819 - def encode(self, lines):
820 """encode any unicode strings in lines in self._encoding""" 821 newlines = [] 822 encoding = self._encoding 823 if encoding is None or encoding.lower() == "charset": 824 encoding = 'UTF-8' 825 for line in lines: 826 if isinstance(line, unicode): 827 line = line.encode(encoding) 828 newlines.append(line) 829 return newlines
830
831 - def decode(self, lines):
832 """decode any non-unicode strings in lines with self._encoding""" 833 newlines = [] 834 for line in lines: 835 if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset": 836 try: 837 line = line.decode(self._encoding) 838 except UnicodeError, e: 839 raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line)) 840 newlines.append(line) 841 return newlines
842
843 - def unit_iter(self):
844 for unit in self.units: 845 if not (unit.isheader() or unit.isobsolete()): 846 yield unit
847